From f702e7c16e5fb7bf788a2f8318b61d7368a785b9 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 16 Nov 2022 18:02:34 -0500 Subject: [PATCH 001/163] first lift of a func --- data_specifications/specification.proto | 9 + include/anvill/Declarations.h | 13 ++ lib/Lifters/FunctionLifter.cpp | 217 ++++++++++++------------ lib/Lifters/FunctionLifter.h | 35 +++- lib/Protobuf.cpp | 15 ++ lib/Protobuf.h | 4 + lib/Specification.cpp | 1 + 7 files changed, 182 insertions(+), 112 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index ecd4c63a8..466c37e12 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -158,11 +158,20 @@ message Callable { ReturnStackPointer return_stack_pointer = 10; } +message CodeBlock { + uint64 address = 1; + string name = 2; + repeated uint64 incoming_blocks = 3; + repeated uint64 outgoing_blocks = 4; + uint32 size = 5; +} + message Function { uint64 entry_address = 1; map context_assignments = 2; FunctionLinkage func_linkage = 3; Callable callable = 4; + map blocks = 5; } message GlobalVariable { diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index c650f429b..9fe392c12 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -8,12 +8,15 @@ #pragma once +#include <_types/_uint64_t.h> + #include #include #include #include #include #include +#include #include #include "Result.h" @@ -39,6 +42,12 @@ struct Register; } // namespace remill namespace anvill { +struct CodeBlock { + uint64_t addr; + uint32_t size; + std::unordered_set outgoing_edges; +}; + class TypeDictionary; // A value, such as a parameter or a return value. Values are resident @@ -195,11 +204,15 @@ struct FunctionDecl : public CallableDecl { bool lift_as_decl{false}; bool is_extern{false}; + // The set of context assignments that occur at the entry point to this function. 
// A called function may have specific decoding context properties such as "TM=1" (the thumb bit is set) // So we declare the context assignments that occur at the entry point to a function. std::unordered_map context_assignments; + // These are the blocks contained within the function representing the CFG. + std::unordered_map cfg; + // Declare this function in an LLVM module. llvm::Function *DeclareInModule(std::string_view name, llvm::Module &) const; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 26e5b4ae9..ec4ec41dc 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -8,13 +8,16 @@ #include "FunctionLifter.h" +#include <_types/_uint64_t.h> #include #include #include #include #include #include +#include #include +#include #include #include #include @@ -32,13 +35,17 @@ #include #include #include +#include #include +#include #include #include #include #include +#include #include +#include #include #include #include @@ -192,34 +199,22 @@ llvm::BranchInst * FunctionLifter::BranchToInst(uint64_t from_addr, uint64_t to_addr, const remill::DecodingContext &mapper, llvm::BasicBlock *from_block) { - auto br = llvm::BranchInst::Create( - GetOrCreateBlock(from_addr, to_addr, mapper), from_block); + auto br = llvm::BranchInst::Create(GetOrCreateBlock(to_addr), from_block); AnnotateInstruction(br, pc_annotation_id, pc_annotation); return br; } -// Helper to get the basic block to contain the instruction at `addr`. This -// function drives a work list, where the first time we ask for the -// instruction at `addr`, we enqueue a bit of work to decode and lift that -// instruction. 
-llvm::BasicBlock * -FunctionLifter::GetOrCreateBlock(uint64_t from_addr, uint64_t to_addr, - const remill::DecodingContext &mapper) { - auto &block = edge_to_dest_block[{from_addr, to_addr}]; + +llvm::BasicBlock *FunctionLifter::GetOrCreateBlock(uint64_t baddr) { + auto &block = this->addr_to_block[baddr]; if (block) { return block; } std::stringstream ss; - ss << "inst_" << std::hex << to_addr; + ss << "inst_" << std::hex << baddr; block = llvm::BasicBlock::Create(llvm_context, ss.str(), lifted_func); - // NOTE(pag): We always add to the work list without consulting/updating - // `addr_to_block` so that we can observe self-tail-calls and - // lift them as such, rather than as jumps back into the first - // lifted block. - edge_work_list.emplace(to_addr, from_addr); - this->decoding_contexts.emplace(std::make_pair(to_addr, from_addr), mapper); return block; } @@ -227,7 +222,7 @@ llvm::BasicBlock * FunctionLifter::GetOrCreateTargetBlock(const remill::Instruction &from_inst, uint64_t to_addr, const remill::DecodingContext &mapper) { - return GetOrCreateBlock(from_inst.pc, to_addr, mapper); + return GetOrCreateBlock(to_addr); } // Try to decode an instruction at address `addr` into `*inst_out`. Returns @@ -1020,77 +1015,6 @@ llvm::Value *FunctionLifter::TryCallNativeFunction(FunctionDecl decl, return mem_ptr; } -// Visit all instructions. This runs the work list and lifts instructions. -void FunctionLifter::VisitInstructions(uint64_t address) { - remill::Instruction inst; - - // Recursively decode and lift all instructions that we come across. - while (!edge_work_list.empty()) { - auto [inst_addr, from_addr] = *(edge_work_list.begin()); - auto insn_context = this->decoding_contexts[{inst_addr, from_addr}]; - - - edge_work_list.erase(edge_work_list.begin()); - - llvm::BasicBlock *const block = edge_to_dest_block[{from_addr, inst_addr}]; - CHECK_NOTNULL(block); - if (!block->empty()) { - continue; // Already handled. 
- } - - llvm::BasicBlock *&inst_block = addr_to_block[inst_addr]; - if (!inst_block) { - inst_block = block; - - // We've already lifted this instruction via another control-flow edge. - } else { - auto br = llvm::BranchInst::Create(inst_block, block); - AnnotateInstruction(br, pc_annotation_id, pc_annotation); - continue; - } - - // Decode. - auto next_context = DecodeInstructionInto(inst_addr, false /* is_delayed */, - &inst, insn_context); - if (!next_context) { - if (inst_addr == func_address) { - inst.pc = inst_addr; - inst.arch_name = options.arch->arch_name; - - // Failed to decode the first instruction of the function, but we can - // possibly recover via a tail-call to a redirection address! - if (inst_addr != func_address) { - // TODO(Ian): is this context right? - this->BranchToInst(func_address, inst_addr, insn_context, block); - continue; - } - } - - - // TODO(Ian): If we hit this in our new model then the low level lift is a failure and we need to mark this somehow... - // otherwise we are inventing the abscence of control flow... - LOG(ERROR) << "Could not decode instruction at " << std::hex << inst_addr - << " reachable from instruction " << from_addr - << " in function at " << func_address << std::dec; - - auto call = - remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics); - AnnotateInstruction(call, pc_annotation_id, pc_annotation); - MuteStateEscape(call); - continue; - - // Didn't get a valid instruction. - } else if (!inst.IsValid() || inst.IsError()) { - auto call = - remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics); - AnnotateInstruction(call, pc_annotation_id, pc_annotation); - MuteStateEscape(call); - continue; - } else { - VisitInstruction(inst, block, insn_context); - } - } -} // Get the annotation for the program counter `pc`, or `nullptr` if we're // not doing annotations. 
@@ -1491,12 +1415,92 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { return GetOrDeclareFunction(decl); } + +llvm::BasicBlock * +FunctionLifter::LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, + const CodeBlock &blk) { + auto bb = llvm::BasicBlock::Create(basic_block_function.func->getContext(), + "", basic_block_function.func); + remill::Instruction inst; + + auto reached_addr = blk.addr; + // TODO(Ian): use a different context + auto init_context = this->options.arch->CreateInitialContext(); + while (reached_addr < blk.addr + blk.size) { + auto res = + this->DecodeInstructionInto(reached_addr, false, &inst, init_context); + if (!res) { + LOG(FATAL) << "Failed to decode insn in block"; + } + + reached_addr += inst.bytes.size(); + + // Even when something isn't supported or is invalid, we still lift + // a call to a semantic, e.g.`INVALID_INSTRUCTION`, so we really want + // to treat instruction lifting as an operation that can't fail. + std::ignore = inst.GetLifter()->LiftIntoBlock( + inst, bb, basic_block_function.state_ptr, false /* is_delayed */); + } + return bb; +} + +void FunctionLifter::VisitBlock(CodeBlock blk) { + + auto llvm_blk = this->GetOrCreateBlock(blk.addr); + llvm::IRBuilder<> builder(llvm_blk); + auto bb_lifted_func = + this->CreateLiftedFunction("basic_block_func" + std::to_string(blk.addr)); + + + this->LiftBasicBlockIntoFunction(bb_lifted_func, blk); + std::array args; + args[remill::kStatePointerArgNum] = state_ptr; + args[remill::kPCArgNum] = + options.program_counter_init_procedure(builder, pc_reg, blk.addr); + args[remill::kMemoryPointerArgNum] = bb_lifted_func.mem_ptr; + + builder.CreateCall(bb_lifted_func.func, args); + auto pc = this->op_lifter->LoadRegValue( + llvm_blk, state_ptr, options.arch->ProgramCounterRegisterName()); + + auto sw = builder.CreateSwitch(pc, this->invalid_successor_block); + + for (uint64_t succ : blk.outgoing_edges) { + sw->addCase(llvm::ConstantInt::get( + 
llvm::IntegerType::get(this->llvm_context, 64), succ), + this->GetOrCreateBlock(succ)); + } +} + +void FunctionLifter::VisitBlocks() { + for (const auto &[addr, blk] : this->curr_decl->cfg) { + DLOG(INFO) << "Visiting: " << std::hex << addr; + this->VisitBlock(blk); + } +} + + +LiftedFunction FunctionLifter::CreateLiftedFunction(const std::string &name) { + auto new_func = + options.arch->DefineLiftedFunction(name, semantics_module.get()); + + auto state_ptr = remill::NthArgument(new_func, remill::kStatePointerArgNum); + auto pc_arg = remill::NthArgument(new_func, remill::kPCArgNum); + auto mem_arg = remill::NthArgument(new_func, remill::kMemoryPointerArgNum); + + + new_func->removeFnAttr(llvm::Attribute::NoInline); + new_func->addFnAttr(llvm::Attribute::InlineHint); + new_func->addFnAttr(llvm::Attribute::AlwaysInline); + new_func->setLinkage(llvm::GlobalValue::InternalLinkage); + + return {new_func, state_ptr, pc_arg, mem_arg}; +} // Lift a function. Will return `nullptr` if the memory is // not accessible or executable. llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { addr_to_func.clear(); edge_work_list.clear(); - edge_to_dest_block.clear(); addr_to_block.clear(); this->op_lifter->ClearCache(); curr_decl = &decl; @@ -1543,17 +1547,21 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // Every lifted function starts as a clone of __remill_basic_block. That // prototype has multiple arguments (memory pointer, state pointer, program // counter). This extracts the state pointer. 
- lifted_func = options.arch->DefineLiftedFunction( - native_func->getName().str() + ".lifted", semantics_module.get()); + auto lifted_func_st = + this->CreateLiftedFunction(native_func->getName().str() + ".lifted"); + lifted_func = lifted_func_st.func; - state_ptr = remill::NthArgument(lifted_func, remill::kStatePointerArgNum); + state_ptr = lifted_func_st.state_ptr; - lifted_func->removeFnAttr(llvm::Attribute::NoInline); - lifted_func->addFnAttr(llvm::Attribute::InlineHint); - lifted_func->addFnAttr(llvm::Attribute::AlwaysInline); - lifted_func->setLinkage(llvm::GlobalValue::InternalLinkage); - const auto pc = remill::NthArgument(lifted_func, remill::kPCArgNum); + invalid_successor_block = + llvm::BasicBlock::Create(lifted_func_st.func->getContext(), + "invalid_successor", lifted_func_st.func); + remill::AddTerminatingTailCall(invalid_successor_block, intrinsics.error, + intrinsics); + + + const auto pc = lifted_func_st.pc_arg; const auto entry_block = &(lifted_func->getEntryBlock()); pc_reg_ref = this->op_lifter->LoadRegAddress(entry_block, state_ptr, pc_reg->name) @@ -1586,30 +1594,31 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // TODO: This could be a thunk, that we are maybe lifting on purpose. // How should control flow redirection behave in this case? - // TODO(Ian): for a thumb vs arm function we need to figure out how to setup the correct initial context, - // maybe the spec should have a section (Context reg assignments or something), where we apply those assingments to a defautl initial context - auto default_mapping = options.arch->CreateInitialContext(); for (const auto &[k, v] : decl.context_assignments) { default_mapping.UpdateContextReg(k, v); } - ir.CreateBr(GetOrCreateBlock(0u, func_address, default_mapping)); + ir.CreateBr(this->GetOrCreateBlock(this->func_address)); AnnotateInstructions(entry_block, pc_annotation_id, GetPCAnnotation(func_address)); DLOG(INFO) << "Visiting insns"; // Go lift all instructions! 
- VisitInstructions(func_address); + VisitBlocks(); // Fill up `native_func` with a basic block and make it call `lifted_func`. // This creates things like the stack-allocated `State` structure. CallLiftedFunctionFromNativeFunction(decl); + + this->lifted_func->dump(); + LOG(FATAL) << "Not fully implemented yet"; + // The last stage is that we need to recursively inline all calls to semantics // functions into `native_func`. - RecursivelyInlineLiftedFunctionIntoNativeFunction(); + //RecursivelyInlineLiftedFunctionIntoNativeFunction(); return native_func; diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index bf0043f74..810bdc0f8 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -9,9 +9,11 @@ #pragma once #include -#include #include +#include #include +#include +#include #include #include #include @@ -48,6 +50,15 @@ class MemoryProvider; class TypeProvider; struct ControlFlowTargetList; + +struct LiftedFunction { + llvm::Function *func; + llvm::Argument *state_ptr; + llvm::Argument *pc_arg; + llvm::Argument *mem_ptr; +}; + + // Orchestrates lifting of instructions and control-flow between instructions. class FunctionLifter { public: @@ -169,11 +180,6 @@ class FunctionLifter { std::map, remill::DecodingContext> decoding_contexts; - // Maps control flow edges `(from_pc -> to_pc)` to the basic block associated - // with `to_pc`. - std::map, llvm::BasicBlock *> - edge_to_dest_block; - // Maps an instruction address to a basic block that will hold the lifted code // for that instruction. std::unordered_map addr_to_block; @@ -181,6 +187,9 @@ class FunctionLifter { // Maps program counters to lifted functions. std::unordered_map addr_to_func; + + llvm::BasicBlock *invalid_successor_block{nullptr}; + // Get the annotation for the program counter `pc`, or `nullptr` if we're // not doing annotations. 
llvm::MDNode *GetPCAnnotation(uint64_t pc) const; @@ -198,8 +207,7 @@ class FunctionLifter { // function drives a work list, where the first time we ask for the // instruction at `addr`, we enqueue a bit of work to decode and lift that // instruction. - llvm::BasicBlock *GetOrCreateBlock(uint64_t from_addr, uint64_t to_addr, - const remill::DecodingContext &mapper); + llvm::BasicBlock *GetOrCreateBlock(uint64_t addr); // Attempts to lookup any redirection of the given address, and then // calls GetOrCreateBlock @@ -416,6 +424,17 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, // are nifty to spot checking bitcode. void InstrumentCallBreakpointFunction(llvm::BasicBlock *block); + void VisitBlock(CodeBlock entry_context); + + LiftedFunction CreateLiftedFunction(const std::string &name); + + llvm::BasicBlock * + LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, + const CodeBlock &blk); + + + void VisitBlocks(); + // Visit an instruction, and lift it into a basic block. Then, based off of // the category of the instruction, invoke one of the category-specific // lifters to enact a change in control-flow. 
diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 4d69f8766..12b1768c7 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -500,6 +500,8 @@ Result ProtobufTranslator::DecodeFunction( decl.context_assignments = {function.context_assignments().begin(), function.context_assignments().end()}; + this->ParseCFGIntoFunction(function, decl); + auto link = function.func_linkage(); if (link == specification::FUNCTION_LINKAGE_DECL) { @@ -515,6 +517,19 @@ Result ProtobufTranslator::DecodeFunction( return decl; } + +void ProtobufTranslator::ParseCFGIntoFunction( + const ::specification::Function &obj, FunctionDecl &decl) const { + for (auto blk : obj.blocks()) { + CodeBlock nblk = {blk.second.address(), + blk.second.size(), + {blk.second.outgoing_blocks().begin(), + blk.second.outgoing_blocks().end()}}; + decl.cfg.emplace(blk.first, std::move(nblk)); + } +} + + Result ProtobufTranslator::DecodeGlobalVar( const ::specification::GlobalVariable &obj) const { anvill::VariableDecl decl; diff --git a/lib/Protobuf.h b/lib/Protobuf.h index 9100a9404..4cd794c43 100644 --- a/lib/Protobuf.h +++ b/lib/Protobuf.h @@ -74,6 +74,10 @@ class ProtobufTranslator { std::optional address, CallableDecl &decl) const; + void ParseCFGIntoFunction(const ::specification::Function &obj, + FunctionDecl &decl) const; + + public: explicit ProtobufTranslator( const anvill::TypeTranslator &type_translator_, const remill::Arch *arch_, diff --git a/lib/Specification.cpp b/lib/Specification.cpp index bfe63097e..0557d655d 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -449,6 +449,7 @@ void Specification::ForEachCallSite( } } + // Call `cb` on each control-flow redirection, until `cb` returns `false`. 
void Specification::ForEachControlFlowRedirect( std::function cb) const { From d853624a24cc4dded52fb9185fecf03fa6935154 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 16 Nov 2022 19:43:47 -0500 Subject: [PATCH 002/163] fix entry --- lib/Lifters/FunctionLifter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index ec4ec41dc..1789c6992 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1419,8 +1419,7 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { llvm::BasicBlock * FunctionLifter::LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, const CodeBlock &blk) { - auto bb = llvm::BasicBlock::Create(basic_block_function.func->getContext(), - "", basic_block_function.func); + auto bb = &basic_block_function.func->getEntryBlock(); remill::Instruction inst; auto reached_addr = blk.addr; @@ -1441,6 +1440,7 @@ FunctionLifter::LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, std::ignore = inst.GetLifter()->LiftIntoBlock( inst, bb, basic_block_function.state_ptr, false /* is_delayed */); } + bb->getParent()->dump(); return bb; } From 4d4c77ecc17e67b847b66fd1c2696d8e2620c460 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 17 Nov 2022 07:53:36 -0500 Subject: [PATCH 003/163] keep memory pointer consistent --- lib/Lifters/FunctionLifter.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 1789c6992..2f36ab001 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1440,6 +1440,9 @@ FunctionLifter::LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, std::ignore = inst.GetLifter()->LiftIntoBlock( inst, bb, basic_block_function.state_ptr, false /* is_delayed */); } + + auto memory = remill::LoadMemoryPointer(bb, this->intrinsics); + 
llvm::ReturnInst::Create(bb->getContext(), memory, bb); bb->getParent()->dump(); return bb; } @@ -1457,9 +1460,15 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { args[remill::kStatePointerArgNum] = state_ptr; args[remill::kPCArgNum] = options.program_counter_init_procedure(builder, pc_reg, blk.addr); - args[remill::kMemoryPointerArgNum] = bb_lifted_func.mem_ptr; + args[remill::kMemoryPointerArgNum] = + remill::LoadMemoryPointer(llvm_blk, this->intrinsics); + + auto new_mem_ptr = builder.CreateCall(bb_lifted_func.func, args); + + auto mem_ptr_ref = remill::LoadMemoryPointerRef(llvm_blk); + + builder.CreateStore(new_mem_ptr, mem_ptr_ref); - builder.CreateCall(bb_lifted_func.func, args); auto pc = this->op_lifter->LoadRegValue( llvm_blk, state_ptr, options.arch->ProgramCounterRegisterName()); From 8b65e82e2041139cb79ddaca54338468101e4259 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 17 Nov 2022 08:30:16 -0500 Subject: [PATCH 004/163] add inlining --- lib/Lifters/FunctionLifter.cpp | 36 +++++++++++++++++++++------------- lib/Lifters/FunctionLifter.h | 4 ++++ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 2f36ab001..19ac519ab 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1311,19 +1312,15 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( } } -// In practice, lifted functions are not workable as is; we need to emulate -// `__attribute__((flatten))`, i.e. recursively inline as much as possible, so -// that all semantics and helpers are completely inlined. 
-void FunctionLifter::RecursivelyInlineLiftedFunctionIntoNativeFunction(void) { - std::vector calls_to_inline; - CHECK(!llvm::verifyModule(*this->native_func->getParent(), &llvm::errs())); +void FunctionLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { + std::vector calls_to_inline; // Set of instructions that we should not annotate because we can't tie them // to a particular instruction address. std::unordered_set insts_without_provenance; if (options.pc_metadata_name) { - for (auto &inst : llvm::instructions(*native_func)) { + for (auto &inst : llvm::instructions(*inf)) { if (!inst.getMetadata(pc_annotation_id)) { insts_without_provenance.insert(&inst); } @@ -1333,7 +1330,7 @@ void FunctionLifter::RecursivelyInlineLiftedFunctionIntoNativeFunction(void) { for (auto changed = true; changed; changed = !calls_to_inline.empty()) { calls_to_inline.clear(); - for (auto &inst : llvm::instructions(*native_func)) { + for (auto &inst : llvm::instructions(*inf)) { if (auto call_inst = llvm::dyn_cast(&inst); call_inst) { if (auto called_func = call_inst->getCalledFunction(); called_func && !called_func->isDeclaration() && @@ -1356,7 +1353,7 @@ void FunctionLifter::RecursivelyInlineLiftedFunctionIntoNativeFunction(void) { // Propagate PC metadata from call sites into inlined call bodies. if (options.pc_metadata_name) { - for (auto &inst : llvm::instructions(*native_func)) { + for (auto &inst : llvm::instructions(*inf)) { if (!inst.getMetadata(pc_annotation_id)) { if (insts_without_provenance.count(&inst)) { continue; @@ -1402,6 +1399,13 @@ void FunctionLifter::RecursivelyInlineLiftedFunctionIntoNativeFunction(void) { ClearVariableNames(native_func); } +// In practice, lifted functions are not workable as is; we need to emulate +// `__attribute__((flatten))`, i.e. recursively inline as much as possible, so +// that all semantics and helpers are completely inlined. 
+void FunctionLifter::RecursivelyInlineLiftedFunctionIntoNativeFunction(void) { + CHECK(!llvm::verifyModule(*this->native_func->getParent(), &llvm::errs())); + this->RecursivelyInlineFunctionCallees(this->native_func); +} // Lift a function. Will return `nullptr` if the memory is // not accessible or executable. @@ -1443,6 +1447,8 @@ FunctionLifter::LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, auto memory = remill::LoadMemoryPointer(bb, this->intrinsics); llvm::ReturnInst::Create(bb->getContext(), memory, bb); + this->RecursivelyInlineFunctionCallees(basic_block_function.func); + bb->getParent()->dump(); return bb; } @@ -1453,7 +1459,8 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { llvm::IRBuilder<> builder(llvm_blk); auto bb_lifted_func = this->CreateLiftedFunction("basic_block_func" + std::to_string(blk.addr)); - + bb_lifted_func.func->removeFnAttr(llvm::Attribute::AlwaysInline); + bb_lifted_func.func->addFnAttr(llvm::Attribute::NoInline); this->LiftBasicBlockIntoFunction(bb_lifted_func, blk); std::array args; @@ -1622,12 +1629,13 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { CallLiftedFunctionFromNativeFunction(decl); - this->lifted_func->dump(); - LOG(FATAL) << "Not fully implemented yet"; - // The last stage is that we need to recursively inline all calls to semantics // functions into `native_func`. - //RecursivelyInlineLiftedFunctionIntoNativeFunction(); + RecursivelyInlineLiftedFunctionIntoNativeFunction(); + + + this->native_func->dump(); + LOG(FATAL) << "Not fully implemented yet"; return native_func; diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 810bdc0f8..cafdd630b 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -476,6 +476,10 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, // that all semantics and helpers are completely inlined. void RecursivelyInlineLiftedFunctionIntoNativeFunction(void); + // inline on arbitrary function. 
+ void RecursivelyInlineFunctionCallees(llvm::Function *); + + // Allocate and initialize the state structure. void AllocateAndInitializeStateStructure(llvm::BasicBlock *block, const remill::Arch *arch); From 998acf0aa7422ee7bec1ed1b913cd1470d49dbb1 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Thu, 17 Nov 2022 15:51:43 +0100 Subject: [PATCH 005/163] Use per-block context assignments --- data_specifications/specification.proto | 3 +-- include/anvill/Declarations.h | 10 ++++------ include/anvill/Specification.h | 7 +++---- lib/Lifters/FunctionLifter.cpp | 19 +++++-------------- lib/Protobuf.cpp | 6 +++--- lib/Specification.cpp | 2 -- 6 files changed, 16 insertions(+), 31 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 466c37e12..eebb72ae1 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -164,11 +164,11 @@ message CodeBlock { repeated uint64 incoming_blocks = 3; repeated uint64 outgoing_blocks = 4; uint32 size = 5; + map context_assignments = 6; } message Function { uint64 entry_address = 1; - map context_assignments = 2; FunctionLinkage func_linkage = 3; Callable callable = 4; map blocks = 5; @@ -193,7 +193,6 @@ message MemoryRange { message JumpTarget { uint64 address = 1; - map context_assignments = 2; } message Jump { diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 9fe392c12..3c3d9f90a 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -46,6 +46,10 @@ struct CodeBlock { uint64_t addr; uint32_t size; std::unordered_set outgoing_edges; + // The set of context assignments that occur at the entry point to this block. + // A block may have specific decoding context properties such as "TM=1" (the thumb bit is set) + // So we declare the context assignments that occur at the entry point to a block. 
+ std::unordered_map context_assignments; }; class TypeDictionary; @@ -204,12 +208,6 @@ struct FunctionDecl : public CallableDecl { bool lift_as_decl{false}; bool is_extern{false}; - - // The set of context assignments that occur at the entry point to this function. - // A called function may have specific decoding context properties such as "TM=1" (the thumb bit is set) - // So we declare the context assignments that occur at the entry point to a function. - std::unordered_map context_assignments; - // These are the blocks contained within the function representing the CFG. std::unordered_map cfg; diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index a412c0afd..f7e4fda46 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -61,7 +61,6 @@ struct ControlFlowOverrideSpec { struct JumpTarget { std::uint64_t address; - std::unordered_map context_assignments; }; struct Jump : ControlFlowOverrideSpec { @@ -78,7 +77,8 @@ struct Return : ControlFlowOverrideSpec {}; struct Misc : ControlFlowOverrideSpec {}; -using ControlFlowOverride = std::variant; +using ControlFlowOverride = + std::variant; struct CallSiteDecl; struct FunctionDecl; @@ -161,8 +161,7 @@ class Specification { void ForEachReturn(std::function cb) const; // Call `cb` on each miscellaneous control flow override, until `cb` returns `false`. 
- void ForEachMiscOverride( - std::function cb) const; + void ForEachMiscOverride(std::function cb) const; inline bool operator==(const Specification &that) const noexcept { return impl.get() == that.impl.get(); diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 19ac519ab..39861c36f 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -410,11 +410,7 @@ void FunctionLifter::DoSwitchBasedIndirectJump( llvm::BranchInst::Create( - GetOrCreateTargetBlock( - inst, destination.address, - this->ApplyTargetList(destination.context_assignments, - prev_context)), - block); + GetOrCreateTargetBlock(inst, destination.address, prev_context), block); // We have multiple destinations. Handle this with a switch. If the target // list is not marked as complete, then we'll still add __remill_jump @@ -455,9 +451,8 @@ void FunctionLifter::DoSwitchBasedIndirectJump( auto dest_id{0u}; for (auto dest : target_list) { - auto dest_block = GetOrCreateTargetBlock( - inst, dest.address, - this->ApplyTargetList(dest.context_assignments, prev_context)); + auto dest_block = + GetOrCreateTargetBlock(inst, dest.address, prev_context); auto dest_case = llvm::ConstantInt::get(address_type, dest_id++); switch_inst->addCase(dest_case, dest_block); } @@ -1429,6 +1424,8 @@ FunctionLifter::LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, auto reached_addr = blk.addr; // TODO(Ian): use a different context auto init_context = this->options.arch->CreateInitialContext(); + ApplyTargetList(blk.context_assignments, init_context); + while (reached_addr < blk.addr + blk.size) { auto res = this->DecodeInstructionInto(reached_addr, false, &inst, init_context); @@ -1609,12 +1606,6 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // // TODO: This could be a thunk, that we are maybe lifting on purpose. // How should control flow redirection behave in this case? 
- - auto default_mapping = options.arch->CreateInitialContext(); - for (const auto &[k, v] : decl.context_assignments) { - default_mapping.UpdateContextReg(k, v); - } - ir.CreateBr(this->GetOrCreateBlock(this->func_address)); AnnotateInstructions(entry_block, pc_annotation_id, diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 12b1768c7..133202371 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -497,8 +497,6 @@ Result ProtobufTranslator::DecodeFunction( if (!parse_res.Succeeded()) { return parse_res.TakeError(); } - decl.context_assignments = {function.context_assignments().begin(), - function.context_assignments().end()}; this->ParseCFGIntoFunction(function, decl); @@ -524,7 +522,9 @@ void ProtobufTranslator::ParseCFGIntoFunction( CodeBlock nblk = {blk.second.address(), blk.second.size(), {blk.second.outgoing_blocks().begin(), - blk.second.outgoing_blocks().end()}}; + blk.second.outgoing_blocks().end()}, + {blk.second.context_assignments().begin(), + blk.second.context_assignments().end()}}; decl.cfg.emplace(blk.first, std::move(nblk)); } } diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 0557d655d..4d137e53e 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -180,8 +180,6 @@ SpecificationImpl::ParseSpecification( jmp.address = jump.address(); for (auto &target : jump.targets()) { JumpTarget jmp_target; - auto &assignments = target.context_assignments(); - jmp_target.context_assignments = {assignments.begin(), assignments.end()}; jmp_target.address = target.address(); jmp.targets.push_back(jmp_target); } From 8732f1c7ee55b021a845098158298475672aa601 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 17 Nov 2022 10:01:25 -0500 Subject: [PATCH 006/163] fix PC semantics --- lib/Lifters/FunctionLifter.cpp | 65 +++++++++++++++++++++++++++++----- lib/Lifters/FunctionLifter.h | 13 ++++++- 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 
39861c36f..c55e12160 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -50,6 +50,7 @@ #include #include #include +#include #include "EntityLifter.h" @@ -1415,9 +1416,8 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { } -llvm::BasicBlock * -FunctionLifter::LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, - const CodeBlock &blk) { +llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( + BasicBlockFunction &basic_block_function, const CodeBlock &blk) { auto bb = &basic_block_function.func->getEntryBlock(); remill::Instruction inst; @@ -1442,6 +1442,11 @@ FunctionLifter::LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, inst, bb, basic_block_function.state_ptr, false /* is_delayed */); } + + llvm::IRBuilder<> builder(bb); + + builder.CreateStore(remill::LoadNextProgramCounter(bb, this->intrinsics), + basic_block_function.next_pc_out_param); auto memory = remill::LoadMemoryPointer(bb, this->intrinsics); llvm::ReturnInst::Create(bb->getContext(), memory, bb); this->RecursivelyInlineFunctionCallees(basic_block_function.func); @@ -1454,27 +1459,28 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { auto llvm_blk = this->GetOrCreateBlock(blk.addr); llvm::IRBuilder<> builder(llvm_blk); - auto bb_lifted_func = - this->CreateLiftedFunction("basic_block_func" + std::to_string(blk.addr)); + auto bb_lifted_func = this->CreateBasicBlockFunction( + "basic_block_func" + std::to_string(blk.addr)); bb_lifted_func.func->removeFnAttr(llvm::Attribute::AlwaysInline); bb_lifted_func.func->addFnAttr(llvm::Attribute::NoInline); this->LiftBasicBlockIntoFunction(bb_lifted_func, blk); - std::array args; + std::array args; args[remill::kStatePointerArgNum] = state_ptr; args[remill::kPCArgNum] = options.program_counter_init_procedure(builder, pc_reg, blk.addr); args[remill::kMemoryPointerArgNum] = remill::LoadMemoryPointer(llvm_blk, this->intrinsics); + args[remill::kNumBlockArgs] = 
remill::LoadNextProgramCounterRef(llvm_blk); + auto new_mem_ptr = builder.CreateCall(bb_lifted_func.func, args); auto mem_ptr_ref = remill::LoadMemoryPointerRef(llvm_blk); builder.CreateStore(new_mem_ptr, mem_ptr_ref); - auto pc = this->op_lifter->LoadRegValue( - llvm_blk, state_ptr, options.arch->ProgramCounterRegisterName()); + auto pc = remill::LoadNextProgramCounter(llvm_blk, this->intrinsics); auto sw = builder.CreateSwitch(pc, this->invalid_successor_block); @@ -1493,6 +1499,49 @@ void FunctionLifter::VisitBlocks() { } +BasicBlockFunction +FunctionLifter::CreateBasicBlockFunction(const std::string &name_) { + + auto &context = this->semantics_module->getContext(); + llvm::FunctionType *lifted_func_type = + llvm::dyn_cast(remill::RecontextualizeType( + this->options.arch->LiftedFunctionType(), context)); + + std::vector params = std::vector( + lifted_func_type->param_begin(), lifted_func_type->param_end()); + params.push_back(llvm::PointerType::get(context, 0)); + + llvm::FunctionType *func_type = + llvm::FunctionType::get(lifted_func_type->getReturnType(), params, false); + + + llvm::StringRef name(name_.data(), name_.size()); + auto func = + llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, 0u, + name, this->semantics_module.get()); + + auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); + auto state = remill::NthArgument(func, remill::kStatePointerArgNum); + auto pc = remill::NthArgument(func, remill::kPCArgNum); + auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); + memory->setName("memory"); + state->setName("state"); + pc->setName("program_counter"); + next_pc_out->setName("next_pc_out"); + + options.arch->InitializeEmptyLiftedFunction(func); + + auto state_ptr = remill::NthArgument(func, remill::kStatePointerArgNum); + auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); + auto mem_arg = remill::NthArgument(func, remill::kMemoryPointerArgNum); + + + 
func->addFnAttr(llvm::Attribute::NoInline); + func->setLinkage(llvm::GlobalValue::InternalLinkage); + + return {func, state_ptr, pc_arg, mem_arg, next_pc_out}; +} + LiftedFunction FunctionLifter::CreateLiftedFunction(const std::string &name) { auto new_func = options.arch->DefineLiftedFunction(name, semantics_module.get()); diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index cafdd630b..880d19ea7 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -51,6 +51,14 @@ class TypeProvider; struct ControlFlowTargetList; +struct BasicBlockFunction { + llvm::Function *func; + llvm::Argument *state_ptr; + llvm::Argument *pc_arg; + llvm::Argument *mem_ptr; + llvm::Argument *next_pc_out_param; +}; + struct LiftedFunction { llvm::Function *func; llvm::Argument *state_ptr; @@ -428,8 +436,11 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, LiftedFunction CreateLiftedFunction(const std::string &name); + BasicBlockFunction CreateBasicBlockFunction(const std::string &name); + + llvm::BasicBlock * - LiftBasicBlockIntoFunction(LiftedFunction &basic_block_function, + LiftBasicBlockIntoFunction(BasicBlockFunction &basic_block_function, const CodeBlock &blk); From e18567afda3f1fa4b6e2e88f4088f8a71140b681 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Sun, 20 Nov 2022 10:03:36 -0500 Subject: [PATCH 007/163] added conditional interprocedural control flow --- lib/Lifters/FunctionLifter.cpp | 68 +++++++++++++++++++++++++++++++++- lib/Lifters/FunctionLifter.h | 10 +++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index c55e12160..eafc92296 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -53,6 +53,7 @@ #include #include "EntityLifter.h" +#include "anvill/Specification.h" namespace anvill { namespace { @@ -1416,6 +1417,66 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { } +bool 
FunctionLifter::DoInterProceduralControlFlow( + const remill::Instruction &insn, llvm::BasicBlock *block, + const anvill::ControlFlowOverride &override) { + // only handle inter-proc since intra-proc are handled implicitly by the CFG. + // Hmmm need to handle conditionals.... + llvm::IRBuilder<> builder(block); + if (std::holds_alternative(override)) { + auto cc = std::get(override); + remill::AddCall(block, this->intrinsics.function_call, this->intrinsics); + if (!cc.stop) { + auto [_, raddr] = this->LoadFunctionReturnAddress(insn, block); + auto npc = remill::LoadNextProgramCounterRef(block); + auto pc = remill::LoadProgramCounterRef(block); + builder.CreateStore(raddr, npc); + builder.CreateStore(raddr, pc); + } else { + remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics); + } + return !cc.stop; + } else if (std::holds_alternative(override)) { + remill::AddTerminatingTailCall(block, intrinsics.function_return, + intrinsics); + return false; + } + + return true; +} +void FunctionLifter::ApplyInterProceduralControlFlowOverride( + const remill::Instruction &insn, llvm::BasicBlock *&block) { + + + // if this instruction is conditional and interprocedural then we are going to split the block into a case were we do take it and a branch where we dont and then rejoin + + auto override = options.control_flow_provider.GetControlFlowOverride(insn.pc); + + if ((std::holds_alternative(override) || + std::holds_alternative(override))) { + if (std::holds_alternative( + insn.flows)) { + auto btaken = remill::LoadBranchTaken(block); + llvm::IRBuilder<> builder(block); + auto do_control_flow = + llvm::BasicBlock::Create(block->getContext(), "", block->getParent()); + auto continuation = + llvm::BasicBlock::Create(block->getContext(), "", block->getParent()); + builder.CreateCondBr(btaken, do_control_flow, continuation); + + // if the interprocedural control flow block isnt terminal link it back up + if (this->DoInterProceduralControlFlow(insn, do_control_flow, 
override)) { + llvm::BranchInst::Create(continuation, do_control_flow); + } + + block = continuation; + + } else { + this->DoInterProceduralControlFlow(insn, block, override); + } + } +} + llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( BasicBlockFunction &basic_block_function, const CodeBlock &blk) { auto bb = &basic_block_function.func->getEntryBlock(); @@ -1427,8 +1488,8 @@ llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( ApplyTargetList(blk.context_assignments, init_context); while (reached_addr < blk.addr + blk.size) { - auto res = - this->DecodeInstructionInto(reached_addr, false, &inst, init_context); + auto addr = reached_addr; + auto res = this->DecodeInstructionInto(addr, false, &inst, init_context); if (!res) { LOG(FATAL) << "Failed to decode insn in block"; } @@ -1440,6 +1501,7 @@ llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( // to treat instruction lifting as an operation that can't fail. std::ignore = inst.GetLifter()->LiftIntoBlock( inst, bb, basic_block_function.state_ptr, false /* is_delayed */); + this->ApplyInterProceduralControlFlowOverride(inst, bb); } @@ -1492,6 +1554,8 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { } void FunctionLifter::VisitBlocks() { + DLOG(INFO) << "Num blocks for func " << std::hex << this->curr_decl->address + << ": " << this->curr_decl->cfg.size(); for (const auto &[addr, blk] : this->curr_decl->cfg) { DLOG(INFO) << "Visiting: " << std::hex << addr; this->VisitBlock(blk); diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 880d19ea7..7fa9dac70 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -490,6 +490,16 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, // inline on arbitrary function. void RecursivelyInlineFunctionCallees(llvm::Function *); + // Manipulates the control flow to restore intra-procedural state when reaching an + // inter-procedural effect. 
+ void ApplyInterProceduralControlFlowOverride(const remill::Instruction &, + llvm::BasicBlock *&block); + + bool + DoInterProceduralControlFlow(const remill::Instruction &insn, + llvm::BasicBlock *block, + const anvill::ControlFlowOverride &override); + // Allocate and initialize the state structure. void AllocateAndInitializeStateStructure(llvm::BasicBlock *block, From 1d7fb453134f05f844133b220db9df2fdc268180 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 21 Nov 2022 08:27:30 -0500 Subject: [PATCH 008/163] add own call utilities for 4 arg functions --- lib/Lifters/FunctionLifter.cpp | 34 ++++++++++++++++++++++++++++++++-- lib/Lifters/FunctionLifter.h | 9 +++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index eafc92296..c0dc54920 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1417,15 +1417,45 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { } +llvm::CallInst *FunctionLifter::AddCallFromBasicBlockFunctionToLifted( + llvm::BasicBlock *source_block, llvm::Function *dest_func, + const remill::IntrinsicTable &intrinsics) { + auto func = source_block->getParent(); + llvm::IRBuilder<> ir(source_block); + std::array args; + args[remill::kMemoryPointerArgNum] = + NthArgument(func, remill::kMemoryPointerArgNum); + args[remill::kStatePointerArgNum] = + NthArgument(func, remill::kStatePointerArgNum); + args[remill::kPCArgNum] = NthArgument(func, remill::kPCArgNum); + return ir.CreateCall(dest_func, args); +} + + +llvm::CallInst * +FunctionLifter::AddTerminatingTailCallFromBasicBlockFunctionToLifted( + llvm::BasicBlock *source_block, llvm::Function *dest_func, + const remill::IntrinsicTable &intrinsics) { + llvm::IRBuilder<> ir(source_block); + auto npc = remill::LoadNextProgramCounter(source_block, intrinsics); + auto pc_ref = remill::LoadProgramCounterRef(source_block); + ir.CreateStore(npc, pc_ref); + auto call = 
this->AddCallFromBasicBlockFunctionToLifted( + source_block, dest_func, intrinsics); + call->setTailCall(true); + ir.CreateRet(call); + return call; +} + bool FunctionLifter::DoInterProceduralControlFlow( const remill::Instruction &insn, llvm::BasicBlock *block, const anvill::ControlFlowOverride &override) { // only handle inter-proc since intra-proc are handled implicitly by the CFG. - // Hmmm need to handle conditionals.... llvm::IRBuilder<> builder(block); if (std::holds_alternative(override)) { auto cc = std::get(override); - remill::AddCall(block, this->intrinsics.function_call, this->intrinsics); + this->AddCallFromBasicBlockFunctionToLifted( + block, this->intrinsics.function_call, this->intrinsics); if (!cc.stop) { auto [_, raddr] = this->LoadFunctionReturnAddress(insn, block); auto npc = remill::LoadNextProgramCounterRef(block); diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 7fa9dac70..032a8a12f 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -500,6 +500,15 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, llvm::BasicBlock *block, const anvill::ControlFlowOverride &override); + // Same addcall machinery from remill except allows for the 4 argument basic block functio (state, program_counter, memory, next_pc_ref). + llvm::CallInst *AddCallFromBasicBlockFunctionToLifted( + llvm::BasicBlock *source_block, llvm::Function *dest_func, + const remill::IntrinsicTable &intrinsics); + + llvm::CallInst *AddTerminatingTailCallFromBasicBlockFunctionToLifted( + llvm::BasicBlock *source_block, llvm::Function *dest_func, + const remill::IntrinsicTable &intrinsics); + // Allocate and initialize the state structure. 
void AllocateAndInitializeStateStructure(llvm::BasicBlock *block, From 729752e9c60976811ef34f7363265b34d46b11a0 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 21 Nov 2022 12:11:00 +0100 Subject: [PATCH 009/163] Pass declaration to `AddFunctionToContext` --- lib/Lifters/FunctionLifter.cpp | 27 +++++++++++++++------------ lib/Lifters/FunctionLifter.h | 3 ++- lib/Lifters/ValueLifter.cpp | 15 ++++++--------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index c0dc54920..d90930227 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -53,6 +53,7 @@ #include #include "EntityLifter.h" +#include "anvill/Declarations.h" #include "anvill/Specification.h" namespace anvill { @@ -1840,7 +1841,7 @@ llvm::Function *EntityLifter::LiftEntity(const FunctionDecl &decl) const { // Add the function to the entity lifter's target module. const auto func_in_target_module = - func_lifter.AddFunctionToContext(func, decl.address, *impl); + func_lifter.AddFunctionToContext(func, decl, *impl); // If we had a previous declaration/definition, then we want to make sure // that we replaced its body, and we also want to make sure that if our @@ -1901,7 +1902,7 @@ llvm::Function *EntityLifter::DeclareEntity(const FunctionDecl &decl) const { if (const auto func = func_lifter.DeclareFunction(decl)) { DCHECK(!module->getFunction(func->getName())); - return func_lifter.AddFunctionToContext(func, decl.address, *impl); + return func_lifter.AddFunctionToContext(func, decl, *impl); } else { return nullptr; } @@ -1930,7 +1931,8 @@ static void EraseFunctionBody(llvm::Function *func) { // function, and copy the function into the context's module. Returns the // version of `func` inside the module of the lifter context. 
llvm::Function * -FunctionLifter::AddFunctionToContext(llvm::Function *func, uint64_t address, +FunctionLifter::AddFunctionToContext(llvm::Function *func, + const FunctionDecl &decl, EntityLifterImpl &lifter_context) const { const auto target_module = options.module; @@ -1954,13 +1956,14 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, uint64_t address, // It's possible that we've lifted this function before, but that it was // renamed by user code, and so the above check failed. Go check for that. } else { - lifter_context.ForEachEntityAtAddress(address, [&](llvm::Constant *gv) { - if (auto gv_func = llvm::dyn_cast(gv); - gv_func && gv_func->getFunctionType() == module_func_type) { - CHECK(!new_version); - new_version = gv_func; - } - }); + lifter_context.ForEachEntityAtAddress( + decl.address, [&](llvm::Constant *gv) { + if (auto gv_func = llvm::dyn_cast(gv); + gv_func && gv_func->getFunctionType() == module_func_type) { + CHECK(!new_version); + new_version = gv_func; + } + }); } // This is the first time we're lifting this function, or even the first time @@ -1978,13 +1981,13 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, uint64_t address, // just in case it will be needed in future lifts. EraseFunctionBody(func); - if (auto func_annotation = GetPCAnnotation(address)) { + if (auto func_annotation = GetPCAnnotation(decl.address)) { new_version->setMetadata(pc_annotation_id, func_annotation); } // Update the context to keep its internal concepts of what LLVM objects // correspond with which native binary addresses. - lifter_context.AddEntity(new_version, address); + lifter_context.AddEntity(new_version, decl.address); // The function we just lifted may call other functions, so we need to go // find those and also use them to update the context. 
diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 032a8a12f..22eeb8641 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -89,7 +89,8 @@ class FunctionLifter { // Update the associated entity lifter with information about this // function, and copy the function into the context's module. Returns the // version of `func` inside the module of the lifter context. - llvm::Function *AddFunctionToContext(llvm::Function *func, uint64_t address, + llvm::Function *AddFunctionToContext(llvm::Function *func, + const FunctionDecl &decl, EntityLifterImpl &lifter_context) const; private: diff --git a/lib/Lifters/ValueLifter.cpp b/lib/Lifters/ValueLifter.cpp index 023b7ad84..be980f57a 100644 --- a/lib/Lifters/ValueLifter.cpp +++ b/lib/Lifters/ValueLifter.cpp @@ -49,7 +49,7 @@ ValueLifterImpl::GetFunctionPointer(const FunctionDecl &decl, auto &func_lifter = ent_lifter.function_lifter; auto func = func_lifter.DeclareFunction(decl); auto func_in_context = - func_lifter.AddFunctionToContext(func, decl.address, ent_lifter); + func_lifter.AddFunctionToContext(func, decl, ent_lifter); return func_in_context; } @@ -127,10 +127,8 @@ static llvm::Constant *UnwrapZeroIndices(llvm::Constant *ret, // entity or plausible entity. // // NOTE(pag): `hinted_type` can be `nullptr`. -llvm::Constant * -ValueLifterImpl::TryGetPointerForAddress(uint64_t ea, - EntityLifterImpl &ent_lifter, - llvm::Type *hinted_type) const { +llvm::Constant *ValueLifterImpl::TryGetPointerForAddress( + uint64_t ea, EntityLifterImpl &ent_lifter, llvm::Type *hinted_type) const { // First, try to see if we already have an entity for this address. Give // preference to an entity with a matching type. Then to global variables and @@ -163,8 +161,7 @@ ValueLifterImpl::TryGetPointerForAddress(uint64_t ea, // Try to create a `FunctionDecl` on-demand. 
if (hinted_type) { - if (auto func_type = - llvm::dyn_cast(hinted_type)) { + if (auto func_type = llvm::dyn_cast(hinted_type)) { const auto func = llvm::Function::Create(func_type, llvm::GlobalValue::PrivateLinkage, ".anvill.value_lifter.temp", options.module); @@ -274,8 +271,8 @@ llvm::Constant *ValueLifterImpl::Lift(std::string_view data, llvm::Type *type, } // If we successfully lift it as a reference then we're in good shape. - if (auto val = GetPointer(address, nullptr, - ent_lifter, loc_ea, addr_space)) { + if (auto val = + GetPointer(address, nullptr, ent_lifter, loc_ea, addr_space)) { return val; } From e18dc317ca91312e6c4643944a18bc9f83bb5cd9 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 21 Nov 2022 12:32:30 +0100 Subject: [PATCH 010/163] Move basic block funcs into new context --- lib/Lifters/FunctionLifter.cpp | 23 ++++++++++++++++++----- lib/Lifters/FunctionLifter.h | 2 +- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index d90930227..2d11ff9be 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1552,8 +1552,7 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { auto llvm_blk = this->GetOrCreateBlock(blk.addr); llvm::IRBuilder<> builder(llvm_blk); - auto bb_lifted_func = this->CreateBasicBlockFunction( - "basic_block_func" + std::to_string(blk.addr)); + auto bb_lifted_func = this->CreateBasicBlockFunction(blk); bb_lifted_func.func->removeFnAttr(llvm::Attribute::AlwaysInline); bb_lifted_func.func->addFnAttr(llvm::Attribute::NoInline); @@ -1595,8 +1594,8 @@ void FunctionLifter::VisitBlocks() { BasicBlockFunction -FunctionLifter::CreateBasicBlockFunction(const std::string &name_) { - +FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { + std::string name_ = "basic_block_func" + std::to_string(block.addr); auto &context = this->semantics_module->getContext(); llvm::FunctionType *lifted_func_type = 
llvm::dyn_cast(remill::RecontextualizeType( @@ -1770,7 +1769,6 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { this->native_func->dump(); - LOG(FATAL) << "Not fully implemented yet"; return native_func; @@ -1937,6 +1935,21 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, const auto target_module = options.module; auto &module_context = target_module->getContext(); + + for (auto &[block_addr, block] : decl.cfg) { + std::string name = "basic_block_func" + std::to_string(block_addr); + auto new_version = target_module->getFunction(name); + if (!new_version) { + auto old_version = semantics_module->getFunction(name); + auto type = + llvm::dyn_cast(remill::RecontextualizeType( + old_version->getFunctionType(), module_context)); + new_version = llvm::Function::Create( + type, llvm::GlobalValue::ExternalLinkage, name, target_module); + remill::CloneFunctionInto(old_version, new_version); + } + } + const auto name = func->getName().str(); const auto module_func_type = llvm::dyn_cast( remill::RecontextualizeType(func->getFunctionType(), module_context)); diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 22eeb8641..50ec595a1 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -437,7 +437,7 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, LiftedFunction CreateLiftedFunction(const std::string &name); - BasicBlockFunction CreateBasicBlockFunction(const std::string &name); + BasicBlockFunction CreateBasicBlockFunction(const CodeBlock &block); llvm::BasicBlock * From 07866fde5ddd42ef29f815a226ec0332fab10cc7 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 21 Nov 2022 11:23:13 +0100 Subject: [PATCH 011/163] Remove unused passes --- include/anvill/Passes/LowerSwitchIntrinsics.h | 50 ---- .../anvill/Passes/LowerTypeHintIntrinsics.h | 24 -- include/anvill/Transforms.h | 17 +- lib/CMakeLists.txt | 35 ++- lib/Optimize.cpp | 4 - lib/Passes/LowerSwitchIntrinsics.cpp | 
214 ------------------ lib/Passes/LowerTypeHintIntrinsics.cpp | 66 ------ .../anvill_passes/src/SwitchLoweringPass.cpp | 3 +- 8 files changed, 18 insertions(+), 395 deletions(-) delete mode 100644 include/anvill/Passes/LowerSwitchIntrinsics.h delete mode 100644 include/anvill/Passes/LowerTypeHintIntrinsics.h delete mode 100644 lib/Passes/LowerSwitchIntrinsics.cpp delete mode 100644 lib/Passes/LowerTypeHintIntrinsics.cpp diff --git a/include/anvill/Passes/LowerSwitchIntrinsics.h b/include/anvill/Passes/LowerSwitchIntrinsics.h deleted file mode 100644 index a0116206a..000000000 --- a/include/anvill/Passes/LowerSwitchIntrinsics.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2019-present, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -// The goal here is to lower anvill_complete_switch to an llvm switch when we -// can recover the cases. This analysis must be sound but -// `anvill_complete_switch` maybe used for any complete set of indirect targets -// so cases may not even exist. -// -// The analysis has to prove to us that this transformation is semantically -// preserving. 
-// -// This pass focuses on lowering switch statements where a jump table does exist - -namespace anvill { - -class LowerSwitchIntrinsics - : public IndirectJumpPass, - public llvm::PassInfoMixin { - - private: - const MemoryProvider &memProv; - - public: - LowerSwitchIntrinsics(const MemoryProvider &memProv) - : memProv(memProv) {} - - static llvm::StringRef name(void); - - llvm::PreservedAnalyses runOnIndirectJump(llvm::CallInst *indirectJump, - llvm::FunctionAnalysisManager &am, - llvm::PreservedAnalyses); - - - static llvm::PreservedAnalyses BuildInitialResult(); -}; -} // namespace anvill diff --git a/include/anvill/Passes/LowerTypeHintIntrinsics.h b/include/anvill/Passes/LowerTypeHintIntrinsics.h deleted file mode 100644 index 27228e189..000000000 --- a/include/anvill/Passes/LowerTypeHintIntrinsics.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2019-present, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - -#pragma once - -#include - -namespace anvill { - -class LowerTypeHintIntrinsics final - : public llvm::PassInfoMixin { - public: - static llvm::StringRef name(void); - - llvm::PreservedAnalyses run(llvm::Function &F, - llvm::FunctionAnalysisManager &AM); -}; - -} // namespace anvill diff --git a/include/anvill/Transforms.h b/include/anvill/Transforms.h index 2758b77ae..4db6d6eb0 100644 --- a/include/anvill/Transforms.h +++ b/include/anvill/Transforms.h @@ -178,18 +178,6 @@ void AddRemoveUnusedFPClassificationCalls(llvm::FunctionPassManager &fpm); // various atomic read-modify-write variants into LLVM loads and stores. void AddLowerRemillMemoryAccessIntrinsics(llvm::FunctionPassManager &fpm); -// Type information from prior lifting efforts, or from front-end tools -// (e.g. Binary Ninja) is plumbed through the system by way of calls to -// intrinsic functions such as `__anvill_type`. 
These function calls -// don't interfere (too much) with optimizations, and they also survive -// optimizations. In general, the key role that they serve is to enable us to -// propagate through pointer type information at an instruction/register -// granularity. -// -// These function calls need to be removed/lowered into `inttoptr` or `bitcast` -// instructions. -void AddLowerTypeHintIntrinsics(llvm::FunctionPassManager &fpm); - // Transforms the bitcode to eliminate calls to `__remill_function_return`, // where appropriate. This will not succeed for all architectures, but is // likely to always succeed for x86(-64) and aarch64, due to their support @@ -245,7 +233,7 @@ void AddRecoverBasicStackFrame(llvm::FunctionPassManager &fpm, // for later passes to benefit from. void AddConvertAddressesToEntityUses( llvm::FunctionPassManager &fpm, const CrossReferenceResolver &resolver, - std::optional pc_annot_id=std::nullopt); + std::optional pc_annot_id = std::nullopt); // Some machine code instructions explicitly introduce undefined values / // behavior. Often, this is a result of the CPUs of different steppings of @@ -365,9 +353,6 @@ void AddBranchRecovery(llvm::FunctionPassManager &fpm); void AddRemoveFailedBranchHints(llvm::FunctionPassManager &fpm); -void AddLowerSwitchIntrinsics(llvm::FunctionPassManager &fpm, - const MemoryProvider &memprov); - // Remove constant expressions of the stack pointer that are not themselves // resolvable to references. For example, comparisons between one or two // stack pointer values. 
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 96fabdb8c..8ad387299 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -40,8 +40,6 @@ set(anvill_passes Constraints LowerRemillMemoryAccessIntrinsics LowerRemillUndefinedIntrinsics - LowerSwitchIntrinsics - LowerTypeHintIntrinsics RecoverBasicStackFrame RemoveCompilerBarriers RemoveDelaySlotIntrinsics @@ -169,8 +167,8 @@ add_library(anvill STATIC set_target_properties(anvill PROPERTIES - PUBLIC_HEADER "${anvill_PUBLIC_HEADERS}" - LINKER_LANGUAGE CXX + PUBLIC_HEADER "${anvill_PUBLIC_HEADERS}" + LINKER_LANGUAGE CXX ) target_include_directories(anvill PUBLIC @@ -181,46 +179,45 @@ target_include_directories(anvill PUBLIC target_link_libraries(anvill PUBLIC - remill + remill PRIVATE - protobuf::libprotobuf + protobuf::libprotobuf ) add_dependencies(anvill check_git_anvill) -if (ANVILL_ENABLE_PYTHON3_LIBS) +if(ANVILL_ENABLE_PYTHON3_LIBS) add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../python" python) endif() -#if(ANVILL_ENABLE_TESTS) -# add_subdirectory("tests") -#endif() - +# if(ANVILL_ENABLE_TESTS) +# add_subdirectory("tests") +# endif() if(ANVILL_ENABLE_INSTALL) install( TARGETS - anvill + anvill EXPORT - anvillTargets + anvillTargets LIBRARY DESTINATION - lib + lib ARCHIVE DESTINATION - lib + lib INCLUDES DESTINATION - include + include PUBLIC_HEADER DESTINATION - "${CMAKE_INSTALL_INCLUDEDIR}/anvill" + "${CMAKE_INSTALL_INCLUDEDIR}/anvill" ) install( FILES - ${anvill_passes_HEADERS} + ${anvill_passes_HEADERS} DESTINATION - "${CMAKE_INSTALL_INCLUDEDIR}/anvill/Passes" + "${CMAKE_INSTALL_INCLUDEDIR}/anvill/Passes" ) endif(ANVILL_ENABLE_INSTALL) diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index df3586c95..865d5d619 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -105,7 +105,6 @@ class OurVerifierPass : public llvm::PassInfoMixin { void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { const LifterOptions &options = lifter.Options(); - const MemoryProvider &mp = 
lifter.MemoryProvider(); EntityCrossReferenceResolver xr(lifter); @@ -191,7 +190,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { AddRemoveErrorIntrinsics(fpm); AddLowerRemillMemoryAccessIntrinsics(fpm); AddRemoveCompilerBarriers(fpm); - AddLowerTypeHintIntrinsics(fpm); // TODO(pag): This pass has an issue on the `SMIME_write_ASN1` function // of the ARM64 variant of Challenge 5. @@ -229,8 +227,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); AddBranchRecovery(fpm); - AddLowerSwitchIntrinsics(fpm, mp); - pb.crossRegisterProxies(lam, fam, cam, mam); mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(fpm))); diff --git a/lib/Passes/LowerSwitchIntrinsics.cpp b/lib/Passes/LowerSwitchIntrinsics.cpp deleted file mode 100644 index b941904e4..000000000 --- a/lib/Passes/LowerSwitchIntrinsics.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2019-present, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace anvill { - -class PcBinding { - private: - llvm::DenseMap mapping; - - PcBinding(llvm::DenseMap mapping) - : mapping(std::move(mapping)) {} - - - public: - std::optional Lookup(llvm::APInt targetPc) const { - if (this->mapping.find(targetPc) != this->mapping.end()) { - return {this->mapping.find(targetPc)->second}; - } - - return std::nullopt; - } - - static PcBinding Build(const llvm::CallInst *complete_switch, - llvm::SwitchInst *follower) { - assert(complete_switch->arg_size() - 1 == follower->getNumCases()); - - llvm::DenseMap mapping; - for (auto case_handler : follower->cases()) { - auto pc_arg = complete_switch->getArgOperand( - case_handler.getCaseValue()->getValue().getLimitedValue() + - 1); // is the switch has more than 2^64 cases we have bigger problems - mapping.insert( - {llvm::cast(pc_arg)->getValue(), - case_handler - .getCaseSuccessor()}); // the argument to a complete switch should always be a constant int - } - - - return PcBinding(std::move(mapping)); - } -}; - -class SwitchBuilder { - private: - llvm::LLVMContext &context; - const MemoryProvider &mem_prov; - const llvm::DataLayout &dl; - - std::optional ReadIntFrom(llvm::IntegerType *ty, - llvm::APInt addr) { - auto uaddr = addr.getLimitedValue(); - std::vector memory; - assert(ty->getBitWidth() % 8 == 0); - auto target_bytes = ty->getBitWidth() / 8; - - for (uint64_t i = 0; i < target_bytes; i++) { - auto res = this->mem_prov.Query(uaddr + i); - ByteAvailability avail = std::get<1>(res); - if (avail != ByteAvailability::kAvailable) { - return std::nullopt; - } - - memory.push_back(std::get<0>(res)); - } - - - llvm::APInt res(ty->getBitWidth(), 0); - - // Endianess? 
may have to flip around memory as needed, yeah looks like - // LoadIntMemory loads at system memory so need to use flip_memory in - // llvm::endianess - llvm::LoadIntFromMemory(res, memory.data(), target_bytes); - - if (this->dl.isLittleEndian() == llvm::sys::IsLittleEndianHost) { - return res; - } else { - return res.byteSwap(); - } - } - - public: - SwitchBuilder(llvm::LLVMContext &context, const MemoryProvider &memProv, - const llvm::DataLayout &dl) - : context(context), - mem_prov(memProv), - dl(dl) {} - - // A native switch utilizes llvms switch construct in the intended manner to - // dispatch control flow on integer values. This pass converts jump table- - // based compiler implementations of this construct back into simple switch - // cases over an integer index that directly jumps to known labels. - std::optional - CreateNativeSwitch(const JumpTableResult &jt, const PcBinding &binding, - llvm::LLVMContext &context) { - auto min_index = jt.bounds.lower; - auto number_of_cases = (jt.bounds.upper - min_index) + 1; - auto interp = jt.interp.getInterp(); - llvm::SwitchInst *new_switch = - llvm::SwitchInst::Create(jt.indexRel.getIndex(), jt.defaultOut, - number_of_cases.getLimitedValue()); - for (llvm::APInt curr_ind_value = min_index; - jt.bounds.lessThanOrEqual(curr_ind_value, jt.bounds.upper); - curr_ind_value += 1) { - auto read_address = jt.indexRel.apply(interp, curr_ind_value); - std::optional jmp_off = - this->ReadIntFrom(jt.pcRel.getExpectedType(jt.interp), read_address); - if (!jmp_off.has_value()) { - delete new_switch; - return std::nullopt; - } - - auto new_pc = jt.pcRel.apply(interp, *jmp_off); - auto out_block = binding.Lookup(new_pc); - if (!out_block.has_value()) { - delete new_switch; - return std::nullopt; - } - - - if (*out_block != jt.defaultOut) { - llvm::ConstantInt *index_val = - llvm::ConstantInt::get(this->context, curr_ind_value); - new_switch->addCase(index_val, *out_block); - } - } - return new_switch; - } -}; - - 
-llvm::PreservedAnalyses -LowerSwitchIntrinsics::runOnIndirectJump(llvm::CallInst *targetCall, - llvm::FunctionAnalysisManager &am, - llvm::PreservedAnalyses agg) { - - const auto &jt_analysis = - am.getResult(*targetCall->getFunction()); - auto jresult = jt_analysis.find(targetCall); - - - if (jresult == jt_analysis.end()) { - return agg; - } - - llvm::Function &f = *targetCall->getFunction(); - auto dl = f.getParent()->getDataLayout(); - llvm::LLVMContext &context = f.getParent()->getContext(); - - SwitchBuilder sbuilder(context, this->memProv, dl); - auto following_switch = targetCall->getParent()->getTerminator(); - - if (auto *follower = llvm::dyn_cast(following_switch)) { - // Check that the switch uses the complete switch - if (follower->getCondition() == targetCall) { - auto binding = PcBinding::Build(targetCall, follower); - std::optional new_switch = - sbuilder.CreateNativeSwitch(jresult->second, binding, context); - - if (new_switch) { - llvm::ReplaceInstWithInst(follower, *new_switch); - if (targetCall->uses().empty()) { - targetCall->eraseFromParent(); - } - agg.intersect(llvm::PreservedAnalyses::none()); - return agg; - } - } - } - - return agg; -} - -llvm::StringRef LowerSwitchIntrinsics::name() { - return "LowerSwitchIntrinsics"; -} - -llvm::PreservedAnalyses LowerSwitchIntrinsics::BuildInitialResult() { - return llvm::PreservedAnalyses::all(); -} - - -void AddLowerSwitchIntrinsics(llvm::FunctionPassManager &fpm, - const MemoryProvider &memprov) { - fpm.addPass(LowerSwitchIntrinsics(memprov)); -} - -} // namespace anvill diff --git a/lib/Passes/LowerTypeHintIntrinsics.cpp b/lib/Passes/LowerTypeHintIntrinsics.cpp deleted file mode 100644 index 4e3f2c8a8..000000000 --- a/lib/Passes/LowerTypeHintIntrinsics.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2019-present, Trail of Bits, Inc. - * All rights reserved. 
- * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include "Utils.h" - -namespace anvill { - -llvm::StringRef LowerTypeHintIntrinsics::name(void) { - return "LowerTypeHintIntrinsics"; -} - -llvm::PreservedAnalyses -LowerTypeHintIntrinsics::run(llvm::Function &func, - llvm::FunctionAnalysisManager &AM) { - std::vector calls; - - for (auto &inst : llvm::instructions(func)) { - if (auto call = llvm::dyn_cast(&inst)) { - if (auto callee = call->getCalledFunction(); - callee && callee->getName().startswith(kTypeHintFunctionPrefix)) { - calls.push_back(call); - } - } - } - - auto changed = false; - for (auto call : calls) { - auto val = call->getArgOperand(0)->stripPointerCasts(); - llvm::IRBuilder<> ir(call); - auto *cast_val = ir.CreateBitOrPointerCast(val, call->getType()); - CopyMetadataTo(call, cast_val); - call->replaceAllUsesWith(cast_val); - changed = true; - } - - for (auto call : calls) { - if (call->use_empty()) { - call->eraseFromParent(); - changed = true; - } - } - - return ConvertBoolToPreserved(changed); -} - -void AddLowerTypeHintIntrinsics(llvm::FunctionPassManager &fpm) { - fpm.addPass(LowerTypeHintIntrinsics()); -} - -} // namespace anvill diff --git a/tests/anvill_passes/src/SwitchLoweringPass.cpp b/tests/anvill_passes/src/SwitchLoweringPass.cpp index b8e087c52..f1a910d25 100644 --- a/tests/anvill_passes/src/SwitchLoweringPass.cpp +++ b/tests/anvill_passes/src/SwitchLoweringPass.cpp @@ -135,7 +135,7 @@ TEST_SUITE("SwitchLowerLargeFunction") { mem_prov->AddJumpTableOffset(-1153287); mem_prov->AddJumpTableOffset(-1153278); - fpm.addPass(LowerSwitchIntrinsics(*mem_prov.get())); + fpm.run(*target_function, fam); const auto &analysis_results = @@ -254,7 +254,6 @@ TEST_SUITE("SwitchLowerLargeFunction") { mem_prov->AddJumpTableOffset(0x30); 
fpm.addPass(llvm::InstCombinePass()); - fpm.addPass(LowerSwitchIntrinsics(*mem_prov)); const auto &analysis_results = From 7f4b0159a4f7a5928684d67143c971be9e4a51cc Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 21 Nov 2022 08:41:37 -0500 Subject: [PATCH 012/163] delete old test case --- tests/anvill_passes/CMakeLists.txt | 1 - .../anvill_passes/src/SwitchLoweringPass.cpp | 318 ------------------ 2 files changed, 319 deletions(-) delete mode 100644 tests/anvill_passes/src/SwitchLoweringPass.cpp diff --git a/tests/anvill_passes/CMakeLists.txt b/tests/anvill_passes/CMakeLists.txt index c26c97728..0578c1715 100644 --- a/tests/anvill_passes/CMakeLists.txt +++ b/tests/anvill_passes/CMakeLists.txt @@ -18,7 +18,6 @@ add_executable(test_anvill_passes src/InstructionFolderPass.cpp src/BrightenPointers.cpp src/TransformRemillJump.cpp - src/SwitchLoweringPass.cpp src/XorConversionPass.cpp src/BranchRecoveryPass.cpp src/RemoveStackPointerCExprs.cpp diff --git a/tests/anvill_passes/src/SwitchLoweringPass.cpp b/tests/anvill_passes/src/SwitchLoweringPass.cpp deleted file mode 100644 index f1a910d25..000000000 --- a/tests/anvill_passes/src/SwitchLoweringPass.cpp +++ /dev/null @@ -1,318 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "Utils.h" -namespace anvill { - - -class MockMemProv : public MemoryProvider { - private: - std::map memmap; - const llvm::DataLayout &dl; - uint64_t curr_base; - - public: - MockMemProv(const llvm::DataLayout &dl) : dl(dl), curr_base(0) {} - - std::tuple - Query(uint64_t address) const { - if (this->memmap.find(address) != this->memmap.end()) { - auto val = this->memmap.find(address)->second; - return std::make_tuple(val, ByteAvailability::kAvailable, - BytePermission::kReadable); - } - std::cout << "missed address: " << address << std::endl; - return std::make_tuple(0, 
ByteAvailability::kUnavailable, - BytePermission::kReadable); - } - - - void SetCurrJumpTableBase(uint64_t baseAddress) { - this->curr_base = baseAddress; - } - - void AddJumpTableOffset(uint32_t offset) { - std::vector data(sizeof(uint32_t)); - if (dl.isLittleEndian()) { - llvm::support::endian::write32le(data.data(), offset); - } else { - llvm::support::endian::write32be(data.data(), offset); - } - - for (uint64_t i = 0; i < data.size(); i++) { - this->memmap.insert({this->curr_base + i, data[i]}); - } - - this->curr_base += data.size(); - } -}; - - -namespace { - -static llvm::Function *FindFunction(llvm::Module *module, std::string name) { - for (auto &function : *module) { - if (function.getName().equals(name)) { - return &function; - } - } - return nullptr; -} -} // namespace - -TEST_SUITE("SwitchLowerLargeFunction") { - TEST_CASE("Run on large function") { - auto context = anvill::CreateContextWithOpaquePointers(); - - auto mod = LoadTestData(*context, "SwitchLoweringLarge.ll"); - auto target_function = - FindFunction(mod.get(), "sub_8240110__A_Sbi_Sbii_B_0"); - CHECK(target_function != nullptr); - llvm::FunctionPassManager fpm; - llvm::FunctionAnalysisManager fam; - llvm::ModuleAnalysisManager mam; - llvm::LoopAnalysisManager lam; - llvm::CGSCCAnalysisManager cgam; - - llvm::PassBuilder pb; - - pb.registerFunctionAnalyses(fam); - pb.registerModuleAnalyses(mam); - pb.registerCGSCCAnalyses(cgam); - pb.registerLoopAnalyses(lam); - - pb.crossRegisterProxies(lam, fam, cgam, mam); - - auto arch = remill::Arch::Build(context.get(), remill::GetOSName("linux"), - remill::GetArchName("amd64")); - auto ctrl_flow_provider = anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*context); - - NullTypeProvider ty_prov(tyDict); - NullMemoryProvider null_mem_prov; - anvill::LifterOptions lift_options(arch.get(), *mod, ty_prov, - std::move(ctrl_flow_provider), - null_mem_prov); - EntityLifter lifter(lift_options); - fam.registerPass([&] { return 
JumpTableAnalysis(lifter); }); - - fpm.addPass(llvm::InstCombinePass()); - auto mem_prov = std::make_shared(mod->getDataLayout()); - - - // this jump table has 30 entries with these possible offsets - // -3209123, -1153321, -1153312, -1153303, -1153287, -1153278 - // the offset for the default lable %41 is -3209123 - // Since there are 30 entries in the table this test assumes the 5 offsets are in order bookending a bunch of default cases - - - mem_prov->SetCurrJumpTableBase(136968824); - mem_prov->AddJumpTableOffset(-1153321); - mem_prov->AddJumpTableOffset(-1153312); - for (int i = 0; i < 25; i++) { - mem_prov->AddJumpTableOffset(-3209123); - } - - mem_prov->AddJumpTableOffset(-1153303); - mem_prov->AddJumpTableOffset(-1153287); - mem_prov->AddJumpTableOffset(-1153278); - - fpm.run(*target_function, fam); - - - const auto &analysis_results = - fam.getResult(*target_function); - - REQUIRE(analysis_results.size() == - 3); // check that we resolve all the switches - - - llvm::BasicBlock *target_block = nullptr; - for (const auto &jumpres : analysis_results) { - // unfortunately values are no longer identifiable by labels because the pass requires the instruction combiner which will now run again so identify switch by first non default pc value. 
- llvm::Value *v = jumpres.first->getArgOperand(2); - const JumpTableResult &res = jumpres.second; - auto interp = res.interp.getInterp(); - REQUIRE(llvm::isa(v)); - auto pc1 = llvm::cast(v); - switch (pc1->getValue().getLimitedValue()) { - case 136577416: - CHECK(res.bounds.lower.getLimitedValue() == 3); - CHECK(res.bounds.upper.getLimitedValue() == 241); - CHECK(!res.bounds.isSigned); - CHECK(res.indexRel.apply(interp, llvm::APInt(8, 5)) == 136967792); - break; - case 136578775: - CHECK(res.bounds.lower.getLimitedValue() == 6); - CHECK(res.bounds.upper.getLimitedValue() == 35); - CHECK(!res.bounds.isSigned); - CHECK(res.indexRel.apply(interp, llvm::APInt(8, 35)) == 136968940); - target_block = jumpres.first->getParent(); - break; - case 136578559: - CHECK(res.bounds.lower.getLimitedValue() == 26); - CHECK(res.bounds.upper.getLimitedValue() == 46); - CHECK(!res.bounds.isSigned); - CHECK(res.indexRel.apply(interp, llvm::APInt(8, 32)) == 136968764); - break; - default: CHECK(false); - } - } - - fpm.run(*target_function, fam); - - REQUIRE(target_block != nullptr); - llvm::SwitchInst *lowered_switch = - llvm::cast(target_block->getTerminator()); - - CHECK(lowered_switch->getNumCases() == 5); - - - llvm::SmallSet allowed_indices; - allowed_indices.insert(6); - allowed_indices.insert(7); - allowed_indices.insert(33); - allowed_indices.insert(34); - allowed_indices.insert(35); - - for (auto c : lowered_switch->cases()) { - CHECK(allowed_indices.contains( - c.getCaseValue()->getValue().getLimitedValue())); - } - - - lam.clear(); - fam.clear(); - mam.clear(); - cgam.clear(); - } - - TEST_CASE("Try negative Index") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "SwitchLoweringNeg.ll"); - auto target_function = FindFunction(mod.get(), "_start"); - CHECK(target_function != nullptr); - llvm::FunctionPassManager fpm; - llvm::FunctionAnalysisManager fam; - llvm::ModuleAnalysisManager mam; - llvm::LoopAnalysisManager lam; - 
llvm::CGSCCAnalysisManager cgam; - - llvm::PassBuilder pb; - - pb.registerFunctionAnalyses(fam); - pb.registerModuleAnalyses(mam); - pb.registerCGSCCAnalyses(cgam); - pb.registerLoopAnalyses(lam); - - pb.crossRegisterProxies(lam, fam, cgam, mam); - - auto arch = remill::Arch::Build(context.get(), remill::GetOSName("linux"), - remill::GetArchName("amd64")); - auto ctrl_flow_provider = anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*context); - - NullTypeProvider ty_prov(tyDict); - NullMemoryProvider null_mem_prov; - anvill::LifterOptions lift_options(arch.get(), *mod, ty_prov, - std::move(ctrl_flow_provider), - null_mem_prov); - EntityLifter lifter(lift_options); - - fam.registerPass([&] { return JumpTableAnalysis(lifter); }); - - - auto mem_prov = std::make_shared(mod->getDataLayout()); - - - mem_prov->SetCurrJumpTableBase(4294983520); - mem_prov->AddJumpTableOffset(0x10); - mem_prov->AddJumpTableOffset(0x3c); - mem_prov->AddJumpTableOffset(0x3c); - mem_prov->AddJumpTableOffset(0x1c); - mem_prov->AddJumpTableOffset(0x28); - mem_prov->AddJumpTableOffset(0x3c); - mem_prov->AddJumpTableOffset(0x3c); - mem_prov->AddJumpTableOffset(0x30); - - fpm.addPass(llvm::InstCombinePass()); - - - const auto &analysis_results = - fam.getResult(*target_function); - - REQUIRE(analysis_results.size() == - 1); // check that we resolve all the switches - - - for (const auto &jumpres : analysis_results) { - auto interp = jumpres.second.interp.getInterp(); - // unfortunately values are no longer identifiable by labels because the pass requires the instruction combiner which will now run again so identify switch by first non default pc value. 
- llvm::Value *v = jumpres.first->getArgOperand(2); - const JumpTableResult &res = jumpres.second; - REQUIRE(llvm::isa(v)); - auto pc1 = llvm::cast(v); - switch (pc1->getValue().getLimitedValue()) { - case 4294983464: - CHECK(res.bounds.lower == llvm::APInt(32, -4, true)); - CHECK(res.bounds.upper == llvm::APInt(32, 3, true)); - CHECK(res.bounds.isSigned); - CHECK(res.indexRel.apply(interp, llvm::APInt(32, -3, true)) - .getLimitedValue() == 4294983524); - break; - default: CHECK(false); - } - } - - fpm.run(*target_function, fam); - - - llvm::SwitchInst *lowered_switch = nullptr; - - for (auto &inst : llvm::instructions(target_function)) { - if (auto sw = llvm::dyn_cast(&inst)) { - lowered_switch = sw; - } - } - - REQUIRE(lowered_switch != nullptr); - - CHECK(lowered_switch->getNumCases() == 4); - - llvm::SmallSet allowed_indices; - allowed_indices.insert(llvm::APInt(32, -4).getLimitedValue()); - allowed_indices.insert(llvm::APInt(32, -1).getLimitedValue()); - allowed_indices.insert(llvm::APInt(32, -0).getLimitedValue()); - allowed_indices.insert(llvm::APInt(32, 3).getLimitedValue()); - - for (auto c : lowered_switch->cases()) { - CHECK(allowed_indices.contains( - c.getCaseValue()->getValue().getLimitedValue())); - } - - fam.clear(); - cgam.clear(); - lam.clear(); - mam.clear(); - } -} - -} // namespace anvill From e6a8377602786a4f19afa320e915b78a689b8b9c Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 21 Nov 2022 08:54:13 -0500 Subject: [PATCH 013/163] dont try to relift basic blocks for decls: --- lib/Lifters/FunctionLifter.cpp | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 2d11ff9be..1ed2a2aaf 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1936,17 +1936,20 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, const auto target_module = options.module; auto &module_context = 
target_module->getContext(); - for (auto &[block_addr, block] : decl.cfg) { - std::string name = "basic_block_func" + std::to_string(block_addr); - auto new_version = target_module->getFunction(name); - if (!new_version) { - auto old_version = semantics_module->getFunction(name); - auto type = - llvm::dyn_cast(remill::RecontextualizeType( - old_version->getFunctionType(), module_context)); - new_version = llvm::Function::Create( - type, llvm::GlobalValue::ExternalLinkage, name, target_module); - remill::CloneFunctionInto(old_version, new_version); + + if (!func->isDeclaration()) { + for (auto &[block_addr, block] : decl.cfg) { + std::string name = "basic_block_func" + std::to_string(block_addr); + auto new_version = target_module->getFunction(name); + if (!new_version) { + auto old_version = semantics_module->getFunction(name); + auto type = + llvm::dyn_cast(remill::RecontextualizeType( + old_version->getFunctionType(), module_context)); + new_version = llvm::Function::Create( + type, llvm::GlobalValue::ExternalLinkage, name, target_module); + remill::CloneFunctionInto(old_version, new_version); + } } } From 13cf91b5bd453267c4aba03f40cbb44dfd48bc4e Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 21 Nov 2022 15:35:55 +0100 Subject: [PATCH 014/163] Parse local variable decls --- data_specifications/specification.proto | 1 + include/anvill/Declarations.h | 7 +++++++ lib/Protobuf.cpp | 16 ++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index eebb72ae1..8d8fdddb8 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -172,6 +172,7 @@ message Function { FunctionLinkage func_linkage = 3; Callable callable = 4; map blocks = 5; + map local_variables = 6; } message GlobalVariable { diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 3c3d9f90a..894c8fd71 100644 --- 
a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -181,6 +181,11 @@ struct CallableDecl { DecodeFromPB(const remill::Arch *arch, const std::string &pb); }; +struct LocalVariableDecl { + std::string name; + std::vector values; +}; + // A function decl, as represented at a "near ABI" level. To be specific, // not all C, and most C++ decls, as written would be directly translatable // to this. This ought nearly represent how LLVM represents a C/C++ function @@ -211,6 +216,8 @@ struct FunctionDecl : public CallableDecl { // These are the blocks contained within the function representing the CFG. std::unordered_map cfg; + std::unordered_map locals; + // Declare this function in an LLVM module. llvm::Function *DeclareInModule(std::string_view name, llvm::Module &) const; diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 133202371..be1e634f7 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -512,6 +512,22 @@ Result ProtobufTranslator::DecodeFunction( decl.is_extern = false; } + for (auto &[name, local] : function.local_variables()) { + decl.locals[name].name = name; + auto type_spec = DecodeType(local.type()); + if (!type_spec.Succeeded()) { + return type_spec.Error(); + } + + for (auto &value : local.values()) { + auto value_decl = DecodeValue(value, type_spec.Value(), "local variable"); + if (!value_decl.Succeeded()) { + return value_decl.Error(); + } + decl.locals[name].values.push_back(value_decl.Value()); + } + } + return decl; } From 8898b9dfe1357c9ac68a9b599ef4658fc6b0a523 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 21 Nov 2022 16:13:27 -0500 Subject: [PATCH 015/163] add test --- tests/anvill_passes/CMakeLists.txt | 1 + tests/anvill_passes/data/MainBasicBlocks.ll | 5036 +++++++++++++++++ .../anvill_passes/src/TestAbstractStackBB.cpp | 21 + 3 files changed, 5058 insertions(+) create mode 100644 tests/anvill_passes/data/MainBasicBlocks.ll create mode 100644 tests/anvill_passes/src/TestAbstractStackBB.cpp diff --git 
a/tests/anvill_passes/CMakeLists.txt b/tests/anvill_passes/CMakeLists.txt index 0578c1715..6a8011866 100644 --- a/tests/anvill_passes/CMakeLists.txt +++ b/tests/anvill_passes/CMakeLists.txt @@ -22,6 +22,7 @@ add_executable(test_anvill_passes src/BranchRecoveryPass.cpp src/RemoveStackPointerCExprs.cpp src/RecoverEntityUses.cpp + src/TestAbstractStackBB.cpp ) target_link_libraries(test_anvill_passes PRIVATE diff --git a/tests/anvill_passes/data/MainBasicBlocks.ll b/tests/anvill_passes/data/MainBasicBlocks.ll new file mode 100644 index 000000000..c69340898 --- /dev/null +++ b/tests/anvill_passes/data/MainBasicBlocks.ll @@ -0,0 +1,5036 @@ +; ModuleID = 'lifted_code' +source_filename = "lifted_code" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu-elf" + +%struct.State = type { %struct.X86State } +%struct.X86State = type { %struct.ArchState, [32 x %union.VectorReg], %struct.ArithFlags, %union.anon, %struct.Segments, %struct.AddressSpace, %struct.GPR, %struct.X87Stack, %struct.MMX, %struct.FPUStatusFlags, %union.anon, %union.FPU, %struct.SegmentCaches, %struct.K_REG } +%struct.ArchState = type { i32, i32, %union.anon } +%union.VectorReg = type { %union.vec512_t } +%union.vec512_t = type { %struct.uint64v8_t } +%struct.uint64v8_t = type { [8 x i64] } +%struct.ArithFlags = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } +%struct.Segments = type { i16, %union.SegmentSelector, i16, %union.SegmentSelector, i16, %union.SegmentSelector, i16, %union.SegmentSelector, i16, %union.SegmentSelector, i16, %union.SegmentSelector } +%union.SegmentSelector = type { i16 } +%struct.AddressSpace = type { i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg } +%struct.Reg = type { %union.anon } +%struct.GPR = type { i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, 
%struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg, i64, %struct.Reg } +%struct.X87Stack = type { [8 x %struct.anon.3] } +%struct.anon.3 = type { [6 x i8], %struct.float80_t } +%struct.float80_t = type { [10 x i8] } +%struct.MMX = type { [8 x %struct.anon.4] } +%struct.anon.4 = type { i64, %union.vec64_t } +%union.vec64_t = type { %struct.uint64v1_t } +%struct.uint64v1_t = type { [1 x i64] } +%struct.FPUStatusFlags = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [4 x i8] } +%union.anon = type { i64 } +%union.FPU = type { %struct.anon.13 } +%struct.anon.13 = type { %struct.FpuFXSAVE, [96 x i8] } +%struct.FpuFXSAVE = type { %union.SegmentSelector, %union.SegmentSelector, %union.FPUAbridgedTagWord, i8, i16, i32, %union.SegmentSelector, i16, i32, %union.SegmentSelector, i16, %union.FPUControlStatus, %union.FPUControlStatus, [8 x %struct.FPUStackElem], [16 x %union.vec128_t] } +%union.FPUAbridgedTagWord = type { i8 } +%union.FPUControlStatus = type { i32 } +%struct.FPUStackElem = type { %union.anon.11, [6 x i8] } +%union.anon.11 = type { %struct.float80_t } +%union.vec128_t = type { %struct.uint128v1_t } +%struct.uint128v1_t = type { [1 x i128] } +%struct.SegmentCaches = type { %struct.SegmentShadow, %struct.SegmentShadow, %struct.SegmentShadow, %struct.SegmentShadow, %struct.SegmentShadow, %struct.SegmentShadow } +%struct.SegmentShadow = type { %union.anon, i32, i32 } +%struct.K_REG = type { [8 x %struct.anon.18] } +%struct.anon.18 = type { i64, i64 } + +@__anvill_reg_RAX = external local_unnamed_addr global i64 +@__anvill_reg_RBX = external local_unnamed_addr global i64 +@__anvill_reg_RCX = external local_unnamed_addr global i64 +@__anvill_reg_RDX = external local_unnamed_addr global i64 +@__anvill_reg_RDI = external local_unnamed_addr global i64 +@__anvill_reg_RBP = external local_unnamed_addr global i64 +@__anvill_reg_R8 = external 
local_unnamed_addr global i64 +@__anvill_reg_R9 = external local_unnamed_addr global i64 +@__anvill_reg_R10 = external local_unnamed_addr global i64 +@__anvill_reg_R11 = external local_unnamed_addr global i64 +@__anvill_reg_R12 = external local_unnamed_addr global i64 +@__anvill_reg_R13 = external local_unnamed_addr global i64 +@__anvill_reg_R14 = external local_unnamed_addr global i64 +@__anvill_reg_R15 = external local_unnamed_addr global i64 +@__anvill_reg_SS = external local_unnamed_addr global i16 +@__anvill_reg_ES = external local_unnamed_addr global i16 +@__anvill_reg_GS = external local_unnamed_addr global i16 +@__anvill_reg_FS = external local_unnamed_addr global i16 +@__anvill_reg_DS = external local_unnamed_addr global i16 +@__anvill_reg_CS = external local_unnamed_addr global i16 +@__anvill_reg_XMM0 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM1 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM2 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM3 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM4 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM5 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM6 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM7 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM8 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM9 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM10 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM11 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM12 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM13 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM14 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_XMM15 = external local_unnamed_addr global [16 x i8] +@__anvill_reg_ST0 = external local_unnamed_addr global x86_fp80 +@__anvill_reg_ST1 = external local_unnamed_addr 
global x86_fp80 +@__anvill_reg_ST2 = external local_unnamed_addr global x86_fp80 +@__anvill_reg_ST3 = external local_unnamed_addr global x86_fp80 +@__anvill_reg_ST4 = external local_unnamed_addr global x86_fp80 +@__anvill_reg_ST5 = external local_unnamed_addr global x86_fp80 +@__anvill_reg_ST6 = external local_unnamed_addr global x86_fp80 +@__anvill_reg_ST7 = external local_unnamed_addr global x86_fp80 +@__anvill_reg_MM0 = external local_unnamed_addr global i64 +@__anvill_reg_MM1 = external local_unnamed_addr global i64 +@__anvill_reg_MM2 = external local_unnamed_addr global i64 +@__anvill_reg_MM3 = external local_unnamed_addr global i64 +@__anvill_reg_MM4 = external local_unnamed_addr global i64 +@__anvill_reg_MM5 = external local_unnamed_addr global i64 +@__anvill_reg_MM6 = external local_unnamed_addr global i64 +@__anvill_reg_MM7 = external local_unnamed_addr global i64 +@__anvill_reg_AF = external local_unnamed_addr global i8 +@__anvill_reg_CF = external local_unnamed_addr global i8 +@__anvill_reg_DF = external local_unnamed_addr global i8 +@__anvill_reg_OF = external local_unnamed_addr global i8 +@__anvill_reg_PF = external local_unnamed_addr global i8 +@__anvill_reg_SF = external local_unnamed_addr global i8 +@__anvill_reg_ZF = external local_unnamed_addr global i8 +@__anvill_ra = external global i64 +@__anvill_pc = external global i64 +@var_402020__CBx0_D = local_unnamed_addr constant [0 x i8] zeroinitializer +@var_40203a__CBx0_D = local_unnamed_addr constant [0 x i8] zeroinitializer +@var_40204d_B = local_unnamed_addr constant i8 119 +@var_40204f_B = local_unnamed_addr constant i8 37 +@var_402052_B = local_unnamed_addr constant i8 49 +@var_402057__CBx0_D = local_unnamed_addr constant [0 x i8] zeroinitializer +@var_402060__CBx0_D = local_unnamed_addr constant [0 x i8] zeroinitializer +@var_402098_B = local_unnamed_addr constant i8 67 +@var_40209c__CBx0_D = local_unnamed_addr constant [0 x i8] zeroinitializer +@var_4020b1_B = local_unnamed_addr constant i8 
111 +@var_4020b3_B = local_unnamed_addr constant i8 120 +@var_4020b5__CBx0_D = local_unnamed_addr constant [0 x i8] zeroinitializer +@var_4020c4__CBx0_D = local_unnamed_addr constant [0 x i8] zeroinitializer +@__anvill_stack_0 = external local_unnamed_addr global i64 + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199049(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %EAX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !0 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %RBX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, !remill_register !2 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 3 + %2 = load i64, ptr %RBX, align 8 + %3 = inttoptr i64 %2 to ptr + %4 = load i64, ptr %3, align 8 + store i64 %4, ptr %RSI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %5 = add i64 %program_counter, 8 + store i64 4202528, ptr %RDI, align 8, !tbaa !5 + store i64 %5, ptr %PC, align 8 + %6 = add i64 %program_counter, 10 + %7 = load i64, ptr %EAX, align 8 + %8 = load i32, ptr %EAX, align 4 + %conv.i.i = trunc i64 %7 to i32 + %xor3.i.i = xor i32 %8, %conv.i.i + %conv.i27.i = zext i32 %xor3.i.i to i64 + store i64 %conv.i27.i, ptr %EAX, align 8, !tbaa !5 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i = trunc i32 %xor3.i.i to i8 + %9 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i), !range !26 + %10 = and i8 %9, 1 + %11 = xor i8 %10, 1 + %pf.i.i = getelementptr inbounds 
%struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %11, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i = icmp eq i32 %xor3.i.i, 0 + %conv3.i.i = zext i1 %cmp.i.i.i to i8 + %zf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv3.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i = icmp slt i32 %xor3.i.i, 0 + %conv6.i.i = zext i1 %cmp.i19.i.i to i8 + %sf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv6.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %of.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + %af.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %6, ptr %PC, align 8 + %12 = add i64 %program_counter, 15 + %13 = add i64 %program_counter, -505 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %14 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %14, -8 + %15 = inttoptr i64 %sub.i.i to ptr + store i64 %12, ptr %15, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %13, ptr %rip.i, align 8, !tbaa !5 + %16 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %12, ptr %PC, align 8 + %17 = add i64 %program_counter, 20 + store i64 1, ptr %EAX, align 8, !tbaa !5 + store i64 %17, ptr %PC, align 8 + %18 = add i64 %program_counter, 873 + store i64 %18, ptr %rip.i, align 8, !tbaa !5 + store i64 %18, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i8 @llvm.ctpop.i8(i8) #1 + +; Function Attrs: mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn +declare zeroext i1 
@__remill_flag_computation_zero(i1 noundef zeroext, ...) local_unnamed_addr #2 + +; Function Attrs: mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn +declare zeroext i1 @__remill_flag_computation_sign(i1 noundef zeroext, ...) local_unnamed_addr #2 + +; Function Attrs: mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn +declare zeroext i8 @__remill_undefined_8() local_unnamed_addr #2 + +; Function Attrs: noduplicate noinline nounwind optnone +declare ptr @__remill_function_call(ptr noundef nonnull align 1, i64 noundef, ptr noundef) local_unnamed_addr #3 + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199174(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %RBP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 15, i32 0, i32 0, !remill_register !33 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %R14 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 29, i32 0, i32 0, !remill_register !35 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + %2 = load i64, ptr %RSP, align 8 + %3 = add i64 %2, 24 + store i64 %3, ptr %R14, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %4 = add i64 %program_counter, 10 + %5 = add i64 %2, 86 + store i64 %5, ptr %RBP, align 8, !tbaa !5 + store i64 %4, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199922(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %RBP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 15, i32 0, i32 0, !remill_register !33 + %R15 = getelementptr inbounds 
%struct.State, ptr %state, i64 0, i32 0, i32 6, i32 31, i32 0, i32 0, !remill_register !36 + %R14 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 29, i32 0, i32 0, !remill_register !35 + %R13 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 27, i32 0, i32 0, !remill_register !37 + %R12 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 25, i32 0, i32 0, !remill_register !38 + %RBX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, !remill_register !2 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 7 + %2 = load i64, ptr %RSP, align 8 + %add.i.i = add i64 %2, 248 + store i64 %add.i.i, ptr %RSP, align 8, !tbaa !5 + %cmp.i.i.i = icmp ugt i64 %2, -249 + %conv.i.i = zext i1 %cmp.i.i.i to i8 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 %conv.i.i, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i = trunc i64 %add.i.i to i8 + %3 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i), !range !26 + %4 = and i8 %3, 1 + %5 = xor i8 %4, 1 + %pf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %5, ptr %pf.i.i.i, align 1, !tbaa !27 + %6 = xor i64 %2, %add.i.i + %7 = trunc i64 %6 to i8 + %8 = xor i8 %7, -1 + %9 = lshr i8 %8, 4 + %10 = and i8 %9, 1 + %af.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 %10, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i = icmp eq i64 %add.i.i, 0 + %conv5.i.i.i = zext i1 %cmp.i.i.i.i to i8 + %zf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv5.i.i.i, ptr %zf.i.i.i, align 1, !tbaa !28 
+ %cmp.i27.i.i.i = icmp slt i64 %add.i.i, 0 + %conv8.i.i.i = zext i1 %cmp.i27.i.i.i to i8 + %sf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv8.i.i.i, ptr %sf.i.i.i, align 1, !tbaa !29 + %shr.i.i.i.i = lshr i64 %2, 63 + %shr2.i.i.i.i = lshr i64 %add.i.i, 63 + %xor.i28.i.i.i = xor i64 %shr2.i.i.i.i, %shr.i.i.i.i + %add.i.i.i.i = add nuw nsw i64 %xor.i28.i.i.i, %shr2.i.i.i.i + %cmp.i29.i.i.i = icmp eq i64 %add.i.i.i.i, 2 + %conv11.i.i.i = zext i1 %cmp.i29.i.i.i to i8 + %of.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 %conv11.i.i.i, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %1, ptr %PC, align 8 + %11 = add i64 %program_counter, 8 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %12 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %add.i.i1 = add i64 %12, 8 + store i64 %add.i.i1, ptr %rsp.i, align 8, !tbaa !5 + %13 = inttoptr i64 %12 to ptr + %14 = load i64, ptr %13, align 8 + store i64 %14, ptr %RBX, align 8, !tbaa !5 + store i64 %11, ptr %PC, align 8 + %15 = add i64 %program_counter, 10 + %add.i.i3 = add i64 %12, 16 + store i64 %add.i.i3, ptr %rsp.i, align 8, !tbaa !5 + %16 = inttoptr i64 %add.i.i1 to ptr + %17 = load i64, ptr %16, align 8 + store i64 %17, ptr %R12, align 8, !tbaa !5 + store i64 %15, ptr %PC, align 8 + %18 = add i64 %program_counter, 12 + %add.i.i6 = add i64 %12, 24 + store i64 %add.i.i6, ptr %rsp.i, align 8, !tbaa !5 + %19 = inttoptr i64 %add.i.i3 to ptr + %20 = load i64, ptr %19, align 8 + store i64 %20, ptr %R13, align 8, !tbaa !5 + store i64 %18, ptr %PC, align 8 + %21 = add i64 %program_counter, 14 + %add.i.i9 = add i64 %12, 32 + store i64 %add.i.i9, ptr %rsp.i, align 8, !tbaa !5 + %22 = inttoptr i64 %add.i.i6 to ptr + %23 = load i64, ptr %22, align 8 + store i64 %23, ptr %R14, align 8, !tbaa !5 + store i64 %21, ptr %PC, align 8 + %24 = add i64 %program_counter, 16 + %add.i.i12 = add i64 %12, 40 + store i64 
%add.i.i12, ptr %rsp.i, align 8, !tbaa !5 + %25 = inttoptr i64 %add.i.i9 to ptr + %26 = load i64, ptr %25, align 8 + store i64 %26, ptr %R15, align 8, !tbaa !5 + store i64 %24, ptr %PC, align 8 + %27 = add i64 %program_counter, 17 + %add.i.i15 = add i64 %12, 48 + store i64 %add.i.i15, ptr %rsp.i, align 8, !tbaa !5 + %28 = inttoptr i64 %add.i.i12 to ptr + %29 = load i64, ptr %28, align 8 + store i64 %29, ptr %RBP, align 8, !tbaa !5 + store i64 %27, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn +declare zeroext i1 @__remill_flag_computation_carry(i1 noundef zeroext, ...) local_unnamed_addr #2 + +; Function Attrs: mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn +declare zeroext i1 @__remill_flag_computation_overflow(i1 noundef zeroext, ...) local_unnamed_addr #2 + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199673(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %RBX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, !remill_register !2 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + %2 = load i64, ptr %RSP, align 8 + %3 = add i64 %2, 8 + store i64 %3, ptr %RDI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %4 = add i64 %program_counter, 8 + %5 = load i64, ptr %RBX, align 8 + store i64 %5, ptr %RSI, 
align 8, !tbaa !5 + store i64 %4, ptr %PC, align 8 + %6 = add i64 %program_counter, 13 + %7 = add i64 %program_counter, 407 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %8 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %8, -8 + %9 = inttoptr i64 %sub.i.i to ptr + store i64 %6, ptr %9, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %7, ptr %rip.i, align 8, !tbaa !5 + %10 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %6, ptr %PC, align 8 + %11 = add i64 %program_counter, 28 + store i64 %11, ptr %rip.i, align 8, !tbaa !5 + store i64 %11, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199701(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %AL = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !39 + %R8 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 17, i32 0, i32 0, !remill_register !40 + %RCX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 5, i32 0, i32 0, !remill_register !41 + %RDX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 7, i32 0, i32 0, !remill_register !42 + %EBP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 15, i32 0, i32 0, !remill_register !43 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %RBX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, !remill_register !2 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %PC = getelementptr inbounds %struct.State, ptr 
%state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 3 + %2 = load i64, ptr %RBX, align 8 + store i64 %2, ptr %RDI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %3 = add i64 %program_counter, 8 + %4 = add i64 %program_counter, 1867 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %5 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %5, -8 + %6 = inttoptr i64 %sub.i.i to ptr + store i64 %3, ptr %6, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %4, ptr %rip.i, align 8, !tbaa !5 + %7 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %3, ptr %PC, align 8 + %8 = add i64 %program_counter, 11 + %9 = load i64, ptr %RBX, align 8 + store i64 %9, ptr %RDI, align 8, !tbaa !5 + store i64 %8, ptr %PC, align 8 + %10 = add i64 %program_counter, 16 + %11 = add i64 %program_counter, 1979 + %12 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i2 = add i64 %12, -8 + %13 = inttoptr i64 %sub.i.i2 to ptr + store i64 %10, ptr %13, align 8 + store i64 %sub.i.i2, ptr %rsp.i, align 8, !tbaa !5 + store i64 %11, ptr %rip.i, align 8, !tbaa !5 + %14 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %10, ptr %PC, align 8 + %15 = add i64 %program_counter, 19 + %16 = load i64, ptr %RBX, align 8 + %17 = inttoptr i64 %16 to ptr + %18 = load i8, ptr %17, align 1 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %19 = call i8 @llvm.ctpop.i8(i8 %18), !range !26 + %20 = and i8 %19, 1 + %21 = xor i8 %20, 1 + %pf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %21, ptr %pf.i.i.i, align 1, !tbaa !27 + %af.i.i.i = getelementptr 
inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 0, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i = icmp eq i8 %18, 0 + %conv5.i.i.i = zext i1 %cmp.i.i.i.i to i8 + %zf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv5.i.i.i, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i = icmp slt i8 %18, 0 + %conv8.i.i.i = zext i1 %cmp.i27.i.i.i to i8 + %sf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv8.i.i.i, ptr %sf.i.i.i, align 1, !tbaa !29 + %of.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %15, ptr %PC, align 8 + %22 = add i64 %program_counter, 24 + store i64 100, ptr %RSI, align 8, !tbaa !5 + store i64 %22, ptr %PC, align 8 + %23 = add i64 %program_counter, 29 + store i64 10, ptr %EBP, align 8, !tbaa !5 + store i64 %23, ptr %PC, align 8 + %24 = add i64 %program_counter, 32 + %cond1.i.v.i = select i1 %cmp.i.i.i.i, i64 10, i64 100 + store i64 %cond1.i.v.i, ptr %RSI, align 8, !tbaa !5 + store i64 %24, ptr %PC, align 8 + %25 = add i64 %program_counter, 37 + store i64 1, ptr %RDI, align 8, !tbaa !5 + store i64 %25, ptr %PC, align 8 + %26 = add i64 %program_counter, 42 + %27 = add i64 %program_counter, 1499 + %28 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i7 = add i64 %28, -8 + %29 = inttoptr i64 %sub.i.i7 to ptr + store i64 %26, ptr %29, align 8 + store i64 %sub.i.i7, ptr %rsp.i, align 8, !tbaa !5 + store i64 %27, ptr %rip.i, align 8, !tbaa !5 + %30 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %26, ptr %PC, align 8 + %31 = add i64 %program_counter, 46 + %32 = load i64, ptr %RBX, align 8 + %33 = add i64 %32, 1 + %34 = inttoptr i64 %33 to ptr + %35 = load i8, ptr %34, align 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %36 = call i8 @llvm.ctpop.i8(i8 %35), !range !26 + %37 = and i8 %36, 1 + %38 = xor 
i8 %37, 1 + store i8 %38, ptr %pf.i.i.i, align 1, !tbaa !27 + store i8 0, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i18 = icmp eq i8 %35, 0 + %conv5.i.i.i20 = zext i1 %cmp.i.i.i.i18 to i8 + store i8 %conv5.i.i.i20, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i22 = icmp slt i8 %35, 0 + %conv8.i.i.i24 = zext i1 %cmp.i27.i.i.i22 to i8 + store i8 %conv8.i.i.i24, ptr %sf.i.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %31, ptr %PC, align 8 + %39 = add i64 %program_counter, 51 + store i64 100, ptr %RSI, align 8, !tbaa !5 + store i64 %39, ptr %PC, align 8 + %40 = add i64 %program_counter, 54 + %41 = load i32, ptr %EBP, align 4 + %42 = zext i32 %41 to i64 + %cond1.i.v.i36 = select i1 %cmp.i.i.i.i18, i64 %42, i64 100 + store i64 %cond1.i.v.i36, ptr %RSI, align 8, !tbaa !5 + store i64 %40, ptr %PC, align 8 + %43 = add i64 %program_counter, 59 + store i64 2, ptr %RDI, align 8, !tbaa !5 + store i64 %43, ptr %PC, align 8 + %44 = add i64 %program_counter, 64 + %45 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i39 = add i64 %45, -8 + %46 = inttoptr i64 %sub.i.i39 to ptr + store i64 %44, ptr %46, align 8 + store i64 %sub.i.i39, ptr %rsp.i, align 8, !tbaa !5 + store i64 %27, ptr %rip.i, align 8, !tbaa !5 + %47 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %44, ptr %PC, align 8 + %48 = add i64 %program_counter, 68 + %49 = load i64, ptr %RBX, align 8 + %50 = add i64 %49, 2 + %51 = inttoptr i64 %50 to ptr + %52 = load i8, ptr %51, align 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %53 = call i8 @llvm.ctpop.i8(i8 %52), !range !26 + %54 = and i8 %53, 1 + %55 = xor i8 %54, 1 + store i8 %55, ptr %pf.i.i.i, align 1, !tbaa !27 + store i8 0, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i50 = icmp eq i8 %52, 0 + %conv5.i.i.i52 = zext i1 %cmp.i.i.i.i50 to i8 + store i8 %conv5.i.i.i52, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i54 = icmp slt i8 %52, 0 + %conv8.i.i.i56 = zext i1 
%cmp.i27.i.i.i54 to i8 + store i8 %conv8.i.i.i56, ptr %sf.i.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %48, ptr %PC, align 8 + %56 = add i64 %program_counter, 73 + store i64 100, ptr %RSI, align 8, !tbaa !5 + store i64 %56, ptr %PC, align 8 + %57 = add i64 %program_counter, 76 + %58 = load i32, ptr %EBP, align 4 + %59 = zext i32 %58 to i64 + %cond1.i.v.i68 = select i1 %cmp.i.i.i.i50, i64 %59, i64 100 + store i64 %cond1.i.v.i68, ptr %RSI, align 8, !tbaa !5 + store i64 %57, ptr %PC, align 8 + %60 = add i64 %program_counter, 81 + store i64 3, ptr %RDI, align 8, !tbaa !5 + store i64 %60, ptr %PC, align 8 + %61 = add i64 %program_counter, 86 + %62 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i71 = add i64 %62, -8 + %63 = inttoptr i64 %sub.i.i71 to ptr + store i64 %61, ptr %63, align 8 + store i64 %sub.i.i71, ptr %rsp.i, align 8, !tbaa !5 + store i64 %27, ptr %rip.i, align 8, !tbaa !5 + %64 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %61, ptr %PC, align 8 + %65 = add i64 %program_counter, 90 + %66 = load i64, ptr %RBX, align 8 + %67 = add i64 %66, 3 + %68 = inttoptr i64 %67 to ptr + %69 = load i8, ptr %68, align 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %70 = call i8 @llvm.ctpop.i8(i8 %69), !range !26 + %71 = and i8 %70, 1 + %72 = xor i8 %71, 1 + store i8 %72, ptr %pf.i.i.i, align 1, !tbaa !27 + store i8 0, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i82 = icmp eq i8 %69, 0 + %conv5.i.i.i84 = zext i1 %cmp.i.i.i.i82 to i8 + store i8 %conv5.i.i.i84, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i86 = icmp slt i8 %69, 0 + %conv8.i.i.i88 = zext i1 %cmp.i27.i.i.i86 to i8 + store i8 %conv8.i.i.i88, ptr %sf.i.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %65, ptr %PC, align 8 + %73 = add i64 %program_counter, 95 + store i64 100, ptr %RSI, align 8, !tbaa !5 + store i64 %73, ptr %PC, align 8 + %74 = add i64 %program_counter, 98 + %75 
= load i32, ptr %EBP, align 4 + %76 = zext i32 %75 to i64 + %cond1.i.v.i100 = select i1 %cmp.i.i.i.i82, i64 %76, i64 100 + store i64 %cond1.i.v.i100, ptr %RSI, align 8, !tbaa !5 + store i64 %74, ptr %PC, align 8 + %77 = add i64 %program_counter, 103 + store i64 4, ptr %RDI, align 8, !tbaa !5 + store i64 %77, ptr %PC, align 8 + %78 = add i64 %program_counter, 108 + %79 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i103 = add i64 %79, -8 + %80 = inttoptr i64 %sub.i.i103 to ptr + store i64 %78, ptr %80, align 8 + store i64 %sub.i.i103, ptr %rsp.i, align 8, !tbaa !5 + store i64 %27, ptr %rip.i, align 8, !tbaa !5 + %81 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %78, ptr %PC, align 8 + %82 = add i64 %program_counter, 111 + %83 = load i64, ptr %RBX, align 8 + %84 = inttoptr i64 %83 to ptr + %85 = load i8, ptr %84, align 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %86 = call i8 @llvm.ctpop.i8(i8 %85), !range !26 + %87 = and i8 %86, 1 + %88 = xor i8 %87, 1 + store i8 %88, ptr %pf.i.i.i, align 1, !tbaa !27 + store i8 0, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i114 = icmp eq i8 %85, 0 + %conv5.i.i.i116 = zext i1 %cmp.i.i.i.i114 to i8 + store i8 %conv5.i.i.i116, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i118 = icmp slt i8 %85, 0 + %conv8.i.i.i120 = zext i1 %cmp.i27.i.i.i118 to i8 + store i8 %conv8.i.i.i120, ptr %sf.i.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %82, ptr %PC, align 8 + %89 = add i64 %program_counter, 116 + store i64 4202673, ptr %RSI, align 8, !tbaa !5 + store i64 %89, ptr %PC, align 8 + %90 = add i64 %program_counter, 121 + store i64 4202675, ptr %AL, align 8, !tbaa !5 + store i64 %90, ptr %PC, align 8 + %91 = add i64 %program_counter, 125 + %cond1.i.i = select i1 %cmp.i.i.i.i114, i64 4202675, i64 4202673 + store i64 %cond1.i.i, ptr %RSI, align 8, !tbaa !5 + store i64 %91, ptr %PC, align 8 + %92 = add i64 %program_counter, 129 + %93 = add i64 %83, 
1 + %94 = inttoptr i64 %93 to ptr + %95 = load i8, ptr %94, align 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %96 = call i8 @llvm.ctpop.i8(i8 %95), !range !26 + %97 = and i8 %96, 1 + %98 = xor i8 %97, 1 + store i8 %98, ptr %pf.i.i.i, align 1, !tbaa !27 + store i8 0, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i140 = icmp eq i8 %95, 0 + %conv5.i.i.i142 = zext i1 %cmp.i.i.i.i140 to i8 + store i8 %conv5.i.i.i142, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i144 = icmp slt i8 %95, 0 + %conv8.i.i.i146 = zext i1 %cmp.i27.i.i.i144 to i8 + store i8 %conv8.i.i.i146, ptr %sf.i.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %92, ptr %PC, align 8 + %99 = add i64 %program_counter, 134 + store i64 4202673, ptr %RDX, align 8, !tbaa !5 + store i64 %99, ptr %PC, align 8 + %100 = add i64 %program_counter, 138 + %cond1.i.i158 = select i1 %cmp.i.i.i.i140, i64 4202675, i64 4202673 + store i64 %cond1.i.i158, ptr %RDX, align 8, !tbaa !5 + store i64 %100, ptr %PC, align 8 + %101 = add i64 %program_counter, 142 + %102 = add i64 %83, 2 + %103 = inttoptr i64 %102 to ptr + %104 = load i8, ptr %103, align 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %105 = call i8 @llvm.ctpop.i8(i8 %104), !range !26 + %106 = and i8 %105, 1 + %107 = xor i8 %106, 1 + store i8 %107, ptr %pf.i.i.i, align 1, !tbaa !27 + store i8 0, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i167 = icmp eq i8 %104, 0 + %conv5.i.i.i169 = zext i1 %cmp.i.i.i.i167 to i8 + store i8 %conv5.i.i.i169, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i171 = icmp slt i8 %104, 0 + %conv8.i.i.i173 = zext i1 %cmp.i27.i.i.i171 to i8 + store i8 %conv8.i.i.i173, ptr %sf.i.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %101, ptr %PC, align 8 + %108 = add i64 %program_counter, 147 + store i64 4202673, ptr %RCX, align 8, !tbaa !5 + store i64 %108, ptr %PC, align 8 + %109 = add i64 %program_counter, 151 + %cond1.i.i185 = select i1 %cmp.i.i.i.i167, i64 
4202675, i64 4202673 + store i64 %cond1.i.i185, ptr %RCX, align 8, !tbaa !5 + store i64 %109, ptr %PC, align 8 + %110 = add i64 %program_counter, 155 + %111 = add i64 %83, 3 + %112 = inttoptr i64 %111 to ptr + %113 = load i8, ptr %112, align 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %114 = call i8 @llvm.ctpop.i8(i8 %113), !range !26 + %115 = and i8 %114, 1 + %116 = xor i8 %115, 1 + store i8 %116, ptr %pf.i.i.i, align 1, !tbaa !27 + store i8 0, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i194 = icmp eq i8 %113, 0 + %conv5.i.i.i196 = zext i1 %cmp.i.i.i.i194 to i8 + store i8 %conv5.i.i.i196, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i198 = icmp slt i8 %113, 0 + %conv8.i.i.i200 = zext i1 %cmp.i27.i.i.i198 to i8 + store i8 %conv8.i.i.i200, ptr %sf.i.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %110, ptr %PC, align 8 + %117 = add i64 %program_counter, 161 + store i64 4202673, ptr %R8, align 8, !tbaa !5 + store i64 %117, ptr %PC, align 8 + %118 = add i64 %program_counter, 165 + %cond1.i.i212 = select i1 %cmp.i.i.i.i194, i64 4202675, i64 4202673 + store i64 %cond1.i.i212, ptr %R8, align 8, !tbaa !5 + store i64 %118, ptr %PC, align 8 + %119 = add i64 %program_counter, 170 + store i64 4202652, ptr %RDI, align 8, !tbaa !5 + store i64 %119, ptr %PC, align 8 + %120 = add i64 %program_counter, 172 + store i64 0, ptr %AL, align 8, !tbaa !5 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %121 = call i8 @llvm.ctpop.i8(i8 0), !range !26 + %122 = and i8 %121, 1 + %123 = xor i8 %122, 1 + store i8 %123, ptr %pf.i.i.i, align 1, !tbaa !27 + store i8 1, ptr %zf.i.i.i, align 1, !tbaa !28 + store i8 0, ptr %sf.i.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i8 undef, ptr %af.i.i.i, align 1, !tbaa !31 + store i64 %120, ptr %PC, align 8 + %124 = add i64 %program_counter, 177 + %125 = add i64 %program_counter, -1157 + %126 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i217 = add i64 %126, -8 + %127 = 
inttoptr i64 %sub.i.i217 to ptr + store i64 %124, ptr %127, align 8 + store i64 %sub.i.i217, ptr %rsp.i, align 8, !tbaa !5 + store i64 %125, ptr %rip.i, align 8, !tbaa !5 + %128 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %124, ptr %PC, align 8 + %129 = add i64 %program_counter, 179 + store i8 1, ptr %AL, align 1, !tbaa !32 + store i64 %129, ptr %PC, align 8 + %130 = add i64 %program_counter, 181 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + store i8 0, ptr %pf.i.i.i, align 1, !tbaa !27 + store i8 0, ptr %zf.i.i.i, align 1, !tbaa !28 + store i8 0, ptr %sf.i.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i.i, align 1, !tbaa !30 + store i8 undef, ptr %af.i.i.i, align 1, !tbaa !31 + store i64 %130, ptr %PC, align 8 + %cond1.i.i233 = add i64 -309, %program_counter + store i64 %cond1.i.i233, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn +declare zeroext i1 @__remill_compare_eq(i1 noundef zeroext) local_unnamed_addr #2 + +; Function Attrs: mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn +declare zeroext i1 @__remill_compare_neq(i1 noundef zeroext) local_unnamed_addr #2 + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199888(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 30 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %1, ptr %rip.i, align 8, !tbaa !5 + store i64 %1, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199497(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) 
unnamed_addr #0 { + %R8 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 17, i32 0, i32 0, !remill_register !40 + %RAX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !44 + %RDX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 7, i32 0, i32 0, !remill_register !42 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %R11 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 23, i32 0, i32 0, !remill_register !45 + %R10 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 21, i32 0, i32 0, !remill_register !46 + %R13 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 27, i32 0, i32 0, !remill_register !37 + %R15 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 31, i32 0, i32 0, !remill_register !36 + %RCX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 5, i32 0, i32 0, !remill_register !41 + %R9 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 19, i32 0, i32 0, !remill_register !47 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %R14 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 29, i32 0, i32 0, !remill_register !35 + %R12D = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 25, i32 0, i32 0, !remill_register !48 + %EBP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 15, i32 0, i32 0, !remill_register !43 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 
%program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 3 + %2 = load i64, ptr %RSP, align 8 + %3 = inttoptr i64 %2 to ptr + %4 = load i32, ptr %3, align 4 + %conv.i.i = zext i32 %4 to i64 + store i64 %conv.i.i, ptr %EBP, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %5 = add i64 %program_counter, 6 + %6 = load i32, ptr %EBP, align 4 + %7 = zext i32 %6 to i64 + store i64 %7, ptr %R12D, align 8, !tbaa !5 + store i64 %5, ptr %PC, align 8 + %8 = add i64 %program_counter, 13 + %and3.i.i = and i32 %6, 536870911 + %conv.i22.i = zext i32 %and3.i.i to i64 + store i64 %conv.i22.i, ptr %R12D, align 8, !tbaa !5 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i = trunc i32 %6 to i8 + %9 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i), !range !26 + %10 = and i8 %9, 1 + %11 = xor i8 %10, 1 + %pf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %11, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i = icmp eq i32 %and3.i.i, 0 + %conv3.i.i = zext i1 %cmp.i.i.i to i8 + %zf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv3.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %sf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 0, ptr %sf.i.i, align 1, !tbaa !29 + %of.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + %af.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 0, ptr %af.i.i, align 1, !tbaa !31 + store i64 %8, ptr %PC, align 8 + %12 = add i64 %program_counter, 17 + %13 = load i32, ptr %R12D, align 4 + store i32 %13, ptr %3, align 4 + store i64 %12, ptr %PC, align 8 + %14 = add i64 %program_counter, 22 + %15 = add i64 %2, 112 + store i64 %15, ptr %R14, align 8, !tbaa !5 + store i64 %14, ptr %PC, align 8 + %16 = add i64 %program_counter, 25 + store 
i64 %15, ptr %RDI, align 8, !tbaa !5 + store i64 %16, ptr %PC, align 8 + %17 = add i64 %program_counter, 30 + %18 = add i64 %program_counter, -857 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %19 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %19, -8 + %20 = inttoptr i64 %sub.i.i to ptr + store i64 %17, ptr %20, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %18, ptr %rip.i, align 8, !tbaa !5 + %21 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %17, ptr %PC, align 8 + %22 = add i64 %program_counter, 33 + %23 = load i64, ptr %R14, align 8 + store i64 %23, ptr %RDI, align 8, !tbaa !5 + store i64 %22, ptr %PC, align 8 + %24 = add i64 %program_counter, 38 + %25 = add i64 %program_counter, -969 + %26 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i5 = add i64 %26, -8 + %27 = inttoptr i64 %sub.i.i5 to ptr + store i64 %24, ptr %27, align 8 + store i64 %sub.i.i5, ptr %rsp.i, align 8, !tbaa !5 + store i64 %25, ptr %rip.i, align 8, !tbaa !5 + %28 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %24, ptr %PC, align 8 + %29 = add i64 %program_counter, 44 + %30 = load i64, ptr %RSP, align 8 + %31 = add i64 %30, 8 + %32 = inttoptr i64 %31 to ptr + %33 = load i8, ptr %32, align 1 + %conv.i.i9 = zext i8 %33 to i64 + store i64 %conv.i.i9, ptr %R9, align 8, !tbaa !5 + store i64 %29, ptr %PC, align 8 + %34 = add i64 %30, 9 + %35 = inttoptr i64 %34 to ptr + %36 = load i8, ptr %35, align 1 + %conv.i.i11 = zext i8 %36 to i64 + store i64 %conv.i.i11, ptr %RCX, align 8, !tbaa !5 + %37 = add i64 %program_counter, 54 + %38 = add i64 %30, 72 + %39 = inttoptr i64 %38 to ptr + store i64 %conv.i.i11, ptr %39, align 8 + store i64 %37, ptr %PC, align 8 + %40 = add i64 %30, 10 + %41 = inttoptr i64 %40 to ptr + %42 = load i8, ptr %41, 
align 1 + %conv.i.i14 = zext i8 %42 to i64 + store i64 %conv.i.i14, ptr %RCX, align 8, !tbaa !5 + %43 = add i64 %program_counter, 64 + %44 = add i64 %30, 64 + %45 = inttoptr i64 %44 to ptr + store i64 %conv.i.i14, ptr %45, align 8 + store i64 %43, ptr %PC, align 8 + %46 = add i64 %program_counter, 70 + %47 = add i64 %30, 11 + %48 = inttoptr i64 %47 to ptr + %49 = load i8, ptr %48, align 1 + %conv.i.i17 = zext i8 %49 to i64 + store i64 %conv.i.i17, ptr %R15, align 8, !tbaa !5 + store i64 %46, ptr %PC, align 8 + %50 = add i64 %program_counter, 76 + %51 = add i64 %30, 12 + %52 = inttoptr i64 %51 to ptr + %53 = load i8, ptr %52, align 1 + %conv.i.i19 = zext i8 %53 to i64 + store i64 %conv.i.i19, ptr %R13, align 8, !tbaa !5 + store i64 %50, ptr %PC, align 8 + %54 = add i64 %program_counter, 82 + %55 = add i64 %30, 13 + %56 = inttoptr i64 %55 to ptr + %57 = load i8, ptr %56, align 1 + %conv.i.i21 = zext i8 %57 to i64 + store i64 %conv.i.i21, ptr %R14, align 8, !tbaa !5 + store i64 %54, ptr %PC, align 8 + %58 = add i64 %program_counter, 88 + %59 = add i64 %30, 14 + %60 = inttoptr i64 %59 to ptr + %61 = load i8, ptr %60, align 1 + %conv.i.i23 = zext i8 %61 to i64 + store i64 %conv.i.i23, ptr %R10, align 8, !tbaa !5 + store i64 %58, ptr %PC, align 8 + %62 = add i64 %program_counter, 94 + %63 = add i64 %30, 15 + %64 = inttoptr i64 %63 to ptr + %65 = load i8, ptr %64, align 1 + %conv.i.i25 = zext i8 %65 to i64 + store i64 %conv.i.i25, ptr %R11, align 8, !tbaa !5 + store i64 %62, ptr %PC, align 8 + %66 = add i64 %program_counter, 98 + %sub.i.i26 = add i64 %30, -8 + store i64 %sub.i.i26, ptr %RSP, align 8, !tbaa !5 + %cmp.i.i.i27 = icmp ult i64 %30, 8 + %conv.i.i29 = zext i1 %cmp.i.i.i27 to i8 + store i8 %conv.i.i29, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i = trunc i64 %sub.i.i26 to i8 + %67 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i), !range !26 + %68 = and i8 %67, 1 + %69 = xor i8 %68, 1 + store i8 %69, ptr %pf.i.i, align 1, !tbaa !27 + %70 = xor i64 %30, %sub.i.i26 + 
%71 = trunc i64 %70 to i8 + %72 = lshr i8 %71, 4 + %73 = and i8 %72, 1 + store i8 %73, ptr %af.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i = icmp eq i64 %30, 8 + %conv5.i.i.i = zext i1 %cmp.i.i.i.i to i8 + store i8 %conv5.i.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i = icmp slt i64 %sub.i.i26, 0 + %conv8.i.i.i = zext i1 %cmp.i27.i.i.i to i8 + store i8 %conv8.i.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %shr.i.i.i.i = lshr i64 %30, 63 + %shr2.i.i.i.i = lshr i64 %sub.i.i26, 63 + %xor3.i.i.i.i = xor i64 %shr2.i.i.i.i, %shr.i.i.i.i + %add.i.i.i.i = add nuw nsw i64 %xor3.i.i.i.i, %shr.i.i.i.i + %cmp.i29.i.i.i = icmp eq i64 %add.i.i.i.i, 2 + %conv11.i.i.i = zext i1 %cmp.i29.i.i.i to i8 + store i8 %conv11.i.i.i, ptr %of.i.i, align 1, !tbaa !30 + store i64 %66, ptr %PC, align 8 + %74 = add i64 %program_counter, 103 + store i64 4202592, ptr %RSI, align 8, !tbaa !5 + store i64 %74, ptr %PC, align 8 + %75 = add i64 %program_counter, 108 + store i64 4202648, ptr %RCX, align 8, !tbaa !5 + store i64 %75, ptr %PC, align 8 + %76 = add i64 %program_counter, 113 + %77 = add i64 %30, 40 + %78 = inttoptr i64 %77 to ptr + %79 = load i64, ptr %78, align 8 + store i64 %79, ptr %RDI, align 8, !tbaa !5 + store i64 %76, ptr %PC, align 8 + %80 = add i64 %program_counter, 116 + %81 = load i64, ptr %RAX, align 8 + store i64 %81, ptr %RDX, align 8, !tbaa !5 + store i64 %80, ptr %PC, align 8 + %82 = add i64 %program_counter, 119 + %83 = load i32, ptr %R12D, align 4 + %84 = zext i32 %83 to i64 + store i64 %84, ptr %R8, align 8, !tbaa !5 + store i64 %82, ptr %PC, align 8 + %85 = add i64 %program_counter, 124 + store i64 0, ptr %RAX, align 8, !tbaa !5 + store i64 %85, ptr %PC, align 8 + %86 = add i64 %program_counter, 126 + %87 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i.i = add i64 %87, -8 + %88 = inttoptr i64 %sub.i.i.i to ptr + store i64 %conv.i.i25, ptr %88, align 8 + store i64 %sub.i.i.i, ptr %rsp.i, align 8, !tbaa !5 + store i64 %86, ptr %PC, align 8 + %89 = add i64 
%program_counter, 128 + %sub.i.i.i35 = add i64 %87, -16 + %90 = inttoptr i64 %sub.i.i.i35 to ptr + store i64 %conv.i.i23, ptr %90, align 8 + store i64 %sub.i.i.i35, ptr %rsp.i, align 8, !tbaa !5 + store i64 %89, ptr %PC, align 8 + %91 = add i64 %program_counter, 130 + %sub.i.i.i38 = add i64 %87, -24 + %92 = inttoptr i64 %sub.i.i.i38 to ptr + store i64 %conv.i.i21, ptr %92, align 8 + store i64 %sub.i.i.i38, ptr %rsp.i, align 8, !tbaa !5 + store i64 %91, ptr %PC, align 8 + %93 = add i64 %program_counter, 132 + %sub.i.i.i41 = add i64 %87, -32 + %94 = inttoptr i64 %sub.i.i.i41 to ptr + store i64 %conv.i.i19, ptr %94, align 8 + store i64 %sub.i.i.i41, ptr %rsp.i, align 8, !tbaa !5 + store i64 %93, ptr %PC, align 8 + %95 = add i64 %program_counter, 134 + %sub.i.i.i44 = add i64 %87, -40 + %96 = inttoptr i64 %sub.i.i.i44 to ptr + store i64 %conv.i.i17, ptr %96, align 8 + store i64 %sub.i.i.i44, ptr %rsp.i, align 8, !tbaa !5 + store i64 %95, ptr %PC, align 8 + %97 = add i64 %program_counter, 138 + %98 = load i64, ptr %RSP, align 8 + %99 = add i64 %98, 112 + %100 = inttoptr i64 %99 to ptr + %101 = load i64, ptr %100, align 8 + %sub.i.i.i48 = add i64 %87, -48 + %102 = inttoptr i64 %sub.i.i.i48 to ptr + store i64 %101, ptr %102, align 8 + store i64 %sub.i.i.i48, ptr %rsp.i, align 8, !tbaa !5 + store i64 %97, ptr %PC, align 8 + %103 = add i64 %program_counter, 145 + %104 = load i64, ptr %RSP, align 8 + %105 = add i64 %104, 128 + %106 = inttoptr i64 %105 to ptr + %107 = load i64, ptr %106, align 8 + %sub.i.i.i52 = add i64 %87, -56 + %108 = inttoptr i64 %sub.i.i.i52 to ptr + store i64 %107, ptr %108, align 8 + store i64 %sub.i.i.i52, ptr %rsp.i, align 8, !tbaa !5 + store i64 %103, ptr %PC, align 8 + %109 = add i64 %program_counter, 150 + %110 = add i64 %program_counter, -873 + %sub.i.i55 = add i64 %87, -64 + %111 = inttoptr i64 %sub.i.i55 to ptr + store i64 %109, ptr %111, align 8 + store i64 %sub.i.i55, ptr %rsp.i, align 8, !tbaa !5 + store i64 %110, ptr %rip.i, align 8, !tbaa 
!5 + %112 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %109, ptr %PC, align 8 + %113 = add i64 %program_counter, 154 + %114 = load i64, ptr %RSP, align 8 + %add.i.i = add i64 %114, 64 + store i64 %add.i.i, ptr %RSP, align 8, !tbaa !5 + %cmp.i.i.i58 = icmp ugt i64 %114, -65 + %conv.i.i60 = zext i1 %cmp.i.i.i58 to i8 + store i8 %conv.i.i60, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i62 = trunc i64 %add.i.i to i8 + %115 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i62), !range !26 + %116 = and i8 %115, 1 + %117 = xor i8 %116, 1 + store i8 %117, ptr %pf.i.i, align 1, !tbaa !27 + %118 = xor i64 %114, %add.i.i + %119 = trunc i64 %118 to i8 + %120 = lshr i8 %119, 4 + %121 = and i8 %120, 1 + store i8 %121, ptr %af.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i67 = icmp eq i64 %add.i.i, 0 + %conv5.i.i.i69 = zext i1 %cmp.i.i.i.i67 to i8 + store i8 %conv5.i.i.i69, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i71 = icmp slt i64 %add.i.i, 0 + %conv8.i.i.i73 = zext i1 %cmp.i27.i.i.i71 to i8 + store i8 %conv8.i.i.i73, ptr %sf.i.i, align 1, !tbaa !29 + %shr.i.i.i.i75 = lshr i64 %114, 63 + %shr2.i.i.i.i76 = lshr i64 %add.i.i, 63 + %xor.i28.i.i.i = xor i64 %shr2.i.i.i.i76, %shr.i.i.i.i75 + %add.i.i.i.i77 = add nuw nsw i64 %xor.i28.i.i.i, %shr2.i.i.i.i76 + %cmp.i29.i.i.i78 = icmp eq i64 %add.i.i.i.i77, 2 + %conv11.i.i.i80 = zext i1 %cmp.i29.i.i.i78 to i8 + store i8 %conv11.i.i.i80, ptr %of.i.i, align 1, !tbaa !30 + store i64 %113, ptr %PC, align 8 + %122 = add i64 %program_counter, 160 + %123 = load i64, ptr %EBP, align 8 + %conv.i.i82 = trunc i64 %123 to i32 + %and3.i.i83 = and i32 %conv.i.i82, 134217472 + %conv.i22.i84 = zext i32 %and3.i.i83 to i64 + store i64 %conv.i22.i84, ptr %EBP, align 8, !tbaa !5 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + store i8 1, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i88 = icmp eq i32 %and3.i.i83, 0 + %conv3.i.i90 = zext i1 %cmp.i.i.i88 to i8 + store i8 %conv3.i.i90, ptr %zf.i.i, align 1, !tbaa !28 + store 
i8 0, ptr %sf.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + store i8 0, ptr %af.i.i, align 1, !tbaa !31 + store i64 %122, ptr %PC, align 8 + %124 = add i64 %program_counter, 166 + %125 = load i32, ptr %EBP, align 4 + %sub.i.i97 = add i32 %125, -16632832 + %cmp.i.i.i98 = icmp ult i32 %125, 16632832 + %conv.i12.i = zext i1 %cmp.i.i.i98 to i8 + store i8 %conv.i12.i, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i101 = trunc i32 %sub.i.i97 to i8 + %126 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i101), !range !26 + %127 = and i8 %126, 1 + %128 = xor i8 %127, 1 + store i8 %128, ptr %pf.i.i, align 1, !tbaa !27 + %129 = xor i32 %125, %sub.i.i97 + %130 = trunc i32 %129 to i8 + %131 = lshr i8 %130, 4 + %132 = and i8 %131, 1 + store i8 %132, ptr %af.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i106 = icmp eq i32 %125, 16632832 + %conv5.i.i.i108 = zext i1 %cmp.i.i.i.i106 to i8 + store i8 %conv5.i.i.i108, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i110 = icmp slt i32 %sub.i.i97, 0 + %conv8.i.i.i112 = zext i1 %cmp.i27.i.i.i110 to i8 + store i8 %conv8.i.i.i112, ptr %sf.i.i, align 1, !tbaa !29 + %shr.i.i.i.i114 = lshr i32 %125, 31 + %shr2.i.i.i.i115 = lshr i32 %sub.i.i97, 31 + %xor3.i.i.i.i116 = xor i32 %shr2.i.i.i.i115, %shr.i.i.i.i114 + %add.i.i.i.i117 = add nuw nsw i32 %xor3.i.i.i.i116, %shr.i.i.i.i114 + %cmp.i29.i.i.i118 = icmp eq i32 %add.i.i.i.i117, 2 + %conv11.i.i.i120 = zext i1 %cmp.i29.i.i.i118 to i8 + store i8 %conv11.i.i.i120, ptr %of.i.i, align 1, !tbaa !30 + store i64 %124, ptr %PC, align 8 + %cond1.i.i.v = select i1 %cmp.i.i.i.i106, i64 191, i64 168 + %cond1.i.i = add i64 %cond1.i.i.v, %program_counter + store i64 %cond1.i.i, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199688(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %RBX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, 
!remill_register !2 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + %2 = load i64, ptr %RSP, align 8 + %3 = add i64 %2, 8 + store i64 %3, ptr %RDI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %4 = add i64 %program_counter, 8 + %5 = load i64, ptr %RBX, align 8 + store i64 %5, ptr %RSI, align 8, !tbaa !5 + store i64 %4, ptr %PC, align 8 + %6 = add i64 %program_counter, 13 + %7 = add i64 %program_counter, 456 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %8 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %8, -8 + %9 = inttoptr i64 %sub.i.i to ptr + store i64 %6, ptr %9, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %7, ptr %rip.i, align 8, !tbaa !5 + %10 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %6, ptr %PC, align 8 + store i64 %6, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199297(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %ECX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 5, i32 0, i32 0, !remill_register !49 + %EDX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 7, i32 0, i32 0, !remill_register !50 + %EAX = getelementptr inbounds %struct.State, ptr 
%state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !0 + %ESI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !51 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %RBX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, !remill_register !2 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + store i64 32, ptr %RDI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %2 = add i64 %program_counter, 10 + %3 = add i64 %program_counter, -625 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %4 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %4, -8 + %5 = inttoptr i64 %sub.i.i to ptr + store i64 %2, ptr %5, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %3, ptr %rip.i, align 8, !tbaa !5 + %6 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %2, ptr %PC, align 8 + %7 = add i64 %program_counter, 13 + %8 = load i64, ptr %EAX, align 8 + store i64 %8, ptr %RBX, align 8, !tbaa !5 + store i64 %7, ptr %PC, align 8 + %9 = add i64 %program_counter, 16 + store i64 %8, ptr %RDI, align 8, !tbaa !5 + store i64 %9, ptr %PC, align 8 + %10 = add i64 %program_counter, 21 + %11 = add i64 %program_counter, 2047 + %12 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i2 = add i64 %12, -8 + %13 = inttoptr i64 %sub.i.i2 to ptr + store i64 %10, ptr %13, align 8 + store i64 %sub.i.i2, ptr %rsp.i, align 8, !tbaa !5 + store i64 
%11, ptr %rip.i, align 8, !tbaa !5 + %14 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %10, ptr %PC, align 8 + %15 = add i64 %program_counter, 26 + %16 = load i64, ptr %RSP, align 8 + %17 = add i64 %16, 32 + %18 = inttoptr i64 %17 to ptr + %19 = load i64, ptr %18, align 8 + store i64 %19, ptr %ESI, align 8, !tbaa !5 + store i64 %15, ptr %PC, align 8 + %20 = add i64 %program_counter, 29 + %21 = add i64 %19, 63 + %22 = trunc i64 %21 to i32 + %23 = zext i32 %22 to i64 + store i64 %23, ptr %EAX, align 8, !tbaa !5 + store i64 %20, ptr %PC, align 8 + %24 = add i64 %program_counter, 31 + %25 = load i32, ptr %ESI, align 4 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i = trunc i32 %25 to i8 + %26 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i), !range !26 + %27 = and i8 %26, 1 + %28 = xor i8 %27, 1 + %pf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %28, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i = icmp eq i32 %25, 0 + %conv3.i.i = zext i1 %cmp.i.i.i to i8 + %zf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv3.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i = icmp slt i32 %25, 0 + %conv6.i.i = zext i1 %cmp.i19.i.i to i8 + %sf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv6.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %of.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + %af.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %24, ptr %PC, align 8 + %29 = add i64 %program_counter, 34 + %30 = zext i32 %25 to i64 + %cond1.i.v.i = select i1 %cmp.i19.i.i, i64 %21, i64 %30 + %31 = trunc i64 %cond1.i.v.i to i32 + %32 = zext i32 %31 to i64 + 
store i64 %32, ptr %EAX, align 8, !tbaa !5 + store i64 %29, ptr %PC, align 8 + %33 = add i64 %program_counter, 36 + %34 = load i32, ptr %EAX, align 4 + %35 = zext i32 %34 to i64 + store i64 %35, ptr %EDX, align 8, !tbaa !5 + store i64 %33, ptr %PC, align 8 + %36 = add i64 %program_counter, 39 + %and3.i.i7 = and i32 %34, -64 + %conv.i22.i = zext i32 %and3.i.i7 to i64 + store i64 %conv.i22.i, ptr %EDX, align 8, !tbaa !5 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i9 = trunc i32 %and3.i.i7 to i8 + %37 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i9), !range !26 + %38 = and i8 %37, 1 + %39 = xor i8 %38, 1 + store i8 %39, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i11 = icmp eq i32 %and3.i.i7, 0 + %conv3.i.i13 = zext i1 %cmp.i.i.i11 to i8 + store i8 %conv3.i.i13, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i15 = icmp slt i32 %and3.i.i7, 0 + %conv6.i.i17 = zext i1 %cmp.i19.i.i15 to i8 + store i8 %conv6.i.i17, ptr %sf.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + store i8 0, ptr %af.i.i, align 1, !tbaa !31 + store i64 %36, ptr %PC, align 8 + %40 = add i64 %program_counter, 41 + store i64 %30, ptr %ECX, align 8, !tbaa !5 + store i64 %40, ptr %PC, align 8 + %41 = add i64 %program_counter, 43 + %42 = load i32, ptr %EDX, align 4 + %sub.i.i23 = sub i32 %25, %42 + %conv.i22.i24 = zext i32 %sub.i.i23 to i64 + store i64 %conv.i22.i24, ptr %ECX, align 8, !tbaa !5 + %cmp.i.i.i25 = icmp ult i32 %25, %42 + %conv.i23.i = zext i1 %cmp.i.i.i25 to i8 + store i8 %conv.i23.i, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i = trunc i32 %sub.i.i23 to i8 + %43 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i), !range !26 + %44 = and i8 %43, 1 + %45 = xor i8 %44, 1 + store i8 %45, ptr %pf.i.i, align 1, !tbaa !27 + %xor.i.i.i.i = xor i32 %42, %25 + %xor1.i.i.i.i = xor i32 %xor.i.i.i.i, %sub.i.i23 + %46 = trunc i32 %xor1.i.i.i.i to i8 + %47 = lshr i8 %46, 4 + %48 = and i8 %47, 1 + store i8 %48, ptr %af.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i = icmp eq i32 %25, %42 + 
%conv5.i.i.i = zext i1 %cmp.i.i.i.i to i8 + store i8 %conv5.i.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i = icmp slt i32 %sub.i.i23, 0 + %conv8.i.i.i = zext i1 %cmp.i27.i.i.i to i8 + store i8 %conv8.i.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %shr.i.i.i.i = lshr i32 %25, 31 + %shr1.i.i.i.i = lshr i32 %42, 31 + %shr2.i.i.i.i = lshr i32 %sub.i.i23, 31 + %xor.i28.i.i.i = xor i32 %shr1.i.i.i.i, %shr.i.i.i.i + %xor3.i.i.i.i = xor i32 %shr2.i.i.i.i, %shr.i.i.i.i + %add.i.i.i.i = add nuw nsw i32 %xor3.i.i.i.i, %xor.i28.i.i.i + %cmp.i29.i.i.i = icmp eq i32 %add.i.i.i.i, 2 + %conv11.i.i.i = zext i1 %cmp.i29.i.i.i to i8 + store i8 %conv11.i.i.i, ptr %of.i.i, align 1, !tbaa !30 + store i64 %41, ptr %PC, align 8 + %49 = add i64 %program_counter, 48 + store i64 1, ptr %EDX, align 8, !tbaa !5 + store i64 %49, ptr %PC, align 8 + %50 = load i8, ptr %ECX, align 1 + %51 = and i8 %50, 63 + %and.i.i = zext i8 %51 to i64 + switch i64 %and.i.i, label %if.then35.i [ + i64 0, label %do.body16.i + i64 1, label %do.body55.i + ] + +do.body16.i: ; preds = %0 + store i64 1, ptr %EDX, align 8, !tbaa !5 + br label %_ZN12_GLOBAL__N_13SHLI3RnWIyE2RnIyLb1EES4_EEP6MemoryS6_R5StateT_T0_T1_.exit + +if.then35.i: ; preds = %0 + %sub.i.i28 = add nsw i64 %and.i.i, -1 + %shl.i145.i = shl i64 1, %sub.i.i28 + %shl.i147.i = shl i64 2, %sub.i.i28 + %phi.bo = lshr i64 %shl.i145.i, 63 + %phi.cast = trunc i64 %phi.bo to i8 + br label %do.body55.i + +do.body55.i: ; preds = %0, %if.then35.i + %new_cf.0.shrunk.in.i = phi i8 [ %phi.cast, %if.then35.i ], [ 0, %0 ] + %new_val.0.i = phi i64 [ %shl.i147.i, %if.then35.i ], [ 2, %0 ] + store i64 %new_val.0.i, ptr %EDX, align 8, !tbaa !5 + store i8 %new_cf.0.shrunk.in.i, ptr %cf.i.i, align 1, !tbaa !32 + %conv.i.i29 = trunc i64 %new_val.0.i to i8 + %52 = call i8 @llvm.ctpop.i8(i8 %conv.i.i29), !range !26 + %53 = and i8 %52, 1 + %54 = xor i8 %53, 1 + store i8 %54, ptr %pf.i.i, align 1, !tbaa !32 + store i8 undef, ptr %af.i.i, align 1, !tbaa !32 + %cmp.i148.i = icmp eq 
i64 %new_val.0.i, 0 + %conv85.i = zext i1 %cmp.i148.i to i8 + store i8 %conv85.i, ptr %zf.i.i, align 1, !tbaa !32 + %new_val.0.lobit.i = lshr i64 %new_val.0.i, 63 + %55 = trunc i64 %new_val.0.lobit.i to i8 + store i8 %55, ptr %sf.i.i, align 1, !tbaa !32 + store i8 0, ptr %of.i.i, align 1, !tbaa !32 + br label %_ZN12_GLOBAL__N_13SHLI3RnWIyE2RnIyLb1EES4_EEP6MemoryS6_R5StateT_T0_T1_.exit + +_ZN12_GLOBAL__N_13SHLI3RnWIyE2RnIyLb1EES4_EEP6MemoryS6_R5StateT_T0_T1_.exit: ; preds = %do.body55.i, %do.body16.i + %56 = add i64 %program_counter, 51 + store i64 %56, ptr %PC, align 8 + %57 = add i64 %program_counter, 56 + %58 = add i64 %16, 56 + %59 = load i64, ptr %EDX, align 8 + %60 = inttoptr i64 %58 to ptr + store i64 %59, ptr %60, align 8 + store i64 %57, ptr %PC, align 8 + %61 = add i64 %program_counter, 59 + %62 = load i64, ptr %ESI, align 8 + %63 = add i64 %62, 1 + %64 = trunc i64 %63 to i32 + %65 = zext i32 %64 to i64 + store i64 %65, ptr %ECX, align 8, !tbaa !5 + store i64 %61, ptr %PC, align 8 + %66 = add i64 %program_counter, 63 + %67 = add i64 %16, 28 + %68 = load i32, ptr %ECX, align 4 + %69 = inttoptr i64 %67 to ptr + store i32 %68, ptr %69, align 4 + store i64 %66, ptr %PC, align 8 + %70 = add i64 %program_counter, 66 + %71 = load i64, ptr %EAX, align 8 + %sext167.i = shl i64 %71, 32 + %72 = lshr i64 %sext167.i, 37 + %shr.i160.i = ashr i64 %sext167.i, 38 + %new_val.0.i34 = trunc i64 %shr.i160.i to i32 + %73 = zext i32 %new_val.0.i34 to i64 + store i64 %73, ptr %EAX, align 8, !tbaa !5 + %74 = trunc i64 %72 to i8 + %75 = and i8 %74, 1 + store i8 %75, ptr %cf.i.i, align 1, !tbaa !32 + %conv.i164.i = trunc i64 %shr.i160.i to i8 + %76 = call i8 @llvm.ctpop.i8(i8 %conv.i164.i), !range !26 + %77 = and i8 %76, 1 + %78 = xor i8 %77, 1 + store i8 %78, ptr %pf.i.i, align 1, !tbaa !32 + store i8 undef, ptr %af.i.i, align 1, !tbaa !32 + %cmp.i165.i = icmp eq i32 %new_val.0.i34, 0 + %conv88.i = zext i1 %cmp.i165.i to i8 + store i8 %conv88.i, ptr %zf.i.i, align 1, !tbaa !32 + 
%new_val.0.lobit.i40 = lshr i32 %new_val.0.i34, 31 + %79 = trunc i32 %new_val.0.lobit.i40 to i8 + store i8 %79, ptr %sf.i.i, align 1, !tbaa !32 + store i8 0, ptr %of.i.i, align 1, !tbaa !32 + store i64 %70, ptr %PC, align 8 + %80 = add i64 %program_counter, 68 + %rax.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 1 + %81 = load i32, ptr %rax.i, align 8, !tbaa !32 + %conv.i.i42 = sext i32 %81 to i64 + store i64 %conv.i.i42, ptr %rax.i, align 8, !tbaa !5 + store i64 %80, ptr %PC, align 8 + %82 = add i64 %16, 48 + %83 = load i64, ptr %EAX, align 8 + %84 = inttoptr i64 %82 to ptr + store i64 %83, ptr %84, align 8 + %85 = add i64 %16, 20 + %86 = inttoptr i64 %85 to ptr + store i32 0, ptr %86, align 4 + %87 = add i64 %program_counter, 91 + store i64 %87, ptr %PC, align 8 + %88 = add i64 %program_counter, 95 + store i64 %88, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199665(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %EBP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 15, i32 0, i32 0, !remill_register !43 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 6 + %2 = load i32, ptr %EBP, align 4 + %sub.i.i = add i32 %2, -16707840 + %cmp.i.i.i = icmp ult i32 %2, 16707840 + %conv.i12.i = zext i1 %cmp.i.i.i to i8 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 %conv.i12.i, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i = trunc i32 %sub.i.i to i8 + %3 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i), !range !26 + %4 = and i8 %3, 1 + %5 = xor i8 %4, 1 + %pf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %5, ptr %pf.i.i.i, align 1, !tbaa !27 + %6 = xor i32 %2, %sub.i.i + 
%7 = trunc i32 %6 to i8 + %8 = lshr i8 %7, 4 + %9 = and i8 %8, 1 + %af.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 %9, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i = icmp eq i32 %2, 16707840 + %conv5.i.i.i = zext i1 %cmp.i.i.i.i to i8 + %zf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv5.i.i.i, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i = icmp slt i32 %sub.i.i, 0 + %conv8.i.i.i = zext i1 %cmp.i27.i.i.i to i8 + %sf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv8.i.i.i, ptr %sf.i.i.i, align 1, !tbaa !29 + %shr.i.i.i.i = lshr i32 %2, 31 + %shr2.i.i.i.i = lshr i32 %sub.i.i, 31 + %xor3.i.i.i.i = xor i32 %shr2.i.i.i.i, %shr.i.i.i.i + %add.i.i.i.i = add nuw nsw i32 %xor3.i.i.i.i, %shr.i.i.i.i + %cmp.i29.i.i.i = icmp eq i32 %add.i.i.i.i, 2 + %conv11.i.i.i = zext i1 %cmp.i29.i.i.i to i8 + %of.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 %conv11.i.i.i, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %1, ptr %PC, align 8 + %cond1.i.i.v = select i1 %cmp.i.i.i.i, i64 8, i64 36 + %cond1.i.i = add i64 %cond1.i.i.v, %program_counter + store i64 %cond1.i.i, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199918(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %RAX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !44 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 4 + %2 = load i64, ptr %RSP, align 8 + %3 = add i64 %2, 20 + %4 = inttoptr 
i64 %3 to ptr + %5 = load i32, ptr %4, align 4 + %conv.i.i = zext i32 %5 to i64 + store i64 %conv.i.i, ptr %RAX, align 8, !tbaa !5 + store i64 %1, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199219(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %EAX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !0 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + store i64 4202677, ptr %RDI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %2 = add i64 %program_counter, 10 + %3 = add i64 %program_counter, -755 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %4 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %4, -8 + %5 = inttoptr i64 %sub.i.i to ptr + store i64 %2, ptr %5, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %3, ptr %rip.i, align 8, !tbaa !5 + %6 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %2, ptr %PC, align 8 + %7 = add i64 %program_counter, 12 + %8 = load i64, ptr %EAX, align 8 + %9 = load i32, ptr %EAX, align 4 + %conv.i.i = trunc i64 %8 to i32 + %xor3.i.i = xor i32 %9, %conv.i.i + %conv.i27.i = zext i32 %xor3.i.i to i64 + store i64 %conv.i27.i, ptr %EAX, align 8, !tbaa !5 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i = trunc i32 %xor3.i.i to i8 + %10 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i), !range !26 + 
%11 = and i8 %10, 1 + %12 = xor i8 %11, 1 + %pf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %12, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i = icmp eq i32 %xor3.i.i, 0 + %conv3.i.i = zext i1 %cmp.i.i.i to i8 + %zf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv3.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i = icmp slt i32 %xor3.i.i, 0 + %conv6.i.i = zext i1 %cmp.i19.i.i to i8 + %sf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv6.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %of.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + %af.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %7, ptr %PC, align 8 + %13 = add i64 %program_counter, 17 + %14 = add i64 %program_counter, 941 + %15 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i2 = add i64 %15, -8 + %16 = inttoptr i64 %sub.i.i2 to ptr + store i64 %13, ptr %16, align 8 + store i64 %sub.i.i2, ptr %rsp.i, align 8, !tbaa !5 + store i64 %14, ptr %rip.i, align 8, !tbaa !5 + %17 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + %18 = add i64 %program_counter, 27 + store i64 %18, ptr %PC, align 8 + %19 = add i64 %program_counter, 29 + store i64 %19, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199392(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %ECX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 5, i32 0, i32 0, !remill_register !49 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 
13, i32 0, i32 0, !remill_register !34 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %EAX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !0 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + store i64 16, ptr %ECX, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %2 = add i64 %program_counter, 7 + %3 = load i64, ptr %EAX, align 8 + %4 = load i32, ptr %EAX, align 4 + %conv.i.i = trunc i64 %3 to i32 + %xor3.i.i = xor i32 %4, %conv.i.i + %conv.i27.i = zext i32 %xor3.i.i to i64 + store i64 %conv.i27.i, ptr %EAX, align 8, !tbaa !5 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i = trunc i32 %xor3.i.i to i8 + %5 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i), !range !26 + %6 = and i8 %5, 1 + %7 = xor i8 %6, 1 + %pf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %7, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i = icmp eq i32 %xor3.i.i, 0 + %conv3.i.i = zext i1 %cmp.i.i.i to i8 + %zf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv3.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i = icmp slt i32 %xor3.i.i, 0 + %conv6.i.i = zext i1 %cmp.i19.i.i to i8 + %sf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv6.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %of.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + %af.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %2, ptr %PC, align 8 + %8 = add i64 
%program_counter, 12 + %9 = load i64, ptr %RSP, align 8 + %10 = add i64 %9, 120 + store i64 %10, ptr %RSI, align 8, !tbaa !5 + store i64 %8, ptr %PC, align 8 + %11 = add i64 %program_counter, 15 + store i64 %10, ptr %RDI, align 8, !tbaa !5 + store i64 %11, ptr %PC, align 8 + %12 = add i64 %program_counter, 16 + %df.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 11 + store i8 0, ptr %df.i, align 1, !tbaa !52 + store i64 %12, ptr %PC, align 8 + %rcx.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 5 + %13 = load i64, ptr %rcx.i, align 8, !tbaa !32 + %cmp.i.not14.i = icmp eq i64 %13, 0 + br i1 %cmp.i.not14.i, label %_ZN12_GLOBAL__N_111DoREP_STOSQEP6MemoryR5State.exit, label %while.body.lr.ph.i + +while.body.lr.ph.i: ; preds = %0 + %rdi.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 11 + %rax.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 1 + %14 = load i64, ptr %rax.i.i, align 8, !tbaa !32 + %rdi.i.promoted.i = load i64, ptr %rdi.i.i, align 8, !tbaa !32 + %15 = shl i64 %13, 3 + br label %while.body.i + +while.body.i: ; preds = %while.body.i, %while.body.lr.ph.i + %next_addr.0.i17.i = phi i64 [ %rdi.i.promoted.i, %while.body.lr.ph.i ], [ %next_addr.0.i.i, %while.body.i ] + %count_reg.016.i = phi i64 [ %13, %while.body.lr.ph.i ], [ %sub.i.i, %while.body.i ] + %16 = inttoptr i64 %next_addr.0.i17.i to ptr + store i64 %14, ptr %16, align 8 + %next_addr.0.i.i = add i64 %next_addr.0.i17.i, 8 + %sub.i.i = add i64 %count_reg.016.i, -1 + %cmp.i.not.i = icmp eq i64 %sub.i.i, 0 + br i1 %cmp.i.not.i, label %while.cond.while.end_crit_edge.i, label %while.body.i + +while.cond.while.end_crit_edge.i: ; preds = %while.body.i + %17 = add i64 %rdi.i.promoted.i, %15 + store i64 %17, ptr %rdi.i.i, align 8, !tbaa !32 + store i64 0, ptr %rcx.i, align 8, !tbaa !5 + br label %_ZN12_GLOBAL__N_111DoREP_STOSQEP6MemoryR5State.exit + +_ZN12_GLOBAL__N_111DoREP_STOSQEP6MemoryR5State.exit: 
; preds = %while.cond.while.end_crit_edge.i, %0 + %18 = add i64 %program_counter, 19 + %EDX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 7, i32 0, i32 0, !remill_register !50 + %R8 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 17, i32 0, i32 0, !remill_register !40 + store i64 %18, ptr %PC, align 8 + %19 = add i64 %program_counter, 24 + %20 = add i64 %9, 56 + %21 = inttoptr i64 %20 to ptr + %22 = load i64, ptr %21, align 8 + store i64 %22, ptr %EAX, align 8, !tbaa !5 + store i64 %19, ptr %PC, align 8 + %23 = add i64 %program_counter, 29 + %24 = add i64 %9, 48 + %25 = inttoptr i64 %24 to ptr + %26 = load i64, ptr %25, align 8 + store i64 %26, ptr %ECX, align 8, !tbaa !5 + store i64 %23, ptr %PC, align 8 + %27 = shl i64 %26, 3 + %28 = add i64 %9, %27 + %29 = add i64 %28, 120 + %30 = inttoptr i64 %29 to ptr + %31 = load i64, ptr %30, align 8 + %or.i.i = or i64 %31, %22 + store i64 %or.i.i, ptr %30, align 8 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i5 = trunc i64 %or.i.i to i8 + %32 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i5), !range !26 + %33 = and i8 %32, 1 + %34 = xor i8 %33, 1 + store i8 %34, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i7 = icmp eq i64 %or.i.i, 0 + %conv3.i.i9 = zext i1 %cmp.i.i.i7 to i8 + store i8 %conv3.i.i9, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i11 = icmp slt i64 %or.i.i, 0 + %conv6.i.i13 = zext i1 %cmp.i19.i.i11 to i8 + store i8 %conv6.i.i13, ptr %sf.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + %35 = add i64 %9, 96 + %36 = inttoptr i64 %35 to ptr + store i64 0, ptr %36, align 8 + %37 = add i64 %program_counter, 52 + %38 = add i64 %9, 104 + %39 = inttoptr i64 %38 to ptr + store i64 50, ptr %39, align 8 + store i64 %37, ptr %PC, align 8 + %40 = add i64 %program_counter, 56 + %41 = add i64 %9, 28 + %42 = inttoptr i64 %41 to ptr + %43 = load i32, ptr %42, align 4 + %conv.i.i21 = zext i32 %43 
to i64 + store i64 %conv.i.i21, ptr %RDI, align 8, !tbaa !5 + store i64 %40, ptr %PC, align 8 + %44 = add i64 %program_counter, 58 + %45 = load i64, ptr %EDX, align 8 + %46 = load i32, ptr %EDX, align 4 + %conv.i.i22 = trunc i64 %45 to i32 + %xor3.i.i23 = xor i32 %46, %conv.i.i22 + %conv.i27.i24 = zext i32 %xor3.i.i23 to i64 + store i64 %conv.i27.i24, ptr %EDX, align 8, !tbaa !5 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i26 = trunc i32 %xor3.i.i23 to i8 + %47 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i26), !range !26 + %48 = and i8 %47, 1 + %49 = xor i8 %48, 1 + store i8 %49, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i28 = icmp eq i32 %xor3.i.i23, 0 + %conv3.i.i30 = zext i1 %cmp.i.i.i28 to i8 + store i8 %conv3.i.i30, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i32 = icmp slt i32 %xor3.i.i23, 0 + %conv6.i.i34 = zext i1 %cmp.i19.i.i32 to i8 + store i8 %conv6.i.i34, ptr %sf.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %44, ptr %PC, align 8 + %50 = add i64 %program_counter, 60 + %51 = load i32, ptr %ECX, align 4 + %conv.i.i39 = trunc i64 %26 to i32 + %xor3.i.i40 = xor i32 %51, %conv.i.i39 + %conv.i27.i41 = zext i32 %xor3.i.i40 to i64 + store i64 %conv.i27.i41, ptr %ECX, align 8, !tbaa !5 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i43 = trunc i32 %xor3.i.i40 to i8 + %52 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i43), !range !26 + %53 = and i8 %52, 1 + %54 = xor i8 %53, 1 + store i8 %54, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i45 = icmp eq i32 %xor3.i.i40, 0 + %conv3.i.i47 = zext i1 %cmp.i.i.i45 to i8 + store i8 %conv3.i.i47, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i49 = icmp slt i32 %xor3.i.i40, 0 + %conv6.i.i51 = zext i1 %cmp.i19.i.i49 to i8 + store i8 %conv6.i.i51, ptr %sf.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %50, ptr %PC, align 8 + %55 = add i64 
%program_counter, 65 + store i64 %35, ptr %R8, align 8, !tbaa !5 + store i64 %55, ptr %PC, align 8 + %56 = add i64 %program_counter, 70 + %57 = add i64 %program_counter, -736 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %58 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i56 = add i64 %58, -8 + %59 = inttoptr i64 %sub.i.i56 to ptr + store i64 %56, ptr %59, align 8 + store i64 %sub.i.i56, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %57, ptr %rip.i, align 8, !tbaa !5 + %60 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %56, ptr %PC, align 8 + %61 = add i64 %program_counter, 72 + %62 = load i32, ptr %EAX, align 4 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i59 = trunc i32 %62 to i8 + %63 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i59), !range !26 + %64 = and i8 %63, 1 + %65 = xor i8 %64, 1 + store i8 %65, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i61 = icmp eq i32 %62, 0 + %conv3.i.i63 = zext i1 %cmp.i.i.i61 to i8 + store i8 %conv3.i.i63, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i65 = icmp slt i32 %62, 0 + %conv6.i.i67 = zext i1 %cmp.i19.i.i65 to i8 + store i8 %conv6.i.i67, ptr %sf.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %61, ptr %PC, align 8 + %66 = or i1 %cmp.i.i.i61, %cmp.i19.i.i65 + %cond1.i.i.v = select i1 %66, i64 309, i64 78 + %cond1.i.i = add i64 %cond1.i.i.v, %program_counter + store i64 %cond1.i.i, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn +declare zeroext i1 @__remill_compare_sle(i1 noundef zeroext) local_unnamed_addr #2 + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199024(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) 
unnamed_addr #0 { + %EDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !53 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %RBX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, !remill_register !2 + %R12 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 25, i32 0, i32 0, !remill_register !38 + %R13 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 27, i32 0, i32 0, !remill_register !37 + %R14 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 29, i32 0, i32 0, !remill_register !35 + %R15 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 31, i32 0, i32 0, !remill_register !36 + %RBP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 15, i32 0, i32 0, !remill_register !33 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 1 + %2 = load i64, ptr %RBP, align 8 + %rsp.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %3 = load i64, ptr %rsp.i.i, align 8, !tbaa !32 + %sub.i.i.i = add i64 %3, -8 + %4 = inttoptr i64 %sub.i.i.i to ptr + store i64 %2, ptr %4, align 8 + store i64 %sub.i.i.i, ptr %rsp.i.i, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %5 = add i64 %program_counter, 3 + %6 = load i64, ptr %R15, align 8 + %sub.i.i.i2 = add i64 %3, -16 + %7 = inttoptr i64 %sub.i.i.i2 to ptr + store i64 %6, ptr %7, align 8 + store i64 %sub.i.i.i2, ptr %rsp.i.i, align 8, !tbaa !5 + store i64 %5, ptr %PC, align 8 + %8 = add i64 %program_counter, 5 + %9 = load i64, 
ptr %R14, align 8 + %sub.i.i.i5 = add i64 %3, -24 + %10 = inttoptr i64 %sub.i.i.i5 to ptr + store i64 %9, ptr %10, align 8 + store i64 %sub.i.i.i5, ptr %rsp.i.i, align 8, !tbaa !5 + store i64 %8, ptr %PC, align 8 + %11 = add i64 %program_counter, 7 + %12 = load i64, ptr %R13, align 8 + %sub.i.i.i8 = add i64 %3, -32 + %13 = inttoptr i64 %sub.i.i.i8 to ptr + store i64 %12, ptr %13, align 8 + store i64 %sub.i.i.i8, ptr %rsp.i.i, align 8, !tbaa !5 + store i64 %11, ptr %PC, align 8 + %14 = add i64 %program_counter, 9 + %15 = load i64, ptr %R12, align 8 + %sub.i.i.i11 = add i64 %3, -40 + %16 = inttoptr i64 %sub.i.i.i11 to ptr + store i64 %15, ptr %16, align 8 + store i64 %sub.i.i.i11, ptr %rsp.i.i, align 8, !tbaa !5 + store i64 %14, ptr %PC, align 8 + %17 = add i64 %program_counter, 10 + %18 = load i64, ptr %RBX, align 8 + %sub.i.i.i14 = add i64 %3, -48 + %19 = inttoptr i64 %sub.i.i.i14 to ptr + store i64 %18, ptr %19, align 8 + store i64 %sub.i.i.i14, ptr %rsp.i.i, align 8, !tbaa !5 + store i64 %17, ptr %PC, align 8 + %20 = add i64 %program_counter, 17 + %21 = load i64, ptr %RSP, align 8 + %sub.i.i = add i64 %21, -248 + store i64 %sub.i.i, ptr %RSP, align 8, !tbaa !5 + %cmp.i.i.i = icmp ult i64 %21, 248 + %conv.i.i = zext i1 %cmp.i.i.i to i8 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 %conv.i.i, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i = trunc i64 %sub.i.i to i8 + %22 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i), !range !26 + %23 = and i8 %22, 1 + %24 = xor i8 %23, 1 + %pf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %24, ptr %pf.i.i.i, align 1, !tbaa !27 + %25 = xor i64 %21, %sub.i.i + %26 = trunc i64 %25 to i8 + %27 = xor i8 %26, -1 + %28 = lshr i8 %27, 4 + %29 = and i8 %28, 1 + %af.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 %29, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i = icmp eq i64 %21, 248 + %conv5.i.i.i = 
zext i1 %cmp.i.i.i.i to i8 + %zf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv5.i.i.i, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i = icmp slt i64 %sub.i.i, 0 + %conv8.i.i.i = zext i1 %cmp.i27.i.i.i to i8 + %sf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv8.i.i.i, ptr %sf.i.i.i, align 1, !tbaa !29 + %shr.i.i.i.i = lshr i64 %21, 63 + %shr2.i.i.i.i = lshr i64 %sub.i.i, 63 + %xor3.i.i.i.i = xor i64 %shr2.i.i.i.i, %shr.i.i.i.i + %add.i.i.i.i = add nuw nsw i64 %xor3.i.i.i.i, %shr.i.i.i.i + %cmp.i29.i.i.i = icmp eq i64 %add.i.i.i.i, 2 + %conv11.i.i.i = zext i1 %cmp.i29.i.i.i to i8 + %of.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 %conv11.i.i.i, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %20, ptr %PC, align 8 + %30 = add i64 %program_counter, 20 + %31 = load i64, ptr %RSI, align 8 + store i64 %31, ptr %RBX, align 8, !tbaa !5 + store i64 %30, ptr %PC, align 8 + %32 = add i64 %program_counter, 23 + %33 = load i32, ptr %EDI, align 4 + %sub.i.i17 = add i32 %33, -1 + %cmp.i.i.i18 = icmp eq i32 %33, 0 + %conv.i12.i = zext i1 %cmp.i.i.i18 to i8 + store i8 %conv.i12.i, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i21 = trunc i32 %sub.i.i17 to i8 + %34 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i21), !range !26 + %35 = and i8 %34, 1 + %36 = xor i8 %35, 1 + store i8 %36, ptr %pf.i.i.i, align 1, !tbaa !27 + %37 = xor i32 %33, %sub.i.i17 + %38 = trunc i32 %37 to i8 + %39 = lshr i8 %38, 4 + %40 = and i8 %39, 1 + store i8 %40, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i26 = icmp eq i32 %33, 1 + %conv5.i.i.i28 = zext i1 %cmp.i.i.i.i26 to i8 + store i8 %conv5.i.i.i28, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i30 = icmp slt i32 %sub.i.i17, 0 + %conv8.i.i.i32 = zext i1 %cmp.i27.i.i.i30 to i8 + store i8 %conv8.i.i.i32, ptr %sf.i.i.i, align 1, !tbaa !29 + %shr.i.i.i.i34 = lshr i32 %33, 31 + %shr2.i.i.i.i35 = lshr i32 
%sub.i.i17, 31 + %xor3.i.i.i.i36 = xor i32 %shr2.i.i.i.i35, %shr.i.i.i.i34 + %add.i.i.i.i37 = add nuw nsw i32 %xor3.i.i.i.i36, %shr.i.i.i.i34 + %cmp.i29.i.i.i38 = icmp eq i32 %add.i.i.i.i37, 2 + %conv11.i.i.i40 = zext i1 %cmp.i29.i.i.i38 to i8 + store i8 %conv11.i.i.i40, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %32, ptr %PC, align 8 + %41 = xor i1 %cmp.i27.i.i.i30, %cmp.i29.i.i.i38 + %.demorgan = or i1 %cmp.i.i.i.i26, %41 + %42 = xor i1 %.demorgan, true + %cond1.i.i.v = select i1 %42, i64 50, i64 25 + %cond1.i.i = add i64 %cond1.i.i.v, %program_counter + store i64 %cond1.i.i, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn +declare zeroext i1 @__remill_compare_sgt(i1 noundef zeroext) local_unnamed_addr #2 + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199184(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %EAX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !0 + %RBP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 15, i32 0, i32 0, !remill_register !33 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %R14 = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 29, i32 0, i32 0, !remill_register !35 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + store i64 4202692, ptr %RDI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %2 = add i64 %program_counter, 10 + %3 = add i64 %program_counter, -720 + %rsp.i = getelementptr inbounds 
%struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %4 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %4, -8 + %5 = inttoptr i64 %sub.i.i to ptr + store i64 %2, ptr %5, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %3, ptr %rip.i, align 8, !tbaa !5 + %6 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %2, ptr %PC, align 8 + %7 = add i64 %program_counter, 15 + store i64 3000, ptr %RDI, align 8, !tbaa !5 + store i64 %7, ptr %PC, align 8 + %8 = add i64 %program_counter, 20 + %9 = add i64 %program_counter, -432 + %10 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i2 = add i64 %10, -8 + %11 = inttoptr i64 %sub.i.i2 to ptr + store i64 %8, ptr %11, align 8 + store i64 %sub.i.i2, ptr %rsp.i, align 8, !tbaa !5 + store i64 %9, ptr %rip.i, align 8, !tbaa !5 + %12 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %8, ptr %PC, align 8 + %13 = add i64 %program_counter, 23 + %14 = load i64, ptr %R14, align 8 + store i64 %14, ptr %RDI, align 8, !tbaa !5 + store i64 %13, ptr %PC, align 8 + %15 = add i64 %program_counter, 26 + %16 = load i64, ptr %RBP, align 8 + store i64 %16, ptr %RSI, align 8, !tbaa !5 + store i64 %15, ptr %PC, align 8 + %17 = add i64 %program_counter, 31 + %18 = add i64 %program_counter, 768 + %19 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i6 = add i64 %19, -8 + %20 = inttoptr i64 %sub.i.i6 to ptr + store i64 %17, ptr %20, align 8 + store i64 %sub.i.i6, ptr %rsp.i, align 8, !tbaa !5 + store i64 %18, ptr %rip.i, align 8, !tbaa !5 + %21 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %17, ptr %PC, align 8 + %22 = add i64 %program_counter, 33 + %23 = load i32, ptr %EAX, align 4 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 
1, !tbaa !9 + %conv.i.i.i = trunc i32 %23 to i8 + %24 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i), !range !26 + %25 = and i8 %24, 1 + %26 = xor i8 %25, 1 + %pf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %26, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i = icmp eq i32 %23, 0 + %conv3.i.i = zext i1 %cmp.i.i.i to i8 + %zf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv3.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i = icmp slt i32 %23, 0 + %conv6.i.i = zext i1 %cmp.i19.i.i to i8 + %sf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv6.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %of.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + %af.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %22, ptr %PC, align 8 + %27 = add i64 %program_counter, 35 + %tobool.not.i = xor i1 %cmp.i.i.i, true + %cond1.i.i = select i1 %tobool.not.i, i64 %program_counter, i64 %27 + store i64 %cond1.i.i, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199248(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %RBX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, !remill_register !2 + %RDX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 7, i32 0, i32 0, !remill_register !42 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, 
!remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + store i64 4202578, ptr %RSI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %2 = add i64 %program_counter, 7 + %3 = load i32, ptr %RBX, align 4 + %4 = zext i32 %3 to i64 + store i64 %4, ptr %RDI, align 8, !tbaa !5 + store i64 %2, ptr %PC, align 8 + %5 = add i64 %program_counter, 12 + store i64 5, ptr %RDX, align 8, !tbaa !5 + store i64 %5, ptr %PC, align 8 + %6 = add i64 %program_counter, 17 + %7 = add i64 %program_counter, 1824 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %8 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %8, -8 + %9 = inttoptr i64 %sub.i.i to ptr + store i64 %6, ptr %9, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %7, ptr %rip.i, align 8, !tbaa !5 + %10 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %6, ptr %PC, align 8 + %11 = add i64 %program_counter, 19 + %12 = load i32, ptr %RBX, align 4 + %13 = zext i32 %12 to i64 + store i64 %13, ptr %RDI, align 8, !tbaa !5 + store i64 %11, ptr %PC, align 8 + %14 = add i64 %program_counter, 24 + store i64 10, ptr %RSI, align 8, !tbaa !5 + store i64 %14, ptr %PC, align 8 + %15 = add i64 %program_counter, 29 + %16 = add i64 %program_counter, 1952 + %17 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i3 = add i64 %17, -8 + %18 = inttoptr i64 %sub.i.i3 to ptr + store i64 %15, ptr %18, align 8 + store i64 %sub.i.i3, ptr %rsp.i, align 8, !tbaa !5 + store i64 %16, ptr %rip.i, align 8, !tbaa !5 + %19 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %15, ptr %PC, align 8 + %20 = add i64 %program_counter, 31 + %21 = load i32, ptr %RBX, align 4 + %22 = zext i32 %21 to i64 + store i64 %22, ptr %RDI, align 8, !tbaa !5 + store i64 %20, ptr %PC, align 8 
+ %23 = add i64 %program_counter, 36 + store i64 1, ptr %RSI, align 8, !tbaa !5 + store i64 %23, ptr %PC, align 8 + %24 = add i64 %program_counter, 41 + %25 = add i64 %program_counter, 1488 + %26 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i8 = add i64 %26, -8 + %27 = inttoptr i64 %sub.i.i8 to ptr + store i64 %24, ptr %27, align 8 + store i64 %sub.i.i8, ptr %rsp.i, align 8, !tbaa !5 + store i64 %25, ptr %rip.i, align 8, !tbaa !5 + %28 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %24, ptr %PC, align 8 + %29 = add i64 %program_counter, 44 + %30 = load i64, ptr %RBX, align 8 + %conv.i.i = trunc i64 %30 to i32 + %add.i.i = add i32 %conv.i.i, 1 + %conv.i22.i = zext i32 %add.i.i to i64 + store i64 %conv.i22.i, ptr %RBX, align 8, !tbaa !5 + %31 = icmp eq i32 %conv.i.i, -1 + %conv.i23.i = zext i1 %31 to i8 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 %conv.i23.i, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i = trunc i32 %add.i.i to i8 + %32 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i), !range !26 + %33 = and i8 %32, 1 + %34 = xor i8 %33, 1 + %pf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %34, ptr %pf.i.i.i, align 1, !tbaa !27 + %35 = xor i32 %add.i.i, %conv.i.i + %36 = trunc i32 %35 to i8 + %37 = lshr i8 %36, 4 + %38 = and i8 %37, 1 + %af.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 %38, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i = icmp eq i32 %add.i.i, 0 + %conv5.i.i.i = zext i1 %cmp.i.i.i.i to i8 + %zf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv5.i.i.i, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i = icmp slt i32 %add.i.i, 0 + %conv8.i.i.i = zext i1 %cmp.i27.i.i.i to i8 + %sf.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv8.i.i.i, ptr %sf.i.i.i, align 1, 
!tbaa !29 + %shr.i.i.i.i = lshr i32 %conv.i.i, 31 + %shr2.i.i.i.i = lshr i32 %add.i.i, 31 + %xor.i28.i.i.i = xor i32 %shr2.i.i.i.i, %shr.i.i.i.i + %add.i.i.i.i = add nuw nsw i32 %xor.i28.i.i.i, %shr2.i.i.i.i + %cmp.i29.i.i.i = icmp eq i32 %add.i.i.i.i, 2 + %conv11.i.i.i = zext i1 %cmp.i29.i.i.i to i8 + %of.i.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 %conv11.i.i.i, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %29, ptr %PC, align 8 + %39 = add i64 %program_counter, 47 + %40 = load i32, ptr %RBX, align 4 + %sub.i.i11 = add i32 %40, -5 + %cmp.i.i.i12 = icmp ult i32 %40, 5 + %conv.i12.i = zext i1 %cmp.i.i.i12 to i8 + store i8 %conv.i12.i, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i.i15 = trunc i32 %sub.i.i11 to i8 + %41 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i.i15), !range !26 + %42 = and i8 %41, 1 + %43 = xor i8 %42, 1 + store i8 %43, ptr %pf.i.i.i, align 1, !tbaa !27 + %44 = xor i32 %40, %sub.i.i11 + %45 = trunc i32 %44 to i8 + %46 = lshr i8 %45, 4 + %47 = and i8 %46, 1 + store i8 %47, ptr %af.i.i.i, align 1, !tbaa !31 + %cmp.i.i.i.i20 = icmp eq i32 %40, 5 + %conv5.i.i.i22 = zext i1 %cmp.i.i.i.i20 to i8 + store i8 %conv5.i.i.i22, ptr %zf.i.i.i, align 1, !tbaa !28 + %cmp.i27.i.i.i24 = icmp slt i32 %sub.i.i11, 0 + %conv8.i.i.i26 = zext i1 %cmp.i27.i.i.i24 to i8 + store i8 %conv8.i.i.i26, ptr %sf.i.i.i, align 1, !tbaa !29 + %shr.i.i.i.i28 = lshr i32 %40, 31 + %shr2.i.i.i.i29 = lshr i32 %sub.i.i11, 31 + %xor3.i.i.i.i = xor i32 %shr2.i.i.i.i29, %shr.i.i.i.i28 + %add.i.i.i.i30 = add nuw nsw i32 %xor3.i.i.i.i, %shr.i.i.i.i28 + %cmp.i29.i.i.i31 = icmp eq i32 %add.i.i.i.i30, 2 + %conv11.i.i.i33 = zext i1 %cmp.i29.i.i.i31 to i8 + store i8 %conv11.i.i.i33, ptr %of.i.i.i, align 1, !tbaa !30 + store i64 %39, ptr %PC, align 8 + %48 = add i64 %program_counter, 49 + %cond1.i.i = select i1 %cmp.i.i.i.i20, i64 %48, i64 %program_counter + store i64 %cond1.i.i, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline 
+define internal fastcc ptr @basic_block_func4199074(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %EAX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !0 + %RDX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 7, i32 0, i32 0, !remill_register !42 + %RBP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 15, i32 0, i32 0, !remill_register !33 + %RBX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, !remill_register !2 + %RCX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 5, i32 0, i32 0, !remill_register !41 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + store i64 4202554, ptr %RDI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %2 = add i64 %program_counter, 10 + store i64 4202573, ptr %RSI, align 8, !tbaa !5 + store i64 %2, ptr %PC, align 8 + %3 = add i64 %program_counter, 15 + %4 = add i64 %program_counter, -354 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %5 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %5, -8 + %6 = inttoptr i64 %sub.i.i to ptr + store i64 %3, ptr %6, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %4, ptr %rip.i, align 8, !tbaa !5 
+ %7 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %3, ptr %PC, align 8 + %8 = add i64 %program_counter, 20 + %9 = load i64, ptr %RSP, align 8 + %10 = add i64 %9, 40 + %11 = load i64, ptr %EAX, align 8 + %12 = inttoptr i64 %10 to ptr + store i64 %11, ptr %12, align 8 + store i64 %8, ptr %PC, align 8 + %13 = add i64 %program_counter, 24 + %14 = load i64, ptr %RBX, align 8 + %15 = add i64 %14, 8 + %16 = inttoptr i64 %15 to ptr + %17 = load i64, ptr %16, align 8 + store i64 %17, ptr %RCX, align 8, !tbaa !5 + store i64 %13, ptr %PC, align 8 + %18 = add i64 %program_counter, 29 + %19 = add i64 %9, 86 + store i64 %19, ptr %RBP, align 8, !tbaa !5 + store i64 %18, ptr %PC, align 8 + %20 = add i64 %program_counter, 34 + store i64 10, ptr %RSI, align 8, !tbaa !5 + store i64 %20, ptr %PC, align 8 + %21 = add i64 %program_counter, 39 + store i64 4202575, ptr %RDX, align 8, !tbaa !5 + store i64 %21, ptr %PC, align 8 + %22 = add i64 %program_counter, 42 + store i64 %19, ptr %RDI, align 8, !tbaa !5 + store i64 %22, ptr %PC, align 8 + %23 = add i64 %program_counter, 44 + %24 = load i32, ptr %EAX, align 4 + %conv.i.i = trunc i64 %11 to i32 + %xor3.i.i = xor i32 %24, %conv.i.i + %conv.i27.i = zext i32 %xor3.i.i to i64 + store i64 %conv.i27.i, ptr %EAX, align 8, !tbaa !5 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i = trunc i32 %xor3.i.i to i8 + %25 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i), !range !26 + %26 = and i8 %25, 1 + %27 = xor i8 %26, 1 + %pf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %27, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i = icmp eq i32 %xor3.i.i, 0 + %conv3.i.i = zext i1 %cmp.i.i.i to i8 + %zf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv3.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i = icmp slt i32 %xor3.i.i, 0 + 
%conv6.i.i = zext i1 %cmp.i19.i.i to i8 + %sf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv6.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %of.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + %af.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %23, ptr %PC, align 8 + %28 = add i64 %program_counter, 49 + %29 = add i64 %program_counter, -514 + %30 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i4 = add i64 %30, -8 + %31 = inttoptr i64 %sub.i.i4 to ptr + store i64 %28, ptr %31, align 8 + store i64 %sub.i.i4, ptr %rsp.i, align 8, !tbaa !5 + store i64 %29, ptr %rip.i, align 8, !tbaa !5 + %32 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %28, ptr %PC, align 8 + %33 = add i64 %program_counter, 54 + store i64 1, ptr %RBX, align 8, !tbaa !5 + store i64 %33, ptr %PC, align 8 + %34 = add i64 %program_counter, 59 + store i64 29, ptr %RDI, align 8, !tbaa !5 + store i64 %34, ptr %PC, align 8 + %35 = add i64 %program_counter, 64 + store i64 3, ptr %RSI, align 8, !tbaa !5 + store i64 %35, ptr %PC, align 8 + %36 = add i64 %program_counter, 69 + store i64 1, ptr %RDX, align 8, !tbaa !5 + store i64 %36, ptr %PC, align 8 + %37 = add i64 %program_counter, 74 + %38 = add i64 %program_counter, -306 + %39 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i8 = add i64 %39, -8 + %40 = inttoptr i64 %sub.i.i8 to ptr + store i64 %37, ptr %40, align 8 + store i64 %sub.i.i8, ptr %rsp.i, align 8, !tbaa !5 + store i64 %38, ptr %rip.i, align 8, !tbaa !5 + %41 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %37, ptr %PC, align 8 + %42 = add i64 %program_counter, 79 + %43 = load i64, ptr %RSP, align 8 + %44 = add i64 %43, 32 + %45 = load i64, ptr %EAX, align 8 + %46 = inttoptr i64 %44 to 
ptr + store i64 %45, ptr %46, align 8 + store i64 %42, ptr %PC, align 8 + %47 = add i64 %program_counter, 83 + %48 = add i64 %43, 24 + %49 = load i32, ptr %EAX, align 4 + %50 = inttoptr i64 %48 to ptr + store i32 %49, ptr %50, align 4 + store i64 %47, ptr %PC, align 8 + %51 = add i64 %program_counter, 88 + store i64 %48, ptr %RDI, align 8, !tbaa !5 + store i64 %51, ptr %PC, align 8 + %52 = add i64 %program_counter, 91 + %53 = load i64, ptr %RBP, align 8 + store i64 %53, ptr %RSI, align 8, !tbaa !5 + store i64 %52, ptr %PC, align 8 + %54 = add i64 %program_counter, 96 + %55 = add i64 %program_counter, 878 + %56 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i14 = add i64 %56, -8 + %57 = inttoptr i64 %sub.i.i14 to ptr + store i64 %54, ptr %57, align 8 + store i64 %sub.i.i14, ptr %rsp.i, align 8, !tbaa !5 + store i64 %55, ptr %rip.i, align 8, !tbaa !5 + %58 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %54, ptr %PC, align 8 + %59 = add i64 %program_counter, 98 + %60 = load i32, ptr %EAX, align 4 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i18 = trunc i32 %60 to i8 + %61 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i18), !range !26 + %62 = and i8 %61, 1 + %63 = xor i8 %62, 1 + store i8 %63, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i20 = icmp eq i32 %60, 0 + %conv3.i.i22 = zext i1 %cmp.i.i.i20 to i8 + store i8 %conv3.i.i22, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i24 = icmp slt i32 %60, 0 + %conv6.i.i26 = zext i1 %cmp.i19.i.i24 to i8 + store i8 %conv6.i.i26, ptr %sf.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %59, ptr %PC, align 8 + %cond1.i.i.v = select i1 %cmp.i.i.i20, i64 145, i64 100 + %cond1.i.i = add i64 %cond1.i.i.v, %program_counter + store i64 %cond1.i.i, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199470(ptr %state, i64 
%program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %RAX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !44 + %RSI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !remill_register !3 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %RDX = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 7, i32 0, i32 0, !remill_register !42 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, align 8 + %1 = add i64 %program_counter, 5 + store i64 16, ptr %RDX, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %2 = add i64 %program_counter, 10 + %3 = load i64, ptr %RSP, align 8 + %4 = add i64 %3, 32 + %5 = inttoptr i64 %4 to ptr + %6 = load i64, ptr %5, align 8 + store i64 %6, ptr %RDI, align 8, !tbaa !5 + store i64 %2, ptr %PC, align 8 + %7 = add i64 %program_counter, 13 + store i64 %3, ptr %RSI, align 8, !tbaa !5 + store i64 %7, ptr %PC, align 8 + %8 = add i64 %program_counter, 18 + %9 = add i64 %program_counter, -862 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %10 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %10, -8 + %11 = inttoptr i64 %sub.i.i to ptr + store i64 %8, ptr %11, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %9, ptr %rip.i, align 8, !tbaa !5 + %12 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %8, ptr %PC, align 8 + %13 = add i64 %program_counter, 21 + %14 = load i64, ptr %RAX, align 8 + %cf.i.i = 
getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i = trunc i64 %14 to i8 + %15 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i), !range !26 + %16 = and i8 %15, 1 + %17 = xor i8 %16, 1 + %pf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %17, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i = icmp eq i64 %14, 0 + %conv3.i.i = zext i1 %cmp.i.i.i to i8 + %zf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv3.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i = icmp slt i64 %14, 0 + %conv6.i.i = zext i1 %cmp.i19.i.i to i8 + %sf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store i8 %conv6.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %of.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + %af.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %13, ptr %PC, align 8 + %cond1.i.i.v = select i1 %cmp.i19.i.i, i64 420, i64 27 + %cond1.i.i = add i64 %cond1.i.i.v, %program_counter + store i64 %cond1.i.i, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define internal fastcc ptr @basic_block_func4199890(ptr %state, i64 %program_counter, ptr %memory, ptr %next_pc_out) unnamed_addr #0 { + %AL = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !remill_register !39 + %RSP = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !remill_register !34 + %RDI = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !remill_register !1 + %PC = getelementptr inbounds %struct.State, ptr %state, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !remill_register !4 + store i64 %program_counter, ptr %PC, 
align 8 + %1 = add i64 %program_counter, 5 + store i64 4202583, ptr %RDI, align 8, !tbaa !5 + store i64 %1, ptr %PC, align 8 + %2 = add i64 %program_counter, 10 + %3 = add i64 %program_counter, -1154 + %rsp.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 13 + %4 = load i64, ptr %rsp.i, align 8, !tbaa !32 + %sub.i.i = add i64 %4, -8 + %5 = inttoptr i64 %sub.i.i to ptr + store i64 %2, ptr %5, align 8 + store i64 %sub.i.i, ptr %rsp.i, align 8, !tbaa !5 + %rip.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 6, i32 33 + store i64 %3, ptr %rip.i, align 8, !tbaa !5 + %6 = call ptr @__remill_function_call(ptr %state, i64 %program_counter, ptr %memory) + store i64 %2, ptr %PC, align 8 + %7 = add i64 %program_counter, 18 + %8 = load i64, ptr %RSP, align 8 + %9 = add i64 %8, 20 + %10 = inttoptr i64 %9 to ptr + store i32 1, ptr %10, align 4 + store i64 %7, ptr %PC, align 8 + %11 = add i64 %program_counter, 20 + %12 = load i64, ptr %AL, align 8 + %13 = load i32, ptr %AL, align 4 + %conv.i.i = trunc i64 %12 to i32 + %xor3.i.i = xor i32 %13, %conv.i.i + %conv.i27.i = zext i32 %xor3.i.i to i64 + store i64 %conv.i27.i, ptr %AL, align 8, !tbaa !5 + %cf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %conv.i.i.i = trunc i32 %xor3.i.i to i8 + %14 = call i8 @llvm.ctpop.i8(i8 %conv.i.i.i), !range !26 + %15 = and i8 %14, 1 + %16 = xor i8 %15, 1 + %pf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 3 + store i8 %16, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i = icmp eq i32 %xor3.i.i, 0 + %conv3.i.i = zext i1 %cmp.i.i.i to i8 + %zf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 7 + store i8 %conv3.i.i, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i = icmp slt i32 %xor3.i.i, 0 + %conv6.i.i = zext i1 %cmp.i19.i.i to i8 + %sf.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 9 + store 
i8 %conv6.i.i, ptr %sf.i.i, align 1, !tbaa !29 + %of.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 13 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + %af.i.i = getelementptr inbounds %struct.X86State, ptr %state, i64 0, i32 2, i32 5 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %11, ptr %PC, align 8 + %17 = add i64 %program_counter, 22 + %18 = load i8, ptr %AL, align 1 + store i8 0, ptr %cf.i.i, align 1, !tbaa !9 + %19 = call i8 @llvm.ctpop.i8(i8 %18), !range !26 + %20 = and i8 %19, 1 + %21 = xor i8 %20, 1 + store i8 %21, ptr %pf.i.i, align 1, !tbaa !27 + %cmp.i.i.i5 = icmp eq i8 %18, 0 + %conv3.i.i7 = zext i1 %cmp.i.i.i5 to i8 + store i8 %conv3.i.i7, ptr %zf.i.i, align 1, !tbaa !28 + %cmp.i19.i.i9 = icmp slt i8 %18, 0 + %conv6.i.i10 = zext i1 %cmp.i19.i.i9 to i8 + store i8 %conv6.i.i10, ptr %sf.i.i, align 1, !tbaa !29 + store i8 0, ptr %of.i.i, align 1, !tbaa !30 + store i8 undef, ptr %af.i.i, align 1, !tbaa !31 + store i64 %17, ptr %PC, align 8 + %tobool.not.i = xor i1 %cmp.i.i.i5, true + %cond1.i.i.v = select i1 %tobool.not.i, i64 -498, i64 28 + %cond1.i.i = add i64 %cond1.i.i.v, %program_counter + store i64 %cond1.i.i, ptr %next_pc_out, align 8 + ret ptr %memory +} + +; Function Attrs: noinline +define x86_stdcallcc i32 @sub_401270__AI_SI_B_64(i32 %0, ptr %1) local_unnamed_addr #0 !pc !54 { + %return_address = call ptr @llvm.returnaddress(i32 0), !pc !54 + %3 = ptrtoint ptr %return_address to i64, !pc !54 + %return_address_loc = alloca i64, align 8, !pc !54, !stack_offset !55 + %4 = ptrtoint ptr %return_address_loc to i64, !pc !54, !stack_offset !55 + %5 = load i64, ptr @__anvill_stack_0, align 8, !pc !54 + store i64 %5, ptr %return_address_loc, align 8, !pc !54 + %6 = alloca i64, align 8, !pc !54 + %7 = alloca %struct.State, align 8, !pc !54 + store i32 0, ptr %7, align 8, !pc !54 + %8 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 0, i32 1, !pc !54 + store i32 0, ptr %8, align 4, !pc !54 + %9 = 
getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 0, i32 2, i32 0, !pc !54 + store i64 0, ptr %9, align 8, !pc !54 + %10 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 0, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %10, align 8, !pc !54 + %11 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 0, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %11, align 8, !pc !54 + %12 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 0, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %12, align 8, !pc !54 + %13 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 0, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %13, align 8, !pc !54 + %14 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 0, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %14, align 8, !pc !54 + %15 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 0, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %15, align 8, !pc !54 + %16 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 0, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %16, align 8, !pc !54 + %17 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 0, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %17, align 8, !pc !54 + %18 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 1, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %18, align 8, !pc !54 + %19 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 1, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %19, align 8, !pc !54 + %20 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 1, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %20, align 8, !pc !54 + %21 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 1, i32 0, i32 0, i32 0, i64 3, !pc !54 + 
store i64 0, ptr %21, align 8, !pc !54 + %22 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 1, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %22, align 8, !pc !54 + %23 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 1, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %23, align 8, !pc !54 + %24 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 1, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %24, align 8, !pc !54 + %25 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 1, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %25, align 8, !pc !54 + %26 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 2, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %26, align 8, !pc !54 + %27 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 2, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %27, align 8, !pc !54 + %28 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 2, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %28, align 8, !pc !54 + %29 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 2, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %29, align 8, !pc !54 + %30 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 2, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %30, align 8, !pc !54 + %31 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 2, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %31, align 8, !pc !54 + %32 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 2, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %32, align 8, !pc !54 + %33 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 2, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %33, align 8, !pc !54 + %34 = getelementptr inbounds %struct.State, ptr 
%7, i64 0, i32 0, i32 1, i64 3, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %34, align 8, !pc !54 + %35 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 3, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %35, align 8, !pc !54 + %36 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 3, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %36, align 8, !pc !54 + %37 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 3, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %37, align 8, !pc !54 + %38 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 3, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %38, align 8, !pc !54 + %39 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 3, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %39, align 8, !pc !54 + %40 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 3, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %40, align 8, !pc !54 + %41 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 3, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %41, align 8, !pc !54 + %42 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 4, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %42, align 8, !pc !54 + %43 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 4, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %43, align 8, !pc !54 + %44 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 4, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %44, align 8, !pc !54 + %45 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 4, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %45, align 8, !pc !54 + %46 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 4, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %46, 
align 8, !pc !54 + %47 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 4, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %47, align 8, !pc !54 + %48 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 4, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %48, align 8, !pc !54 + %49 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 4, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %49, align 8, !pc !54 + %50 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 5, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %50, align 8, !pc !54 + %51 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 5, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %51, align 8, !pc !54 + %52 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 5, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %52, align 8, !pc !54 + %53 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 5, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %53, align 8, !pc !54 + %54 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 5, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %54, align 8, !pc !54 + %55 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 5, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %55, align 8, !pc !54 + %56 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 5, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %56, align 8, !pc !54 + %57 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 5, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %57, align 8, !pc !54 + %58 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 6, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %58, align 8, !pc !54 + %59 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, 
i64 6, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %59, align 8, !pc !54 + %60 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 6, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %60, align 8, !pc !54 + %61 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 6, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %61, align 8, !pc !54 + %62 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 6, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %62, align 8, !pc !54 + %63 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 6, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %63, align 8, !pc !54 + %64 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 6, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %64, align 8, !pc !54 + %65 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 6, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %65, align 8, !pc !54 + %66 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 7, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %66, align 8, !pc !54 + %67 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 7, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %67, align 8, !pc !54 + %68 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 7, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %68, align 8, !pc !54 + %69 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 7, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %69, align 8, !pc !54 + %70 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 7, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %70, align 8, !pc !54 + %71 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 7, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %71, align 8, !pc !54 + %72 = 
getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 7, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %72, align 8, !pc !54 + %73 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 7, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %73, align 8, !pc !54 + %74 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 8, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %74, align 8, !pc !54 + %75 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 8, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %75, align 8, !pc !54 + %76 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 8, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %76, align 8, !pc !54 + %77 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 8, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %77, align 8, !pc !54 + %78 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 8, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %78, align 8, !pc !54 + %79 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 8, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %79, align 8, !pc !54 + %80 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 8, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %80, align 8, !pc !54 + %81 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 8, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %81, align 8, !pc !54 + %82 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 9, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %82, align 8, !pc !54 + %83 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 9, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %83, align 8, !pc !54 + %84 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 9, i32 0, i32 0, i32 
0, i64 2, !pc !54 + store i64 0, ptr %84, align 8, !pc !54 + %85 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 9, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %85, align 8, !pc !54 + %86 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 9, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %86, align 8, !pc !54 + %87 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 9, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %87, align 8, !pc !54 + %88 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 9, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %88, align 8, !pc !54 + %89 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 9, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %89, align 8, !pc !54 + %90 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 10, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %90, align 8, !pc !54 + %91 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 10, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %91, align 8, !pc !54 + %92 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 10, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %92, align 8, !pc !54 + %93 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 10, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %93, align 8, !pc !54 + %94 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 10, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %94, align 8, !pc !54 + %95 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 10, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %95, align 8, !pc !54 + %96 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 10, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %96, align 8, !pc !54 + %97 = getelementptr 
inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 10, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %97, align 8, !pc !54 + %98 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 11, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %98, align 8, !pc !54 + %99 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 11, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %99, align 8, !pc !54 + %100 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 11, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %100, align 8, !pc !54 + %101 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 11, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %101, align 8, !pc !54 + %102 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 11, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %102, align 8, !pc !54 + %103 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 11, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %103, align 8, !pc !54 + %104 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 11, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %104, align 8, !pc !54 + %105 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 11, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %105, align 8, !pc !54 + %106 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 12, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %106, align 8, !pc !54 + %107 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 12, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %107, align 8, !pc !54 + %108 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 12, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %108, align 8, !pc !54 + %109 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 12, 
i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %109, align 8, !pc !54 + %110 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 12, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %110, align 8, !pc !54 + %111 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 12, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %111, align 8, !pc !54 + %112 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 12, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %112, align 8, !pc !54 + %113 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 12, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %113, align 8, !pc !54 + %114 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 13, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %114, align 8, !pc !54 + %115 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 13, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %115, align 8, !pc !54 + %116 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 13, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %116, align 8, !pc !54 + %117 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 13, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %117, align 8, !pc !54 + %118 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 13, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %118, align 8, !pc !54 + %119 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 13, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %119, align 8, !pc !54 + %120 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 13, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %120, align 8, !pc !54 + %121 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 13, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr 
%121, align 8, !pc !54 + %122 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 14, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %122, align 8, !pc !54 + %123 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 14, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %123, align 8, !pc !54 + %124 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 14, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %124, align 8, !pc !54 + %125 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 14, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %125, align 8, !pc !54 + %126 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 14, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %126, align 8, !pc !54 + %127 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 14, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %127, align 8, !pc !54 + %128 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 14, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %128, align 8, !pc !54 + %129 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 14, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %129, align 8, !pc !54 + %130 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 15, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %130, align 8, !pc !54 + %131 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 15, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %131, align 8, !pc !54 + %132 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 15, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %132, align 8, !pc !54 + %133 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 15, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %133, align 8, !pc !54 + %134 = getelementptr inbounds 
%struct.State, ptr %7, i64 0, i32 0, i32 1, i64 15, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %134, align 8, !pc !54 + %135 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 15, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %135, align 8, !pc !54 + %136 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 15, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %136, align 8, !pc !54 + %137 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 15, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %137, align 8, !pc !54 + %138 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 16, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %138, align 8, !pc !54 + %139 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 16, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %139, align 8, !pc !54 + %140 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 16, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %140, align 8, !pc !54 + %141 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 16, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %141, align 8, !pc !54 + %142 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 16, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %142, align 8, !pc !54 + %143 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 16, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %143, align 8, !pc !54 + %144 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 16, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %144, align 8, !pc !54 + %145 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 16, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %145, align 8, !pc !54 + %146 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 17, i32 
0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %146, align 8, !pc !54 + %147 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 17, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %147, align 8, !pc !54 + %148 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 17, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %148, align 8, !pc !54 + %149 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 17, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %149, align 8, !pc !54 + %150 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 17, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %150, align 8, !pc !54 + %151 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 17, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %151, align 8, !pc !54 + %152 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 17, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %152, align 8, !pc !54 + %153 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 17, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %153, align 8, !pc !54 + %154 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 18, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %154, align 8, !pc !54 + %155 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 18, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %155, align 8, !pc !54 + %156 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 18, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %156, align 8, !pc !54 + %157 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 18, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %157, align 8, !pc !54 + %158 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 18, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr 
%158, align 8, !pc !54 + %159 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 18, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %159, align 8, !pc !54 + %160 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 18, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %160, align 8, !pc !54 + %161 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 18, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %161, align 8, !pc !54 + %162 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 19, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %162, align 8, !pc !54 + %163 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 19, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %163, align 8, !pc !54 + %164 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 19, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %164, align 8, !pc !54 + %165 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 19, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %165, align 8, !pc !54 + %166 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 19, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %166, align 8, !pc !54 + %167 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 19, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %167, align 8, !pc !54 + %168 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 19, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %168, align 8, !pc !54 + %169 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 19, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %169, align 8, !pc !54 + %170 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 20, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %170, align 8, !pc !54 + %171 = getelementptr inbounds 
%struct.State, ptr %7, i64 0, i32 0, i32 1, i64 20, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %171, align 8, !pc !54 + %172 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 20, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %172, align 8, !pc !54 + %173 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 20, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %173, align 8, !pc !54 + %174 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 20, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %174, align 8, !pc !54 + %175 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 20, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %175, align 8, !pc !54 + %176 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 20, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %176, align 8, !pc !54 + %177 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 20, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %177, align 8, !pc !54 + %178 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 21, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %178, align 8, !pc !54 + %179 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 21, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %179, align 8, !pc !54 + %180 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 21, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %180, align 8, !pc !54 + %181 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 21, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %181, align 8, !pc !54 + %182 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 21, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %182, align 8, !pc !54 + %183 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 21, i32 
0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %183, align 8, !pc !54 + %184 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 21, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %184, align 8, !pc !54 + %185 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 21, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %185, align 8, !pc !54 + %186 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 22, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %186, align 8, !pc !54 + %187 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 22, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %187, align 8, !pc !54 + %188 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 22, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %188, align 8, !pc !54 + %189 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 22, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %189, align 8, !pc !54 + %190 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 22, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %190, align 8, !pc !54 + %191 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 22, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %191, align 8, !pc !54 + %192 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 22, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %192, align 8, !pc !54 + %193 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 22, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %193, align 8, !pc !54 + %194 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 23, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %194, align 8, !pc !54 + %195 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 23, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr 
%195, align 8, !pc !54 + %196 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 23, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %196, align 8, !pc !54 + %197 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 23, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %197, align 8, !pc !54 + %198 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 23, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %198, align 8, !pc !54 + %199 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 23, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %199, align 8, !pc !54 + %200 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 23, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %200, align 8, !pc !54 + %201 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 23, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %201, align 8, !pc !54 + %202 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 24, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %202, align 8, !pc !54 + %203 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 24, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %203, align 8, !pc !54 + %204 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 24, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %204, align 8, !pc !54 + %205 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 24, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %205, align 8, !pc !54 + %206 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 24, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %206, align 8, !pc !54 + %207 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 24, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %207, align 8, !pc !54 + %208 = getelementptr inbounds 
%struct.State, ptr %7, i64 0, i32 0, i32 1, i64 24, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %208, align 8, !pc !54 + %209 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 24, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %209, align 8, !pc !54 + %210 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 25, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %210, align 8, !pc !54 + %211 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 25, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %211, align 8, !pc !54 + %212 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 25, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %212, align 8, !pc !54 + %213 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 25, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %213, align 8, !pc !54 + %214 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 25, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %214, align 8, !pc !54 + %215 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 25, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %215, align 8, !pc !54 + %216 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 25, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %216, align 8, !pc !54 + %217 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 25, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %217, align 8, !pc !54 + %218 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 26, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %218, align 8, !pc !54 + %219 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 26, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %219, align 8, !pc !54 + %220 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 26, i32 
0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %220, align 8, !pc !54 + %221 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 26, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %221, align 8, !pc !54 + %222 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 26, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %222, align 8, !pc !54 + %223 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 26, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %223, align 8, !pc !54 + %224 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 26, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %224, align 8, !pc !54 + %225 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 26, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %225, align 8, !pc !54 + %226 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 27, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %226, align 8, !pc !54 + %227 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 27, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %227, align 8, !pc !54 + %228 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 27, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %228, align 8, !pc !54 + %229 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 27, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %229, align 8, !pc !54 + %230 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 27, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %230, align 8, !pc !54 + %231 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 27, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %231, align 8, !pc !54 + %232 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 27, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr 
%232, align 8, !pc !54 + %233 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 27, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %233, align 8, !pc !54 + %234 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 28, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %234, align 8, !pc !54 + %235 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 28, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %235, align 8, !pc !54 + %236 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 28, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %236, align 8, !pc !54 + %237 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 28, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %237, align 8, !pc !54 + %238 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 28, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %238, align 8, !pc !54 + %239 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 28, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %239, align 8, !pc !54 + %240 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 28, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %240, align 8, !pc !54 + %241 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 28, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %241, align 8, !pc !54 + %242 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 29, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %242, align 8, !pc !54 + %243 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 29, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %243, align 8, !pc !54 + %244 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 29, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %244, align 8, !pc !54 + %245 = getelementptr inbounds 
%struct.State, ptr %7, i64 0, i32 0, i32 1, i64 29, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %245, align 8, !pc !54 + %246 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 29, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %246, align 8, !pc !54 + %247 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 29, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %247, align 8, !pc !54 + %248 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 29, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %248, align 8, !pc !54 + %249 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 29, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %249, align 8, !pc !54 + %250 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 30, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %250, align 8, !pc !54 + %251 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 30, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %251, align 8, !pc !54 + %252 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 30, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %252, align 8, !pc !54 + %253 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 30, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %253, align 8, !pc !54 + %254 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 30, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %254, align 8, !pc !54 + %255 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 30, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %255, align 8, !pc !54 + %256 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 30, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %256, align 8, !pc !54 + %257 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 30, i32 
0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %257, align 8, !pc !54 + %258 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 31, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %258, align 8, !pc !54 + %259 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 31, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i64 0, ptr %259, align 8, !pc !54 + %260 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 31, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i64 0, ptr %260, align 8, !pc !54 + %261 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 31, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i64 0, ptr %261, align 8, !pc !54 + %262 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 31, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i64 0, ptr %262, align 8, !pc !54 + %263 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 31, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i64 0, ptr %263, align 8, !pc !54 + %264 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 31, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i64 0, ptr %264, align 8, !pc !54 + %265 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 1, i64 31, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i64 0, ptr %265, align 8, !pc !54 + %266 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 0, !pc !54 + store i8 0, ptr %266, align 8, !pc !54 + %267 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 1, !pc !54 + store i8 0, ptr %267, align 1, !pc !54 + %268 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 2, !pc !54 + store i8 0, ptr %268, align 2, !pc !54 + %269 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 3, !pc !54 + store i8 0, ptr %269, align 1, !pc !54 + %270 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 4, !pc !54 + store i8 
0, ptr %270, align 4, !pc !54 + %271 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 5, !pc !54 + store i8 0, ptr %271, align 1, !pc !54 + %272 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 6, !pc !54 + store i8 0, ptr %272, align 2, !pc !54 + %273 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 7, !pc !54 + store i8 0, ptr %273, align 1, !pc !54 + %274 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 8, !pc !54 + store i8 0, ptr %274, align 8, !pc !54 + %275 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 9, !pc !54 + store i8 0, ptr %275, align 1, !pc !54 + %276 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 10, !pc !54 + store i8 0, ptr %276, align 2, !pc !54 + %277 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 11, !pc !54 + store i8 0, ptr %277, align 1, !pc !54 + %278 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 12, !pc !54 + store i8 0, ptr %278, align 4, !pc !54 + %279 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 13, !pc !54 + store i8 0, ptr %279, align 1, !pc !54 + %280 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 14, !pc !54 + store i8 0, ptr %280, align 2, !pc !54 + %281 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 2, i32 15, !pc !54 + store i8 0, ptr %281, align 1, !pc !54 + %282 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 3, i32 0, !pc !54 + store i64 0, ptr %282, align 8, !pc !54 + %283 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 0, !pc !54 + store i16 0, ptr %283, align 8, !pc !54 + %284 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 1, i32 0, !pc !54 + store i16 0, ptr %284, align 2, !pc !54 + %285 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 2, !pc !54 + store i16 0, 
ptr %285, align 4, !pc !54 + %286 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 3, i32 0, !pc !54 + store i16 0, ptr %286, align 2, !pc !54 + %287 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 4, !pc !54 + store i16 0, ptr %287, align 8, !pc !54 + %288 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 5, i32 0, !pc !54 + store i16 0, ptr %288, align 2, !pc !54 + %289 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 6, !pc !54 + store i16 0, ptr %289, align 4, !pc !54 + %290 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 7, i32 0, !pc !54 + store i16 0, ptr %290, align 2, !pc !54 + %291 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 8, !pc !54 + store i16 0, ptr %291, align 8, !pc !54 + %292 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 9, i32 0, !pc !54 + store i16 0, ptr %292, align 2, !pc !54 + %293 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 10, !pc !54 + store i16 0, ptr %293, align 4, !pc !54 + %294 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 4, i32 11, i32 0, !pc !54 + store i16 0, ptr %294, align 2, !pc !54 + %295 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 0, !pc !54 + store i64 0, ptr %295, align 8, !pc !54 + %296 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 1, i32 0, i32 0, !pc !54 + store i64 0, ptr %296, align 8, !pc !54 + %297 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 2, !pc !54 + store i64 0, ptr %297, align 8, !pc !54 + %298 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 3, i32 0, i32 0, !pc !54 + store i64 0, ptr %298, align 8, !pc !54 + %299 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 4, !pc !54 + store i64 0, ptr %299, align 8, !pc !54 + %300 = getelementptr inbounds 
%struct.State, ptr %7, i64 0, i32 0, i32 5, i32 5, i32 0, i32 0, !pc !54 + store i64 0, ptr %300, align 8, !pc !54 + %301 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 6, !pc !54 + store i64 0, ptr %301, align 8, !pc !54 + %302 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 7, i32 0, i32 0, !pc !54 + store i64 0, ptr %302, align 8, !pc !54 + %303 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 8, !pc !54 + store i64 0, ptr %303, align 8, !pc !54 + %304 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 9, i32 0, i32 0, !pc !54 + store i64 0, ptr %304, align 8, !pc !54 + %305 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 10, !pc !54 + store i64 0, ptr %305, align 8, !pc !54 + %306 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 5, i32 11, i32 0, i32 0, !pc !54 + store i64 0, ptr %306, align 8, !pc !54 + %307 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 0, !pc !54 + store i64 0, ptr %307, align 8, !pc !54 + %308 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 1, i32 0, i32 0, !pc !54 + store i64 0, ptr %308, align 8, !pc !54 + %309 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 2, !pc !54 + store i64 0, ptr %309, align 8, !pc !54 + %310 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 3, i32 0, i32 0, !pc !54 + store i64 0, ptr %310, align 8, !pc !54 + %311 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 4, !pc !54 + store i64 0, ptr %311, align 8, !pc !54 + %312 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 5, i32 0, i32 0, !pc !54 + store i64 0, ptr %312, align 8, !pc !54 + %313 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 6, !pc !54 + store i64 0, ptr %313, align 8, !pc !54 + %314 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, 
i32 6, i32 7, i32 0, i32 0, !pc !54 + store i64 0, ptr %314, align 8, !pc !54 + %315 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 8, !pc !54 + store i64 0, ptr %315, align 8, !pc !54 + %316 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 9, i32 0, i32 0, !pc !54 + store i64 0, ptr %316, align 8, !pc !54 + %317 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 10, !pc !54 + store i64 0, ptr %317, align 8, !pc !54 + %318 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 11, i32 0, i32 0, !pc !54 + store i64 0, ptr %318, align 8, !pc !54 + %319 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 12, !pc !54 + store i64 0, ptr %319, align 8, !pc !54 + %320 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 13, i32 0, i32 0, !pc !54 + store i64 0, ptr %320, align 8, !pc !54 + %321 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 14, !pc !54 + store i64 0, ptr %321, align 8, !pc !54 + %322 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 15, i32 0, i32 0, !pc !54 + store i64 0, ptr %322, align 8, !pc !54 + %323 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 16, !pc !54 + store i64 0, ptr %323, align 8, !pc !54 + %324 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 17, i32 0, i32 0, !pc !54 + store i64 0, ptr %324, align 8, !pc !54 + %325 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 18, !pc !54 + store i64 0, ptr %325, align 8, !pc !54 + %326 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 19, i32 0, i32 0, !pc !54 + store i64 0, ptr %326, align 8, !pc !54 + %327 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 20, !pc !54 + store i64 0, ptr %327, align 8, !pc !54 + %328 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 21, i32 0, i32 0, 
!pc !54 + store i64 0, ptr %328, align 8, !pc !54 + %329 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 22, !pc !54 + store i64 0, ptr %329, align 8, !pc !54 + %330 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 23, i32 0, i32 0, !pc !54 + store i64 0, ptr %330, align 8, !pc !54 + %331 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 24, !pc !54 + store i64 0, ptr %331, align 8, !pc !54 + %332 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 25, i32 0, i32 0, !pc !54 + store i64 0, ptr %332, align 8, !pc !54 + %333 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 26, !pc !54 + store i64 0, ptr %333, align 8, !pc !54 + %334 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 27, i32 0, i32 0, !pc !54 + store i64 0, ptr %334, align 8, !pc !54 + %335 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 28, !pc !54 + store i64 0, ptr %335, align 8, !pc !54 + %336 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 29, i32 0, i32 0, !pc !54 + store i64 0, ptr %336, align 8, !pc !54 + %337 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 30, !pc !54 + store i64 0, ptr %337, align 8, !pc !54 + %338 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 31, i32 0, i32 0, !pc !54 + store i64 0, ptr %338, align 8, !pc !54 + %339 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 32, !pc !54 + store i64 0, ptr %339, align 8, !pc !54 + %340 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 6, i32 33, i32 0, i32 0, !pc !54 + store i64 0, ptr %340, align 8, !pc !54 + %341 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 0, i64 0, !pc !54 + store i8 0, ptr %341, align 8, !pc !54 + %342 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 0, i64 1, 
!pc !54 + store i8 0, ptr %342, align 1, !pc !54 + %343 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 0, i64 2, !pc !54 + store i8 0, ptr %343, align 2, !pc !54 + %344 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 0, i64 3, !pc !54 + store i8 0, ptr %344, align 1, !pc !54 + %345 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 0, i64 4, !pc !54 + store i8 0, ptr %345, align 4, !pc !54 + %346 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 0, i64 5, !pc !54 + store i8 0, ptr %346, align 1, !pc !54 + %347 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 0, !pc !54 + store i8 0, ptr %347, align 2, !pc !54 + %348 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 1, !pc !54 + store i8 0, ptr %348, align 1, !pc !54 + %349 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 2, !pc !54 + store i8 0, ptr %349, align 8, !pc !54 + %350 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 3, !pc !54 + store i8 0, ptr %350, align 1, !pc !54 + %351 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 4, !pc !54 + store i8 0, ptr %351, align 2, !pc !54 + %352 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 5, !pc !54 + store i8 0, ptr %352, align 1, !pc !54 + %353 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 6, !pc !54 + store i8 0, ptr %353, align 4, !pc !54 + %354 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 7, !pc !54 + store i8 0, ptr %354, align 1, !pc !54 + %355 = getelementptr inbounds %struct.State, ptr %7, i64 
0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 8, !pc !54 + store i8 0, ptr %355, align 2, !pc !54 + %356 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 0, i32 1, i32 0, i64 9, !pc !54 + store i8 0, ptr %356, align 1, !pc !54 + %357 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 0, i64 0, !pc !54 + store i8 0, ptr %357, align 8, !pc !54 + %358 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 0, i64 1, !pc !54 + store i8 0, ptr %358, align 1, !pc !54 + %359 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 0, i64 2, !pc !54 + store i8 0, ptr %359, align 2, !pc !54 + %360 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 0, i64 3, !pc !54 + store i8 0, ptr %360, align 1, !pc !54 + %361 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 0, i64 4, !pc !54 + store i8 0, ptr %361, align 4, !pc !54 + %362 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 0, i64 5, !pc !54 + store i8 0, ptr %362, align 1, !pc !54 + %363 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 0, !pc !54 + store i8 0, ptr %363, align 2, !pc !54 + %364 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 1, !pc !54 + store i8 0, ptr %364, align 1, !pc !54 + %365 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 2, !pc !54 + store i8 0, ptr %365, align 8, !pc !54 + %366 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 3, !pc !54 + store i8 0, ptr %366, align 1, !pc !54 + %367 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 4, !pc !54 + store i8 0, ptr %367, align 2, !pc !54 + %368 = 
getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 5, !pc !54 + store i8 0, ptr %368, align 1, !pc !54 + %369 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 6, !pc !54 + store i8 0, ptr %369, align 4, !pc !54 + %370 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 7, !pc !54 + store i8 0, ptr %370, align 1, !pc !54 + %371 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 8, !pc !54 + store i8 0, ptr %371, align 2, !pc !54 + %372 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 1, i32 1, i32 0, i64 9, !pc !54 + store i8 0, ptr %372, align 1, !pc !54 + %373 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 0, i64 0, !pc !54 + store i8 0, ptr %373, align 8, !pc !54 + %374 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 0, i64 1, !pc !54 + store i8 0, ptr %374, align 1, !pc !54 + %375 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 0, i64 2, !pc !54 + store i8 0, ptr %375, align 2, !pc !54 + %376 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 0, i64 3, !pc !54 + store i8 0, ptr %376, align 1, !pc !54 + %377 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 0, i64 4, !pc !54 + store i8 0, ptr %377, align 4, !pc !54 + %378 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 0, i64 5, !pc !54 + store i8 0, ptr %378, align 1, !pc !54 + %379 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 0, !pc !54 + store i8 0, ptr %379, align 2, !pc !54 + %380 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 1, !pc !54 + store i8 
0, ptr %380, align 1, !pc !54 + %381 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 2, !pc !54 + store i8 0, ptr %381, align 8, !pc !54 + %382 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 3, !pc !54 + store i8 0, ptr %382, align 1, !pc !54 + %383 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 4, !pc !54 + store i8 0, ptr %383, align 2, !pc !54 + %384 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 5, !pc !54 + store i8 0, ptr %384, align 1, !pc !54 + %385 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 6, !pc !54 + store i8 0, ptr %385, align 4, !pc !54 + %386 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 7, !pc !54 + store i8 0, ptr %386, align 1, !pc !54 + %387 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 8, !pc !54 + store i8 0, ptr %387, align 2, !pc !54 + %388 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 2, i32 1, i32 0, i64 9, !pc !54 + store i8 0, ptr %388, align 1, !pc !54 + %389 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 0, i64 0, !pc !54 + store i8 0, ptr %389, align 8, !pc !54 + %390 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 0, i64 1, !pc !54 + store i8 0, ptr %390, align 1, !pc !54 + %391 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 0, i64 2, !pc !54 + store i8 0, ptr %391, align 2, !pc !54 + %392 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 0, i64 3, !pc !54 + store i8 0, ptr %392, align 1, !pc !54 + %393 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, 
i32 0, i64 3, i32 0, i64 4, !pc !54 + store i8 0, ptr %393, align 4, !pc !54 + %394 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 0, i64 5, !pc !54 + store i8 0, ptr %394, align 1, !pc !54 + %395 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 0, !pc !54 + store i8 0, ptr %395, align 2, !pc !54 + %396 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 1, !pc !54 + store i8 0, ptr %396, align 1, !pc !54 + %397 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 2, !pc !54 + store i8 0, ptr %397, align 8, !pc !54 + %398 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 3, !pc !54 + store i8 0, ptr %398, align 1, !pc !54 + %399 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 4, !pc !54 + store i8 0, ptr %399, align 2, !pc !54 + %400 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 5, !pc !54 + store i8 0, ptr %400, align 1, !pc !54 + %401 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 6, !pc !54 + store i8 0, ptr %401, align 4, !pc !54 + %402 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 7, !pc !54 + store i8 0, ptr %402, align 1, !pc !54 + %403 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 8, !pc !54 + store i8 0, ptr %403, align 2, !pc !54 + %404 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 3, i32 1, i32 0, i64 9, !pc !54 + store i8 0, ptr %404, align 1, !pc !54 + %405 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 0, i64 0, !pc !54 + store i8 0, ptr %405, align 8, !pc !54 + %406 = 
getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 0, i64 1, !pc !54 + store i8 0, ptr %406, align 1, !pc !54 + %407 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 0, i64 2, !pc !54 + store i8 0, ptr %407, align 2, !pc !54 + %408 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 0, i64 3, !pc !54 + store i8 0, ptr %408, align 1, !pc !54 + %409 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 0, i64 4, !pc !54 + store i8 0, ptr %409, align 4, !pc !54 + %410 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 0, i64 5, !pc !54 + store i8 0, ptr %410, align 1, !pc !54 + %411 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 0, !pc !54 + store i8 0, ptr %411, align 2, !pc !54 + %412 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 1, !pc !54 + store i8 0, ptr %412, align 1, !pc !54 + %413 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 2, !pc !54 + store i8 0, ptr %413, align 8, !pc !54 + %414 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 3, !pc !54 + store i8 0, ptr %414, align 1, !pc !54 + %415 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 4, !pc !54 + store i8 0, ptr %415, align 2, !pc !54 + %416 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 5, !pc !54 + store i8 0, ptr %416, align 1, !pc !54 + %417 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 6, !pc !54 + store i8 0, ptr %417, align 4, !pc !54 + %418 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 7, !pc !54 + 
store i8 0, ptr %418, align 1, !pc !54 + %419 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 8, !pc !54 + store i8 0, ptr %419, align 2, !pc !54 + %420 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 4, i32 1, i32 0, i64 9, !pc !54 + store i8 0, ptr %420, align 1, !pc !54 + %421 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 0, i64 0, !pc !54 + store i8 0, ptr %421, align 8, !pc !54 + %422 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 0, i64 1, !pc !54 + store i8 0, ptr %422, align 1, !pc !54 + %423 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 0, i64 2, !pc !54 + store i8 0, ptr %423, align 2, !pc !54 + %424 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 0, i64 3, !pc !54 + store i8 0, ptr %424, align 1, !pc !54 + %425 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 0, i64 4, !pc !54 + store i8 0, ptr %425, align 4, !pc !54 + %426 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 0, i64 5, !pc !54 + store i8 0, ptr %426, align 1, !pc !54 + %427 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 1, i32 0, i64 0, !pc !54 + store i8 0, ptr %427, align 2, !pc !54 + %428 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 1, i32 0, i64 1, !pc !54 + store i8 0, ptr %428, align 1, !pc !54 + %429 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 1, i32 0, i64 2, !pc !54 + store i8 0, ptr %429, align 8, !pc !54 + %430 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 1, i32 0, i64 3, !pc !54 + store i8 0, ptr %430, align 1, !pc !54 + %431 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, 
i64 5, i32 1, i32 0, i64 4, !pc !54 + store i8 0, ptr %431, align 2, !pc !54 + %432 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 1, i32 0, i64 5, !pc !54 + store i8 0, ptr %432, align 1, !pc !54 + %433 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 1, i32 0, i64 6, !pc !54 + store i8 0, ptr %433, align 4, !pc !54 + %434 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 1, i32 0, i64 7, !pc !54 + store i8 0, ptr %434, align 1, !pc !54 + %435 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 1, i32 0, i64 8, !pc !54 + store i8 0, ptr %435, align 2, !pc !54 + %436 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 5, i32 1, i32 0, i64 9, !pc !54 + store i8 0, ptr %436, align 1, !pc !54 + %437 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 0, i64 0, !pc !54 + store i8 0, ptr %437, align 8, !pc !54 + %438 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 0, i64 1, !pc !54 + store i8 0, ptr %438, align 1, !pc !54 + %439 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 0, i64 2, !pc !54 + store i8 0, ptr %439, align 2, !pc !54 + %440 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 0, i64 3, !pc !54 + store i8 0, ptr %440, align 1, !pc !54 + %441 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 0, i64 4, !pc !54 + store i8 0, ptr %441, align 4, !pc !54 + %442 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 0, i64 5, !pc !54 + store i8 0, ptr %442, align 1, !pc !54 + %443 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 0, !pc !54 + store i8 0, ptr %443, align 2, !pc !54 + %444 = getelementptr inbounds 
%struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 1, !pc !54 + store i8 0, ptr %444, align 1, !pc !54 + %445 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 2, !pc !54 + store i8 0, ptr %445, align 8, !pc !54 + %446 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 3, !pc !54 + store i8 0, ptr %446, align 1, !pc !54 + %447 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 4, !pc !54 + store i8 0, ptr %447, align 2, !pc !54 + %448 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 5, !pc !54 + store i8 0, ptr %448, align 1, !pc !54 + %449 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 6, !pc !54 + store i8 0, ptr %449, align 4, !pc !54 + %450 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 7, !pc !54 + store i8 0, ptr %450, align 1, !pc !54 + %451 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 8, !pc !54 + store i8 0, ptr %451, align 2, !pc !54 + %452 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 6, i32 1, i32 0, i64 9, !pc !54 + store i8 0, ptr %452, align 1, !pc !54 + %453 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 0, i64 0, !pc !54 + store i8 0, ptr %453, align 8, !pc !54 + %454 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 0, i64 1, !pc !54 + store i8 0, ptr %454, align 1, !pc !54 + %455 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 0, i64 2, !pc !54 + store i8 0, ptr %455, align 2, !pc !54 + %456 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 0, i64 3, !pc !54 + store i8 0, ptr 
%456, align 1, !pc !54 + %457 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 0, i64 4, !pc !54 + store i8 0, ptr %457, align 4, !pc !54 + %458 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 0, i64 5, !pc !54 + store i8 0, ptr %458, align 1, !pc !54 + %459 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 0, !pc !54 + store i8 0, ptr %459, align 2, !pc !54 + %460 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 1, !pc !54 + store i8 0, ptr %460, align 1, !pc !54 + %461 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 2, !pc !54 + store i8 0, ptr %461, align 8, !pc !54 + %462 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 3, !pc !54 + store i8 0, ptr %462, align 1, !pc !54 + %463 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 4, !pc !54 + store i8 0, ptr %463, align 2, !pc !54 + %464 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 5, !pc !54 + store i8 0, ptr %464, align 1, !pc !54 + %465 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 6, !pc !54 + store i8 0, ptr %465, align 4, !pc !54 + %466 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 7, !pc !54 + store i8 0, ptr %466, align 1, !pc !54 + %467 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 8, !pc !54 + store i8 0, ptr %467, align 2, !pc !54 + %468 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 7, i32 0, i64 7, i32 1, i32 0, i64 9, !pc !54 + store i8 0, ptr %468, align 1, !pc !54 + %469 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, 
i32 8, i32 0, i64 0, i32 0, !pc !54 + store i64 0, ptr %469, align 8, !pc !54 + %470 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 0, i32 1, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %470, align 8, !pc !54 + %471 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 1, i32 0, !pc !54 + store i64 0, ptr %471, align 8, !pc !54 + %472 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 1, i32 1, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %472, align 8, !pc !54 + %473 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 2, i32 0, !pc !54 + store i64 0, ptr %473, align 8, !pc !54 + %474 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 2, i32 1, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %474, align 8, !pc !54 + %475 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 3, i32 0, !pc !54 + store i64 0, ptr %475, align 8, !pc !54 + %476 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 3, i32 1, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %476, align 8, !pc !54 + %477 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 4, i32 0, !pc !54 + store i64 0, ptr %477, align 8, !pc !54 + %478 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 4, i32 1, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %478, align 8, !pc !54 + %479 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 5, i32 0, !pc !54 + store i64 0, ptr %479, align 8, !pc !54 + %480 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 5, i32 1, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %480, align 8, !pc !54 + %481 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 6, i32 0, !pc !54 + store i64 0, ptr %481, align 8, !pc !54 + %482 = getelementptr inbounds 
%struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 6, i32 1, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %482, align 8, !pc !54 + %483 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 7, i32 0, !pc !54 + store i64 0, ptr %483, align 8, !pc !54 + %484 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 8, i32 0, i64 7, i32 1, i32 0, i32 0, i64 0, !pc !54 + store i64 0, ptr %484, align 8, !pc !54 + %485 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 0, !pc !54 + store i8 0, ptr %485, align 8, !pc !54 + %486 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 1, !pc !54 + store i8 0, ptr %486, align 1, !pc !54 + %487 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 2, !pc !54 + store i8 0, ptr %487, align 2, !pc !54 + %488 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 3, !pc !54 + store i8 0, ptr %488, align 1, !pc !54 + %489 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 4, !pc !54 + store i8 0, ptr %489, align 4, !pc !54 + %490 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 5, !pc !54 + store i8 0, ptr %490, align 1, !pc !54 + %491 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 6, !pc !54 + store i8 0, ptr %491, align 2, !pc !54 + %492 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 7, !pc !54 + store i8 0, ptr %492, align 1, !pc !54 + %493 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 8, !pc !54 + store i8 0, ptr %493, align 8, !pc !54 + %494 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 9, !pc !54 + store i8 0, ptr %494, align 1, !pc !54 + %495 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 10, !pc !54 + store i8 0, ptr %495, align 2, !pc !54 + %496 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 11, !pc !54 + 
store i8 0, ptr %496, align 1, !pc !54 + %497 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 12, !pc !54 + store i8 0, ptr %497, align 4, !pc !54 + %498 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 13, !pc !54 + store i8 0, ptr %498, align 1, !pc !54 + %499 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 14, !pc !54 + store i8 0, ptr %499, align 2, !pc !54 + %500 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 15, !pc !54 + store i8 0, ptr %500, align 1, !pc !54 + %501 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 16, !pc !54 + store i8 0, ptr %501, align 8, !pc !54 + %502 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 17, !pc !54 + store i8 0, ptr %502, align 1, !pc !54 + %503 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 18, !pc !54 + store i8 0, ptr %503, align 2, !pc !54 + %504 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 19, !pc !54 + store i8 0, ptr %504, align 1, !pc !54 + %505 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 20, i64 0, !pc !54 + store i8 0, ptr %505, align 4, !pc !54 + %506 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 20, i64 1, !pc !54 + store i8 0, ptr %506, align 1, !pc !54 + %507 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 20, i64 2, !pc !54 + store i8 0, ptr %507, align 2, !pc !54 + %508 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 9, i32 20, i64 3, !pc !54 + store i8 0, ptr %508, align 1, !pc !54 + %509 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 10, i32 0, !pc !54 + store i64 0, ptr %509, align 8, !pc !54 + %510 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, !pc !54 + store i16 0, ptr %510, align 8, !pc !54 + %511 = getelementptr inbounds %struct.State, 
ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 1, i32 0, !pc !54 + store i16 0, ptr %511, align 2, !pc !54 + %512 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 2, i32 0, !pc !54 + store i8 0, ptr %512, align 4, !pc !54 + %513 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 3, !pc !54 + store i8 0, ptr %513, align 1, !pc !54 + %514 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 4, !pc !54 + store i16 0, ptr %514, align 2, !pc !54 + %515 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 5, !pc !54 + store i32 0, ptr %515, align 8, !pc !54 + %516 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 6, i32 0, !pc !54 + store i16 0, ptr %516, align 4, !pc !54 + %517 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 7, !pc !54 + store i16 0, ptr %517, align 2, !pc !54 + %518 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 8, !pc !54 + store i32 0, ptr %518, align 8, !pc !54 + %519 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 9, i32 0, !pc !54 + store i16 0, ptr %519, align 4, !pc !54 + %520 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 10, !pc !54 + store i16 0, ptr %520, align 2, !pc !54 + %521 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 11, i32 0, !pc !54 + store i32 0, ptr %521, align 8, !pc !54 + %522 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 12, i32 0, !pc !54 + store i32 0, ptr %522, align 4, !pc !54 + %523 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i8 0, ptr %523, align 8, !pc !54 + %524 = getelementptr inbounds 
%struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i8 0, ptr %524, align 1, !pc !54 + %525 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i8 0, ptr %525, align 2, !pc !54 + %526 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i8 0, ptr %526, align 1, !pc !54 + %527 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i8 0, ptr %527, align 4, !pc !54 + %528 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i8 0, ptr %528, align 1, !pc !54 + %529 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i8 0, ptr %529, align 2, !pc !54 + %530 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i8 0, ptr %530, align 1, !pc !54 + %531 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 8, !pc !54 + store i8 0, ptr %531, align 8, !pc !54 + %532 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 0, i32 0, i32 0, i64 9, !pc !54 + store i8 0, ptr %532, align 1, !pc !54 + %533 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 1, i64 0, !pc !54 + store i8 0, ptr %533, align 2, !pc !54 + %534 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 1, i64 1, !pc !54 + store i8 0, ptr %534, align 1, !pc !54 + %535 = getelementptr inbounds %struct.State, 
ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 1, i64 2, !pc !54 + store i8 0, ptr %535, align 4, !pc !54 + %536 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 1, i64 3, !pc !54 + store i8 0, ptr %536, align 1, !pc !54 + %537 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 1, i64 4, !pc !54 + store i8 0, ptr %537, align 2, !pc !54 + %538 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 0, i32 1, i64 5, !pc !54 + store i8 0, ptr %538, align 1, !pc !54 + %539 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i8 0, ptr %539, align 8, !pc !54 + %540 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i8 0, ptr %540, align 1, !pc !54 + %541 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i8 0, ptr %541, align 2, !pc !54 + %542 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i8 0, ptr %542, align 1, !pc !54 + %543 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i8 0, ptr %543, align 4, !pc !54 + %544 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i8 0, ptr %544, align 1, !pc !54 + %545 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i8 0, ptr %545, align 2, !pc !54 + %546 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 
0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i8 0, ptr %546, align 1, !pc !54 + %547 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 8, !pc !54 + store i8 0, ptr %547, align 8, !pc !54 + %548 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 0, i32 0, i32 0, i64 9, !pc !54 + store i8 0, ptr %548, align 1, !pc !54 + %549 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 1, i64 0, !pc !54 + store i8 0, ptr %549, align 2, !pc !54 + %550 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 1, i64 1, !pc !54 + store i8 0, ptr %550, align 1, !pc !54 + %551 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 1, i64 2, !pc !54 + store i8 0, ptr %551, align 4, !pc !54 + %552 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 1, i64 3, !pc !54 + store i8 0, ptr %552, align 1, !pc !54 + %553 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 1, i64 4, !pc !54 + store i8 0, ptr %553, align 2, !pc !54 + %554 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 1, i32 1, i64 5, !pc !54 + store i8 0, ptr %554, align 1, !pc !54 + %555 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i8 0, ptr %555, align 8, !pc !54 + %556 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i8 0, ptr %556, align 1, !pc !54 + %557 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i8 0, 
ptr %557, align 2, !pc !54 + %558 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i8 0, ptr %558, align 1, !pc !54 + %559 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i8 0, ptr %559, align 4, !pc !54 + %560 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i8 0, ptr %560, align 1, !pc !54 + %561 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i8 0, ptr %561, align 2, !pc !54 + %562 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i8 0, ptr %562, align 1, !pc !54 + %563 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 8, !pc !54 + store i8 0, ptr %563, align 8, !pc !54 + %564 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 0, i32 0, i32 0, i64 9, !pc !54 + store i8 0, ptr %564, align 1, !pc !54 + %565 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 1, i64 0, !pc !54 + store i8 0, ptr %565, align 2, !pc !54 + %566 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 1, i64 1, !pc !54 + store i8 0, ptr %566, align 1, !pc !54 + %567 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 1, i64 2, !pc !54 + store i8 0, ptr %567, align 4, !pc !54 + %568 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 1, i64 3, !pc !54 + store i8 0, ptr %568, align 1, !pc !54 + %569 = 
getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 1, i64 4, !pc !54 + store i8 0, ptr %569, align 2, !pc !54 + %570 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 2, i32 1, i64 5, !pc !54 + store i8 0, ptr %570, align 1, !pc !54 + %571 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i8 0, ptr %571, align 8, !pc !54 + %572 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i8 0, ptr %572, align 1, !pc !54 + %573 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i8 0, ptr %573, align 2, !pc !54 + %574 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i8 0, ptr %574, align 1, !pc !54 + %575 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i8 0, ptr %575, align 4, !pc !54 + %576 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i8 0, ptr %576, align 1, !pc !54 + %577 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i8 0, ptr %577, align 2, !pc !54 + %578 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i8 0, ptr %578, align 1, !pc !54 + %579 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 8, !pc !54 + store i8 0, ptr %579, align 8, !pc !54 + %580 = getelementptr 
inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 0, i32 0, i32 0, i64 9, !pc !54 + store i8 0, ptr %580, align 1, !pc !54 + %581 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 1, i64 0, !pc !54 + store i8 0, ptr %581, align 2, !pc !54 + %582 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 1, i64 1, !pc !54 + store i8 0, ptr %582, align 1, !pc !54 + %583 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 1, i64 2, !pc !54 + store i8 0, ptr %583, align 4, !pc !54 + %584 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 1, i64 3, !pc !54 + store i8 0, ptr %584, align 1, !pc !54 + %585 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 1, i64 4, !pc !54 + store i8 0, ptr %585, align 2, !pc !54 + %586 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 3, i32 1, i64 5, !pc !54 + store i8 0, ptr %586, align 1, !pc !54 + %587 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i8 0, ptr %587, align 8, !pc !54 + %588 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i8 0, ptr %588, align 1, !pc !54 + %589 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i8 0, ptr %589, align 2, !pc !54 + %590 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i8 0, ptr %590, align 1, !pc !54 + %591 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, 
i32 13, i64 4, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i8 0, ptr %591, align 4, !pc !54 + %592 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i8 0, ptr %592, align 1, !pc !54 + %593 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i8 0, ptr %593, align 2, !pc !54 + %594 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i8 0, ptr %594, align 1, !pc !54 + %595 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 0, i32 0, i32 0, i64 8, !pc !54 + store i8 0, ptr %595, align 8, !pc !54 + %596 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 0, i32 0, i32 0, i64 9, !pc !54 + store i8 0, ptr %596, align 1, !pc !54 + %597 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 1, i64 0, !pc !54 + store i8 0, ptr %597, align 2, !pc !54 + %598 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 1, i64 1, !pc !54 + store i8 0, ptr %598, align 1, !pc !54 + %599 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 1, i64 2, !pc !54 + store i8 0, ptr %599, align 4, !pc !54 + %600 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 1, i64 3, !pc !54 + store i8 0, ptr %600, align 1, !pc !54 + %601 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 1, i64 4, !pc !54 + store i8 0, ptr %601, align 2, !pc !54 + %602 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 4, i32 1, i64 5, !pc !54 + store i8 0, ptr 
%602, align 1, !pc !54 + %603 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i8 0, ptr %603, align 8, !pc !54 + %604 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i8 0, ptr %604, align 1, !pc !54 + %605 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i8 0, ptr %605, align 2, !pc !54 + %606 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i8 0, ptr %606, align 1, !pc !54 + %607 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i8 0, ptr %607, align 4, !pc !54 + %608 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i8 0, ptr %608, align 1, !pc !54 + %609 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i8 0, ptr %609, align 2, !pc !54 + %610 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i8 0, ptr %610, align 1, !pc !54 + %611 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 8, !pc !54 + store i8 0, ptr %611, align 8, !pc !54 + %612 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 0, i32 0, i32 0, i64 9, !pc !54 + store i8 0, ptr %612, align 1, !pc !54 + %613 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 1, i64 0, !pc !54 + store i8 0, ptr 
%613, align 2, !pc !54 + %614 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 1, i64 1, !pc !54 + store i8 0, ptr %614, align 1, !pc !54 + %615 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 1, i64 2, !pc !54 + store i8 0, ptr %615, align 4, !pc !54 + %616 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 1, i64 3, !pc !54 + store i8 0, ptr %616, align 1, !pc !54 + %617 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 1, i64 4, !pc !54 + store i8 0, ptr %617, align 2, !pc !54 + %618 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 5, i32 1, i64 5, !pc !54 + store i8 0, ptr %618, align 1, !pc !54 + %619 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i8 0, ptr %619, align 8, !pc !54 + %620 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i8 0, ptr %620, align 1, !pc !54 + %621 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i8 0, ptr %621, align 2, !pc !54 + %622 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i8 0, ptr %622, align 1, !pc !54 + %623 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i8 0, ptr %623, align 4, !pc !54 + %624 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i8 0, ptr %624, align 1, !pc !54 + %625 = getelementptr inbounds 
%struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i8 0, ptr %625, align 2, !pc !54 + %626 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i8 0, ptr %626, align 1, !pc !54 + %627 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 8, !pc !54 + store i8 0, ptr %627, align 8, !pc !54 + %628 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 0, i32 0, i32 0, i64 9, !pc !54 + store i8 0, ptr %628, align 1, !pc !54 + %629 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 1, i64 0, !pc !54 + store i8 0, ptr %629, align 2, !pc !54 + %630 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 1, i64 1, !pc !54 + store i8 0, ptr %630, align 1, !pc !54 + %631 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 1, i64 2, !pc !54 + store i8 0, ptr %631, align 4, !pc !54 + %632 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 1, i64 3, !pc !54 + store i8 0, ptr %632, align 1, !pc !54 + %633 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 1, i64 4, !pc !54 + store i8 0, ptr %633, align 2, !pc !54 + %634 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 6, i32 1, i64 5, !pc !54 + store i8 0, ptr %634, align 1, !pc !54 + %635 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 0, i32 0, i32 0, i64 0, !pc !54 + store i8 0, ptr %635, align 8, !pc !54 + %636 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 
7, i32 0, i32 0, i32 0, i64 1, !pc !54 + store i8 0, ptr %636, align 1, !pc !54 + %637 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 0, i32 0, i32 0, i64 2, !pc !54 + store i8 0, ptr %637, align 2, !pc !54 + %638 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 0, i32 0, i32 0, i64 3, !pc !54 + store i8 0, ptr %638, align 1, !pc !54 + %639 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 0, i32 0, i32 0, i64 4, !pc !54 + store i8 0, ptr %639, align 4, !pc !54 + %640 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 0, i32 0, i32 0, i64 5, !pc !54 + store i8 0, ptr %640, align 1, !pc !54 + %641 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 0, i32 0, i32 0, i64 6, !pc !54 + store i8 0, ptr %641, align 2, !pc !54 + %642 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 0, i32 0, i32 0, i64 7, !pc !54 + store i8 0, ptr %642, align 1, !pc !54 + %643 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 0, i32 0, i32 0, i64 8, !pc !54 + store i8 0, ptr %643, align 8, !pc !54 + %644 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 0, i32 0, i32 0, i64 9, !pc !54 + store i8 0, ptr %644, align 1, !pc !54 + %645 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 1, i64 0, !pc !54 + store i8 0, ptr %645, align 2, !pc !54 + %646 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 1, i64 1, !pc !54 + store i8 0, ptr %646, align 1, !pc !54 + %647 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 1, i64 
2, !pc !54 + store i8 0, ptr %647, align 4, !pc !54 + %648 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 1, i64 3, !pc !54 + store i8 0, ptr %648, align 1, !pc !54 + %649 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 1, i64 4, !pc !54 + store i8 0, ptr %649, align 2, !pc !54 + %650 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 13, i64 7, i32 1, i64 5, !pc !54 + store i8 0, ptr %650, align 1, !pc !54 + %651 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 0, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %651, align 8, !pc !54 + %652 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 1, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %652, align 8, !pc !54 + %653 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 2, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %653, align 8, !pc !54 + %654 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 3, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %654, align 8, !pc !54 + %655 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 4, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %655, align 8, !pc !54 + %656 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 5, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %656, align 8, !pc !54 + %657 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 6, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %657, align 8, !pc !54 + %658 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 7, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %658, align 8, !pc !54 + %659 = 
getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 8, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %659, align 8, !pc !54 + %660 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 9, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %660, align 8, !pc !54 + %661 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 10, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %661, align 8, !pc !54 + %662 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 11, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %662, align 8, !pc !54 + %663 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 12, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %663, align 8, !pc !54 + %664 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 13, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %664, align 8, !pc !54 + %665 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 14, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %665, align 8, !pc !54 + %666 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 0, i32 14, i64 15, i32 0, i32 0, i64 0, !pc !54 + store i128 0, ptr %666, align 8, !pc !54 + %667 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 0, !pc !54 + store i8 0, ptr %667, align 8, !pc !54 + %668 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 1, !pc !54 + store i8 0, ptr %668, align 1, !pc !54 + %669 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 2, !pc !54 + store i8 0, ptr %669, align 2, !pc !54 + %670 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 3, !pc !54 + store i8 0, ptr %670, align 
1, !pc !54 + %671 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 4, !pc !54 + store i8 0, ptr %671, align 4, !pc !54 + %672 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 5, !pc !54 + store i8 0, ptr %672, align 1, !pc !54 + %673 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 6, !pc !54 + store i8 0, ptr %673, align 2, !pc !54 + %674 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 7, !pc !54 + store i8 0, ptr %674, align 1, !pc !54 + %675 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 8, !pc !54 + store i8 0, ptr %675, align 8, !pc !54 + %676 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 9, !pc !54 + store i8 0, ptr %676, align 1, !pc !54 + %677 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 10, !pc !54 + store i8 0, ptr %677, align 2, !pc !54 + %678 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 11, !pc !54 + store i8 0, ptr %678, align 1, !pc !54 + %679 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 12, !pc !54 + store i8 0, ptr %679, align 4, !pc !54 + %680 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 13, !pc !54 + store i8 0, ptr %680, align 1, !pc !54 + %681 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 14, !pc !54 + store i8 0, ptr %681, align 2, !pc !54 + %682 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 15, !pc !54 + store i8 0, ptr %682, align 1, !pc !54 + %683 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 16, !pc !54 + store i8 0, ptr %683, align 8, !pc !54 + %684 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, 
i32 11, i32 0, i32 1, i64 17, !pc !54 + store i8 0, ptr %684, align 1, !pc !54 + %685 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 18, !pc !54 + store i8 0, ptr %685, align 2, !pc !54 + %686 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 19, !pc !54 + store i8 0, ptr %686, align 1, !pc !54 + %687 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 20, !pc !54 + store i8 0, ptr %687, align 4, !pc !54 + %688 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 21, !pc !54 + store i8 0, ptr %688, align 1, !pc !54 + %689 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 22, !pc !54 + store i8 0, ptr %689, align 2, !pc !54 + %690 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 23, !pc !54 + store i8 0, ptr %690, align 1, !pc !54 + %691 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 24, !pc !54 + store i8 0, ptr %691, align 8, !pc !54 + %692 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 25, !pc !54 + store i8 0, ptr %692, align 1, !pc !54 + %693 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 26, !pc !54 + store i8 0, ptr %693, align 2, !pc !54 + %694 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 27, !pc !54 + store i8 0, ptr %694, align 1, !pc !54 + %695 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 28, !pc !54 + store i8 0, ptr %695, align 4, !pc !54 + %696 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 29, !pc !54 + store i8 0, ptr %696, align 1, !pc !54 + %697 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 30, !pc !54 + store i8 0, ptr %697, align 2, !pc 
!54 + %698 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 31, !pc !54 + store i8 0, ptr %698, align 1, !pc !54 + %699 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 32, !pc !54 + store i8 0, ptr %699, align 8, !pc !54 + %700 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 33, !pc !54 + store i8 0, ptr %700, align 1, !pc !54 + %701 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 34, !pc !54 + store i8 0, ptr %701, align 2, !pc !54 + %702 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 35, !pc !54 + store i8 0, ptr %702, align 1, !pc !54 + %703 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 36, !pc !54 + store i8 0, ptr %703, align 4, !pc !54 + %704 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 37, !pc !54 + store i8 0, ptr %704, align 1, !pc !54 + %705 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 38, !pc !54 + store i8 0, ptr %705, align 2, !pc !54 + %706 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 39, !pc !54 + store i8 0, ptr %706, align 1, !pc !54 + %707 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 40, !pc !54 + store i8 0, ptr %707, align 8, !pc !54 + %708 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 41, !pc !54 + store i8 0, ptr %708, align 1, !pc !54 + %709 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 42, !pc !54 + store i8 0, ptr %709, align 2, !pc !54 + %710 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 43, !pc !54 + store i8 0, ptr %710, align 1, !pc !54 + %711 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, 
i32 11, i32 0, i32 1, i64 44, !pc !54 + store i8 0, ptr %711, align 4, !pc !54 + %712 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 45, !pc !54 + store i8 0, ptr %712, align 1, !pc !54 + %713 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 46, !pc !54 + store i8 0, ptr %713, align 2, !pc !54 + %714 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 47, !pc !54 + store i8 0, ptr %714, align 1, !pc !54 + %715 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 48, !pc !54 + store i8 0, ptr %715, align 8, !pc !54 + %716 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 49, !pc !54 + store i8 0, ptr %716, align 1, !pc !54 + %717 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 50, !pc !54 + store i8 0, ptr %717, align 2, !pc !54 + %718 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 51, !pc !54 + store i8 0, ptr %718, align 1, !pc !54 + %719 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 52, !pc !54 + store i8 0, ptr %719, align 4, !pc !54 + %720 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 53, !pc !54 + store i8 0, ptr %720, align 1, !pc !54 + %721 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 54, !pc !54 + store i8 0, ptr %721, align 2, !pc !54 + %722 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 55, !pc !54 + store i8 0, ptr %722, align 1, !pc !54 + %723 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 56, !pc !54 + store i8 0, ptr %723, align 8, !pc !54 + %724 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 57, !pc !54 + store i8 0, ptr %724, align 1, !pc 
!54 + %725 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 58, !pc !54 + store i8 0, ptr %725, align 2, !pc !54 + %726 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 59, !pc !54 + store i8 0, ptr %726, align 1, !pc !54 + %727 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 60, !pc !54 + store i8 0, ptr %727, align 4, !pc !54 + %728 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 61, !pc !54 + store i8 0, ptr %728, align 1, !pc !54 + %729 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 62, !pc !54 + store i8 0, ptr %729, align 2, !pc !54 + %730 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 63, !pc !54 + store i8 0, ptr %730, align 1, !pc !54 + %731 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 64, !pc !54 + store i8 0, ptr %731, align 8, !pc !54 + %732 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 65, !pc !54 + store i8 0, ptr %732, align 1, !pc !54 + %733 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 66, !pc !54 + store i8 0, ptr %733, align 2, !pc !54 + %734 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 67, !pc !54 + store i8 0, ptr %734, align 1, !pc !54 + %735 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 68, !pc !54 + store i8 0, ptr %735, align 4, !pc !54 + %736 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 69, !pc !54 + store i8 0, ptr %736, align 1, !pc !54 + %737 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 70, !pc !54 + store i8 0, ptr %737, align 2, !pc !54 + %738 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, 
i32 11, i32 0, i32 1, i64 71, !pc !54 + store i8 0, ptr %738, align 1, !pc !54 + %739 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 72, !pc !54 + store i8 0, ptr %739, align 8, !pc !54 + %740 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 73, !pc !54 + store i8 0, ptr %740, align 1, !pc !54 + %741 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 74, !pc !54 + store i8 0, ptr %741, align 2, !pc !54 + %742 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 75, !pc !54 + store i8 0, ptr %742, align 1, !pc !54 + %743 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 76, !pc !54 + store i8 0, ptr %743, align 4, !pc !54 + %744 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 77, !pc !54 + store i8 0, ptr %744, align 1, !pc !54 + %745 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 78, !pc !54 + store i8 0, ptr %745, align 2, !pc !54 + %746 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 79, !pc !54 + store i8 0, ptr %746, align 1, !pc !54 + %747 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 80, !pc !54 + store i8 0, ptr %747, align 8, !pc !54 + %748 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 81, !pc !54 + store i8 0, ptr %748, align 1, !pc !54 + %749 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 82, !pc !54 + store i8 0, ptr %749, align 2, !pc !54 + %750 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 83, !pc !54 + store i8 0, ptr %750, align 1, !pc !54 + %751 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 84, !pc !54 + store i8 0, ptr %751, align 4, !pc 
!54 + %752 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 85, !pc !54 + store i8 0, ptr %752, align 1, !pc !54 + %753 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 86, !pc !54 + store i8 0, ptr %753, align 2, !pc !54 + %754 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 87, !pc !54 + store i8 0, ptr %754, align 1, !pc !54 + %755 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 88, !pc !54 + store i8 0, ptr %755, align 8, !pc !54 + %756 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 89, !pc !54 + store i8 0, ptr %756, align 1, !pc !54 + %757 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 90, !pc !54 + store i8 0, ptr %757, align 2, !pc !54 + %758 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 91, !pc !54 + store i8 0, ptr %758, align 1, !pc !54 + %759 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 92, !pc !54 + store i8 0, ptr %759, align 4, !pc !54 + %760 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 93, !pc !54 + store i8 0, ptr %760, align 1, !pc !54 + %761 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 94, !pc !54 + store i8 0, ptr %761, align 2, !pc !54 + %762 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 11, i32 0, i32 1, i64 95, !pc !54 + store i8 0, ptr %762, align 1, !pc !54 + %763 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 0, i32 0, i32 0, !pc !54 + store i64 0, ptr %763, align 8, !pc !54 + %764 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 0, i32 1, !pc !54 + store i32 0, ptr %764, align 8, !pc !54 + %765 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, 
i32 0, i32 2, !pc !54 + store i32 0, ptr %765, align 4, !pc !54 + %766 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 1, i32 0, i32 0, !pc !54 + store i64 0, ptr %766, align 8, !pc !54 + %767 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 1, i32 1, !pc !54 + store i32 0, ptr %767, align 8, !pc !54 + %768 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 1, i32 2, !pc !54 + store i32 0, ptr %768, align 4, !pc !54 + %769 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 2, i32 0, i32 0, !pc !54 + store i64 0, ptr %769, align 8, !pc !54 + %770 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 2, i32 1, !pc !54 + store i32 0, ptr %770, align 8, !pc !54 + %771 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 2, i32 2, !pc !54 + store i32 0, ptr %771, align 4, !pc !54 + %772 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 3, i32 0, i32 0, !pc !54 + store i64 0, ptr %772, align 8, !pc !54 + %773 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 3, i32 1, !pc !54 + store i32 0, ptr %773, align 8, !pc !54 + %774 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 3, i32 2, !pc !54 + store i32 0, ptr %774, align 4, !pc !54 + %775 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 4, i32 0, i32 0, !pc !54 + store i64 0, ptr %775, align 8, !pc !54 + %776 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 4, i32 1, !pc !54 + store i32 0, ptr %776, align 8, !pc !54 + %777 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 4, i32 2, !pc !54 + store i32 0, ptr %777, align 4, !pc !54 + %778 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 5, i32 0, i32 0, !pc !54 + store i64 0, ptr %778, align 8, !pc !54 + %779 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 
0, i32 12, i32 5, i32 1, !pc !54 + store i32 0, ptr %779, align 8, !pc !54 + %780 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 12, i32 5, i32 2, !pc !54 + store i32 0, ptr %780, align 4, !pc !54 + %781 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 0, i32 0, !pc !54 + store i64 0, ptr %781, align 8, !pc !54 + %782 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 0, i32 1, !pc !54 + store i64 0, ptr %782, align 8, !pc !54 + %783 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 1, i32 0, !pc !54 + store i64 0, ptr %783, align 8, !pc !54 + %784 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 1, i32 1, !pc !54 + store i64 0, ptr %784, align 8, !pc !54 + %785 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 2, i32 0, !pc !54 + store i64 0, ptr %785, align 8, !pc !54 + %786 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 2, i32 1, !pc !54 + store i64 0, ptr %786, align 8, !pc !54 + %787 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 3, i32 0, !pc !54 + store i64 0, ptr %787, align 8, !pc !54 + %788 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 3, i32 1, !pc !54 + store i64 0, ptr %788, align 8, !pc !54 + %789 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 4, i32 0, !pc !54 + store i64 0, ptr %789, align 8, !pc !54 + %790 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 4, i32 1, !pc !54 + store i64 0, ptr %790, align 8, !pc !54 + %791 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 5, i32 0, !pc !54 + store i64 0, ptr %791, align 8, !pc !54 + %792 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 5, i32 1, !pc !54 + store i64 0, ptr %792, align 8, !pc !54 + %793 
= getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 6, i32 0, !pc !54 + store i64 0, ptr %793, align 8, !pc !54 + %794 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 6, i32 1, !pc !54 + store i64 0, ptr %794, align 8, !pc !54 + %795 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 7, i32 0, !pc !54 + store i64 0, ptr %795, align 8, !pc !54 + %796 = getelementptr inbounds %struct.State, ptr %7, i64 0, i32 0, i32 13, i32 0, i64 7, i32 1, !pc !54 + store i64 0, ptr %796, align 8, !pc !54 + %797 = load i64, ptr @__anvill_reg_RAX, align 8, !pc !54 + store i64 %797, ptr %308, align 8, !pc !54 + %798 = load i64, ptr @__anvill_reg_RBX, align 8, !pc !54 + store i64 %798, ptr %310, align 8, !pc !54 + %799 = load i64, ptr @__anvill_reg_RCX, align 8, !pc !54 + store i64 %799, ptr %312, align 8, !pc !54 + %800 = load i64, ptr @__anvill_reg_RDX, align 8, !pc !54 + store i64 %800, ptr %314, align 8, !pc !54 + %801 = load i64, ptr @__anvill_reg_RDI, align 8, !pc !54 + store i64 %801, ptr %318, align 8, !pc !54 + %802 = load i64, ptr @__anvill_reg_RBP, align 8, !pc !54 + store i64 %802, ptr %322, align 8, !pc !54 + %803 = load i64, ptr @__anvill_reg_R8, align 8, !pc !54 + store i64 %803, ptr %324, align 8, !pc !54 + %804 = load i64, ptr @__anvill_reg_R9, align 8, !pc !54 + store i64 %804, ptr %326, align 8, !pc !54 + %805 = load i64, ptr @__anvill_reg_R10, align 8, !pc !54 + store i64 %805, ptr %328, align 8, !pc !54 + %806 = load i64, ptr @__anvill_reg_R11, align 8, !pc !54 + store i64 %806, ptr %330, align 8, !pc !54 + %807 = load i64, ptr @__anvill_reg_R12, align 8, !pc !54 + store i64 %807, ptr %332, align 8, !pc !54 + %808 = load i64, ptr @__anvill_reg_R13, align 8, !pc !54 + store i64 %808, ptr %334, align 8, !pc !54 + %809 = load i64, ptr @__anvill_reg_R14, align 8, !pc !54 + store i64 %809, ptr %336, align 8, !pc !54 + %810 = load i64, ptr @__anvill_reg_R15, align 8, !pc !54 + 
store i64 %810, ptr %338, align 8, !pc !54 + %811 = load i16, ptr @__anvill_reg_SS, align 2, !pc !54 + store i16 %811, ptr %284, align 2, !pc !54 + %812 = load i16, ptr @__anvill_reg_ES, align 2, !pc !54 + store i16 %812, ptr %286, align 2, !pc !54 + %813 = load i16, ptr @__anvill_reg_GS, align 2, !pc !54 + store i16 %813, ptr %288, align 2, !pc !54 + %814 = load i16, ptr @__anvill_reg_FS, align 2, !pc !54 + store i16 %814, ptr %290, align 2, !pc !54 + %815 = load i16, ptr @__anvill_reg_DS, align 2, !pc !54 + store i16 %815, ptr %292, align 2, !pc !54 + %816 = load i16, ptr @__anvill_reg_CS, align 2, !pc !54 + store i16 %816, ptr %294, align 2, !pc !54 + %817 = load i8, ptr @__anvill_reg_XMM0, align 1, !pc !54 + %818 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 1), align 1, !pc !54 + %819 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 2), align 1, !pc !54 + %820 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 3), align 1, !pc !54 + %821 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 4), align 1, !pc !54 + %822 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 5), align 1, !pc !54 + %823 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 6), align 1, !pc !54 + %824 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 7), align 1, !pc !54 + %825 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 8), align 1, !pc !54 + %826 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 9), align 1, !pc !54 + %827 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 10), align 1, !pc !54 + %828 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 11), align 1, !pc !54 + %829 = load i8, ptr 
getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 12), align 1, !pc !54 + %830 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 13), align 1, !pc !54 + %831 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 14), align 1, !pc !54 + %832 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM0, i64 0, i64 15), align 1, !pc !54 + store i8 %817, ptr %10, align 8, !pc !54 + %833 = getelementptr inbounds i8, ptr %7, i64 17, !pc !54 + store i8 %818, ptr %833, align 1, !pc !54 + %834 = getelementptr inbounds i8, ptr %7, i64 18, !pc !54 + store i8 %819, ptr %834, align 2, !pc !54 + %835 = getelementptr inbounds i8, ptr %7, i64 19, !pc !54 + store i8 %820, ptr %835, align 1, !pc !54 + %836 = getelementptr inbounds i8, ptr %7, i64 20, !pc !54 + store i8 %821, ptr %836, align 4, !pc !54 + %837 = getelementptr inbounds i8, ptr %7, i64 21, !pc !54 + store i8 %822, ptr %837, align 1, !pc !54 + %838 = getelementptr inbounds i8, ptr %7, i64 22, !pc !54 + store i8 %823, ptr %838, align 2, !pc !54 + %839 = getelementptr inbounds i8, ptr %7, i64 23, !pc !54 + store i8 %824, ptr %839, align 1, !pc !54 + store i8 %825, ptr %11, align 8, !pc !54 + %840 = getelementptr inbounds i8, ptr %7, i64 25, !pc !54 + store i8 %826, ptr %840, align 1, !pc !54 + %841 = getelementptr inbounds i8, ptr %7, i64 26, !pc !54 + store i8 %827, ptr %841, align 2, !pc !54 + %842 = getelementptr inbounds i8, ptr %7, i64 27, !pc !54 + store i8 %828, ptr %842, align 1, !pc !54 + %843 = getelementptr inbounds i8, ptr %7, i64 28, !pc !54 + store i8 %829, ptr %843, align 4, !pc !54 + %844 = getelementptr inbounds i8, ptr %7, i64 29, !pc !54 + store i8 %830, ptr %844, align 1, !pc !54 + %845 = getelementptr inbounds i8, ptr %7, i64 30, !pc !54 + store i8 %831, ptr %845, align 2, !pc !54 + %846 = getelementptr inbounds i8, ptr %7, i64 31, !pc !54 + store i8 %832, ptr %846, align 1, !pc !54 + %847 = load i8, ptr 
@__anvill_reg_XMM1, align 1, !pc !54 + %848 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 1), align 1, !pc !54 + %849 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 2), align 1, !pc !54 + %850 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 3), align 1, !pc !54 + %851 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 4), align 1, !pc !54 + %852 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 5), align 1, !pc !54 + %853 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 6), align 1, !pc !54 + %854 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 7), align 1, !pc !54 + %855 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 8), align 1, !pc !54 + %856 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 9), align 1, !pc !54 + %857 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 10), align 1, !pc !54 + %858 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 11), align 1, !pc !54 + %859 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 12), align 1, !pc !54 + %860 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 13), align 1, !pc !54 + %861 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 14), align 1, !pc !54 + %862 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM1, i64 0, i64 15), align 1, !pc !54 + store i8 %847, ptr %18, align 8, !pc !54 + %863 = getelementptr inbounds i8, ptr %7, i64 81, !pc !54 + store i8 %848, ptr %863, align 1, !pc !54 + %864 = getelementptr inbounds i8, ptr %7, i64 82, !pc !54 + store i8 %849, ptr %864, align 2, !pc !54 + %865 = 
getelementptr inbounds i8, ptr %7, i64 83, !pc !54 + store i8 %850, ptr %865, align 1, !pc !54 + %866 = getelementptr inbounds i8, ptr %7, i64 84, !pc !54 + store i8 %851, ptr %866, align 4, !pc !54 + %867 = getelementptr inbounds i8, ptr %7, i64 85, !pc !54 + store i8 %852, ptr %867, align 1, !pc !54 + %868 = getelementptr inbounds i8, ptr %7, i64 86, !pc !54 + store i8 %853, ptr %868, align 2, !pc !54 + %869 = getelementptr inbounds i8, ptr %7, i64 87, !pc !54 + store i8 %854, ptr %869, align 1, !pc !54 + store i8 %855, ptr %19, align 8, !pc !54 + %870 = getelementptr inbounds i8, ptr %7, i64 89, !pc !54 + store i8 %856, ptr %870, align 1, !pc !54 + %871 = getelementptr inbounds i8, ptr %7, i64 90, !pc !54 + store i8 %857, ptr %871, align 2, !pc !54 + %872 = getelementptr inbounds i8, ptr %7, i64 91, !pc !54 + store i8 %858, ptr %872, align 1, !pc !54 + %873 = getelementptr inbounds i8, ptr %7, i64 92, !pc !54 + store i8 %859, ptr %873, align 4, !pc !54 + %874 = getelementptr inbounds i8, ptr %7, i64 93, !pc !54 + store i8 %860, ptr %874, align 1, !pc !54 + %875 = getelementptr inbounds i8, ptr %7, i64 94, !pc !54 + store i8 %861, ptr %875, align 2, !pc !54 + %876 = getelementptr inbounds i8, ptr %7, i64 95, !pc !54 + store i8 %862, ptr %876, align 1, !pc !54 + %877 = load i8, ptr @__anvill_reg_XMM2, align 1, !pc !54 + %878 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 1), align 1, !pc !54 + %879 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 2), align 1, !pc !54 + %880 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 3), align 1, !pc !54 + %881 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 4), align 1, !pc !54 + %882 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 5), align 1, !pc !54 + %883 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 
6), align 1, !pc !54 + %884 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 7), align 1, !pc !54 + %885 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 8), align 1, !pc !54 + %886 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 9), align 1, !pc !54 + %887 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 10), align 1, !pc !54 + %888 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 11), align 1, !pc !54 + %889 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 12), align 1, !pc !54 + %890 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 13), align 1, !pc !54 + %891 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 14), align 1, !pc !54 + %892 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM2, i64 0, i64 15), align 1, !pc !54 + store i8 %877, ptr %26, align 8, !pc !54 + %893 = getelementptr inbounds i8, ptr %7, i64 145, !pc !54 + store i8 %878, ptr %893, align 1, !pc !54 + %894 = getelementptr inbounds i8, ptr %7, i64 146, !pc !54 + store i8 %879, ptr %894, align 2, !pc !54 + %895 = getelementptr inbounds i8, ptr %7, i64 147, !pc !54 + store i8 %880, ptr %895, align 1, !pc !54 + %896 = getelementptr inbounds i8, ptr %7, i64 148, !pc !54 + store i8 %881, ptr %896, align 4, !pc !54 + %897 = getelementptr inbounds i8, ptr %7, i64 149, !pc !54 + store i8 %882, ptr %897, align 1, !pc !54 + %898 = getelementptr inbounds i8, ptr %7, i64 150, !pc !54 + store i8 %883, ptr %898, align 2, !pc !54 + %899 = getelementptr inbounds i8, ptr %7, i64 151, !pc !54 + store i8 %884, ptr %899, align 1, !pc !54 + store i8 %885, ptr %27, align 8, !pc !54 + %900 = getelementptr inbounds i8, ptr %7, i64 153, !pc !54 + store i8 %886, ptr %900, align 1, !pc !54 + %901 = getelementptr 
inbounds i8, ptr %7, i64 154, !pc !54 + store i8 %887, ptr %901, align 2, !pc !54 + %902 = getelementptr inbounds i8, ptr %7, i64 155, !pc !54 + store i8 %888, ptr %902, align 1, !pc !54 + %903 = getelementptr inbounds i8, ptr %7, i64 156, !pc !54 + store i8 %889, ptr %903, align 4, !pc !54 + %904 = getelementptr inbounds i8, ptr %7, i64 157, !pc !54 + store i8 %890, ptr %904, align 1, !pc !54 + %905 = getelementptr inbounds i8, ptr %7, i64 158, !pc !54 + store i8 %891, ptr %905, align 2, !pc !54 + %906 = getelementptr inbounds i8, ptr %7, i64 159, !pc !54 + store i8 %892, ptr %906, align 1, !pc !54 + %907 = load i8, ptr @__anvill_reg_XMM3, align 1, !pc !54 + %908 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 1), align 1, !pc !54 + %909 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 2), align 1, !pc !54 + %910 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 3), align 1, !pc !54 + %911 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 4), align 1, !pc !54 + %912 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 5), align 1, !pc !54 + %913 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 6), align 1, !pc !54 + %914 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 7), align 1, !pc !54 + %915 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 8), align 1, !pc !54 + %916 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 9), align 1, !pc !54 + %917 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 10), align 1, !pc !54 + %918 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 11), align 1, !pc !54 + %919 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, 
i64 12), align 1, !pc !54 + %920 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 13), align 1, !pc !54 + %921 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 14), align 1, !pc !54 + %922 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM3, i64 0, i64 15), align 1, !pc !54 + store i8 %907, ptr %34, align 8, !pc !54 + %923 = getelementptr inbounds i8, ptr %7, i64 209, !pc !54 + store i8 %908, ptr %923, align 1, !pc !54 + %924 = getelementptr inbounds i8, ptr %7, i64 210, !pc !54 + store i8 %909, ptr %924, align 2, !pc !54 + %925 = getelementptr inbounds i8, ptr %7, i64 211, !pc !54 + store i8 %910, ptr %925, align 1, !pc !54 + %926 = getelementptr inbounds i8, ptr %7, i64 212, !pc !54 + store i8 %911, ptr %926, align 4, !pc !54 + %927 = getelementptr inbounds i8, ptr %7, i64 213, !pc !54 + store i8 %912, ptr %927, align 1, !pc !54 + %928 = getelementptr inbounds i8, ptr %7, i64 214, !pc !54 + store i8 %913, ptr %928, align 2, !pc !54 + %929 = getelementptr inbounds i8, ptr %7, i64 215, !pc !54 + store i8 %914, ptr %929, align 1, !pc !54 + store i8 %915, ptr %35, align 8, !pc !54 + %930 = getelementptr inbounds i8, ptr %7, i64 217, !pc !54 + store i8 %916, ptr %930, align 1, !pc !54 + %931 = getelementptr inbounds i8, ptr %7, i64 218, !pc !54 + store i8 %917, ptr %931, align 2, !pc !54 + %932 = getelementptr inbounds i8, ptr %7, i64 219, !pc !54 + store i8 %918, ptr %932, align 1, !pc !54 + %933 = getelementptr inbounds i8, ptr %7, i64 220, !pc !54 + store i8 %919, ptr %933, align 4, !pc !54 + %934 = getelementptr inbounds i8, ptr %7, i64 221, !pc !54 + store i8 %920, ptr %934, align 1, !pc !54 + %935 = getelementptr inbounds i8, ptr %7, i64 222, !pc !54 + store i8 %921, ptr %935, align 2, !pc !54 + %936 = getelementptr inbounds i8, ptr %7, i64 223, !pc !54 + store i8 %922, ptr %936, align 1, !pc !54 + %937 = load i8, ptr @__anvill_reg_XMM4, align 1, !pc !54 + %938 = load 
i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 1), align 1, !pc !54 + %939 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 2), align 1, !pc !54 + %940 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 3), align 1, !pc !54 + %941 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 4), align 1, !pc !54 + %942 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 5), align 1, !pc !54 + %943 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 6), align 1, !pc !54 + %944 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 7), align 1, !pc !54 + %945 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 8), align 1, !pc !54 + %946 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 9), align 1, !pc !54 + %947 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 10), align 1, !pc !54 + %948 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 11), align 1, !pc !54 + %949 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 12), align 1, !pc !54 + %950 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 13), align 1, !pc !54 + %951 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 14), align 1, !pc !54 + %952 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM4, i64 0, i64 15), align 1, !pc !54 + store i8 %937, ptr %42, align 8, !pc !54 + %953 = getelementptr inbounds i8, ptr %7, i64 273, !pc !54 + store i8 %938, ptr %953, align 1, !pc !54 + %954 = getelementptr inbounds i8, ptr %7, i64 274, !pc !54 + store i8 %939, ptr %954, align 2, !pc !54 + %955 = getelementptr inbounds i8, ptr %7, i64 275, !pc 
!54 + store i8 %940, ptr %955, align 1, !pc !54 + %956 = getelementptr inbounds i8, ptr %7, i64 276, !pc !54 + store i8 %941, ptr %956, align 4, !pc !54 + %957 = getelementptr inbounds i8, ptr %7, i64 277, !pc !54 + store i8 %942, ptr %957, align 1, !pc !54 + %958 = getelementptr inbounds i8, ptr %7, i64 278, !pc !54 + store i8 %943, ptr %958, align 2, !pc !54 + %959 = getelementptr inbounds i8, ptr %7, i64 279, !pc !54 + store i8 %944, ptr %959, align 1, !pc !54 + store i8 %945, ptr %43, align 8, !pc !54 + %960 = getelementptr inbounds i8, ptr %7, i64 281, !pc !54 + store i8 %946, ptr %960, align 1, !pc !54 + %961 = getelementptr inbounds i8, ptr %7, i64 282, !pc !54 + store i8 %947, ptr %961, align 2, !pc !54 + %962 = getelementptr inbounds i8, ptr %7, i64 283, !pc !54 + store i8 %948, ptr %962, align 1, !pc !54 + %963 = getelementptr inbounds i8, ptr %7, i64 284, !pc !54 + store i8 %949, ptr %963, align 4, !pc !54 + %964 = getelementptr inbounds i8, ptr %7, i64 285, !pc !54 + store i8 %950, ptr %964, align 1, !pc !54 + %965 = getelementptr inbounds i8, ptr %7, i64 286, !pc !54 + store i8 %951, ptr %965, align 2, !pc !54 + %966 = getelementptr inbounds i8, ptr %7, i64 287, !pc !54 + store i8 %952, ptr %966, align 1, !pc !54 + %967 = load i8, ptr @__anvill_reg_XMM5, align 1, !pc !54 + %968 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 1), align 1, !pc !54 + %969 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 2), align 1, !pc !54 + %970 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 3), align 1, !pc !54 + %971 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 4), align 1, !pc !54 + %972 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 5), align 1, !pc !54 + %973 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 6), align 1, !pc !54 + %974 = load 
i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 7), align 1, !pc !54 + %975 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 8), align 1, !pc !54 + %976 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 9), align 1, !pc !54 + %977 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 10), align 1, !pc !54 + %978 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 11), align 1, !pc !54 + %979 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 12), align 1, !pc !54 + %980 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 13), align 1, !pc !54 + %981 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 14), align 1, !pc !54 + %982 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM5, i64 0, i64 15), align 1, !pc !54 + store i8 %967, ptr %50, align 8, !pc !54 + %983 = getelementptr inbounds i8, ptr %7, i64 337, !pc !54 + store i8 %968, ptr %983, align 1, !pc !54 + %984 = getelementptr inbounds i8, ptr %7, i64 338, !pc !54 + store i8 %969, ptr %984, align 2, !pc !54 + %985 = getelementptr inbounds i8, ptr %7, i64 339, !pc !54 + store i8 %970, ptr %985, align 1, !pc !54 + %986 = getelementptr inbounds i8, ptr %7, i64 340, !pc !54 + store i8 %971, ptr %986, align 4, !pc !54 + %987 = getelementptr inbounds i8, ptr %7, i64 341, !pc !54 + store i8 %972, ptr %987, align 1, !pc !54 + %988 = getelementptr inbounds i8, ptr %7, i64 342, !pc !54 + store i8 %973, ptr %988, align 2, !pc !54 + %989 = getelementptr inbounds i8, ptr %7, i64 343, !pc !54 + store i8 %974, ptr %989, align 1, !pc !54 + store i8 %975, ptr %51, align 8, !pc !54 + %990 = getelementptr inbounds i8, ptr %7, i64 345, !pc !54 + store i8 %976, ptr %990, align 1, !pc !54 + %991 = getelementptr inbounds i8, ptr %7, i64 346, !pc !54 + 
store i8 %977, ptr %991, align 2, !pc !54 + %992 = getelementptr inbounds i8, ptr %7, i64 347, !pc !54 + store i8 %978, ptr %992, align 1, !pc !54 + %993 = getelementptr inbounds i8, ptr %7, i64 348, !pc !54 + store i8 %979, ptr %993, align 4, !pc !54 + %994 = getelementptr inbounds i8, ptr %7, i64 349, !pc !54 + store i8 %980, ptr %994, align 1, !pc !54 + %995 = getelementptr inbounds i8, ptr %7, i64 350, !pc !54 + store i8 %981, ptr %995, align 2, !pc !54 + %996 = getelementptr inbounds i8, ptr %7, i64 351, !pc !54 + store i8 %982, ptr %996, align 1, !pc !54 + %997 = load i8, ptr @__anvill_reg_XMM6, align 1, !pc !54 + %998 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 1), align 1, !pc !54 + %999 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 2), align 1, !pc !54 + %1000 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 3), align 1, !pc !54 + %1001 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 4), align 1, !pc !54 + %1002 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 5), align 1, !pc !54 + %1003 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 6), align 1, !pc !54 + %1004 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 7), align 1, !pc !54 + %1005 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 8), align 1, !pc !54 + %1006 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 9), align 1, !pc !54 + %1007 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 10), align 1, !pc !54 + %1008 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 11), align 1, !pc !54 + %1009 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 12), align 1, !pc !54 + 
%1010 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 13), align 1, !pc !54 + %1011 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 14), align 1, !pc !54 + %1012 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM6, i64 0, i64 15), align 1, !pc !54 + store i8 %997, ptr %58, align 8, !pc !54 + %1013 = getelementptr inbounds i8, ptr %7, i64 401, !pc !54 + store i8 %998, ptr %1013, align 1, !pc !54 + %1014 = getelementptr inbounds i8, ptr %7, i64 402, !pc !54 + store i8 %999, ptr %1014, align 2, !pc !54 + %1015 = getelementptr inbounds i8, ptr %7, i64 403, !pc !54 + store i8 %1000, ptr %1015, align 1, !pc !54 + %1016 = getelementptr inbounds i8, ptr %7, i64 404, !pc !54 + store i8 %1001, ptr %1016, align 4, !pc !54 + %1017 = getelementptr inbounds i8, ptr %7, i64 405, !pc !54 + store i8 %1002, ptr %1017, align 1, !pc !54 + %1018 = getelementptr inbounds i8, ptr %7, i64 406, !pc !54 + store i8 %1003, ptr %1018, align 2, !pc !54 + %1019 = getelementptr inbounds i8, ptr %7, i64 407, !pc !54 + store i8 %1004, ptr %1019, align 1, !pc !54 + store i8 %1005, ptr %59, align 8, !pc !54 + %1020 = getelementptr inbounds i8, ptr %7, i64 409, !pc !54 + store i8 %1006, ptr %1020, align 1, !pc !54 + %1021 = getelementptr inbounds i8, ptr %7, i64 410, !pc !54 + store i8 %1007, ptr %1021, align 2, !pc !54 + %1022 = getelementptr inbounds i8, ptr %7, i64 411, !pc !54 + store i8 %1008, ptr %1022, align 1, !pc !54 + %1023 = getelementptr inbounds i8, ptr %7, i64 412, !pc !54 + store i8 %1009, ptr %1023, align 4, !pc !54 + %1024 = getelementptr inbounds i8, ptr %7, i64 413, !pc !54 + store i8 %1010, ptr %1024, align 1, !pc !54 + %1025 = getelementptr inbounds i8, ptr %7, i64 414, !pc !54 + store i8 %1011, ptr %1025, align 2, !pc !54 + %1026 = getelementptr inbounds i8, ptr %7, i64 415, !pc !54 + store i8 %1012, ptr %1026, align 1, !pc !54 + %1027 = load i8, ptr @__anvill_reg_XMM7, align 1, !pc !54 
+ %1028 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 1), align 1, !pc !54 + %1029 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 2), align 1, !pc !54 + %1030 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 3), align 1, !pc !54 + %1031 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 4), align 1, !pc !54 + %1032 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 5), align 1, !pc !54 + %1033 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 6), align 1, !pc !54 + %1034 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 7), align 1, !pc !54 + %1035 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 8), align 1, !pc !54 + %1036 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 9), align 1, !pc !54 + %1037 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 10), align 1, !pc !54 + %1038 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 11), align 1, !pc !54 + %1039 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 12), align 1, !pc !54 + %1040 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 13), align 1, !pc !54 + %1041 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 14), align 1, !pc !54 + %1042 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM7, i64 0, i64 15), align 1, !pc !54 + store i8 %1027, ptr %66, align 8, !pc !54 + %1043 = getelementptr inbounds i8, ptr %7, i64 465, !pc !54 + store i8 %1028, ptr %1043, align 1, !pc !54 + %1044 = getelementptr inbounds i8, ptr %7, i64 466, !pc !54 + store i8 %1029, ptr %1044, align 2, !pc !54 + %1045 = getelementptr 
inbounds i8, ptr %7, i64 467, !pc !54 + store i8 %1030, ptr %1045, align 1, !pc !54 + %1046 = getelementptr inbounds i8, ptr %7, i64 468, !pc !54 + store i8 %1031, ptr %1046, align 4, !pc !54 + %1047 = getelementptr inbounds i8, ptr %7, i64 469, !pc !54 + store i8 %1032, ptr %1047, align 1, !pc !54 + %1048 = getelementptr inbounds i8, ptr %7, i64 470, !pc !54 + store i8 %1033, ptr %1048, align 2, !pc !54 + %1049 = getelementptr inbounds i8, ptr %7, i64 471, !pc !54 + store i8 %1034, ptr %1049, align 1, !pc !54 + store i8 %1035, ptr %67, align 8, !pc !54 + %1050 = getelementptr inbounds i8, ptr %7, i64 473, !pc !54 + store i8 %1036, ptr %1050, align 1, !pc !54 + %1051 = getelementptr inbounds i8, ptr %7, i64 474, !pc !54 + store i8 %1037, ptr %1051, align 2, !pc !54 + %1052 = getelementptr inbounds i8, ptr %7, i64 475, !pc !54 + store i8 %1038, ptr %1052, align 1, !pc !54 + %1053 = getelementptr inbounds i8, ptr %7, i64 476, !pc !54 + store i8 %1039, ptr %1053, align 4, !pc !54 + %1054 = getelementptr inbounds i8, ptr %7, i64 477, !pc !54 + store i8 %1040, ptr %1054, align 1, !pc !54 + %1055 = getelementptr inbounds i8, ptr %7, i64 478, !pc !54 + store i8 %1041, ptr %1055, align 2, !pc !54 + %1056 = getelementptr inbounds i8, ptr %7, i64 479, !pc !54 + store i8 %1042, ptr %1056, align 1, !pc !54 + %1057 = load i8, ptr @__anvill_reg_XMM8, align 1, !pc !54 + %1058 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 1), align 1, !pc !54 + %1059 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 2), align 1, !pc !54 + %1060 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 3), align 1, !pc !54 + %1061 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 4), align 1, !pc !54 + %1062 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 5), align 1, !pc !54 + %1063 = load i8, ptr getelementptr inbounds ([16 x 
i8], ptr @__anvill_reg_XMM8, i64 0, i64 6), align 1, !pc !54 + %1064 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 7), align 1, !pc !54 + %1065 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 8), align 1, !pc !54 + %1066 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 9), align 1, !pc !54 + %1067 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 10), align 1, !pc !54 + %1068 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 11), align 1, !pc !54 + %1069 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 12), align 1, !pc !54 + %1070 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 13), align 1, !pc !54 + %1071 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 14), align 1, !pc !54 + %1072 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM8, i64 0, i64 15), align 1, !pc !54 + store i8 %1057, ptr %74, align 8, !pc !54 + %1073 = getelementptr inbounds i8, ptr %7, i64 529, !pc !54 + store i8 %1058, ptr %1073, align 1, !pc !54 + %1074 = getelementptr inbounds i8, ptr %7, i64 530, !pc !54 + store i8 %1059, ptr %1074, align 2, !pc !54 + %1075 = getelementptr inbounds i8, ptr %7, i64 531, !pc !54 + store i8 %1060, ptr %1075, align 1, !pc !54 + %1076 = getelementptr inbounds i8, ptr %7, i64 532, !pc !54 + store i8 %1061, ptr %1076, align 4, !pc !54 + %1077 = getelementptr inbounds i8, ptr %7, i64 533, !pc !54 + store i8 %1062, ptr %1077, align 1, !pc !54 + %1078 = getelementptr inbounds i8, ptr %7, i64 534, !pc !54 + store i8 %1063, ptr %1078, align 2, !pc !54 + %1079 = getelementptr inbounds i8, ptr %7, i64 535, !pc !54 + store i8 %1064, ptr %1079, align 1, !pc !54 + store i8 %1065, ptr %75, align 8, !pc !54 + %1080 = getelementptr inbounds i8, ptr %7, i64 537, !pc !54 
+ store i8 %1066, ptr %1080, align 1, !pc !54 + %1081 = getelementptr inbounds i8, ptr %7, i64 538, !pc !54 + store i8 %1067, ptr %1081, align 2, !pc !54 + %1082 = getelementptr inbounds i8, ptr %7, i64 539, !pc !54 + store i8 %1068, ptr %1082, align 1, !pc !54 + %1083 = getelementptr inbounds i8, ptr %7, i64 540, !pc !54 + store i8 %1069, ptr %1083, align 4, !pc !54 + %1084 = getelementptr inbounds i8, ptr %7, i64 541, !pc !54 + store i8 %1070, ptr %1084, align 1, !pc !54 + %1085 = getelementptr inbounds i8, ptr %7, i64 542, !pc !54 + store i8 %1071, ptr %1085, align 2, !pc !54 + %1086 = getelementptr inbounds i8, ptr %7, i64 543, !pc !54 + store i8 %1072, ptr %1086, align 1, !pc !54 + %1087 = load i8, ptr @__anvill_reg_XMM9, align 1, !pc !54 + %1088 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 1), align 1, !pc !54 + %1089 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 2), align 1, !pc !54 + %1090 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 3), align 1, !pc !54 + %1091 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 4), align 1, !pc !54 + %1092 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 5), align 1, !pc !54 + %1093 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 6), align 1, !pc !54 + %1094 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 7), align 1, !pc !54 + %1095 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 8), align 1, !pc !54 + %1096 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 9), align 1, !pc !54 + %1097 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 10), align 1, !pc !54 + %1098 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 11), align 
1, !pc !54 + %1099 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 12), align 1, !pc !54 + %1100 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 13), align 1, !pc !54 + %1101 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 14), align 1, !pc !54 + %1102 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM9, i64 0, i64 15), align 1, !pc !54 + store i8 %1087, ptr %82, align 8, !pc !54 + %1103 = getelementptr inbounds i8, ptr %7, i64 593, !pc !54 + store i8 %1088, ptr %1103, align 1, !pc !54 + %1104 = getelementptr inbounds i8, ptr %7, i64 594, !pc !54 + store i8 %1089, ptr %1104, align 2, !pc !54 + %1105 = getelementptr inbounds i8, ptr %7, i64 595, !pc !54 + store i8 %1090, ptr %1105, align 1, !pc !54 + %1106 = getelementptr inbounds i8, ptr %7, i64 596, !pc !54 + store i8 %1091, ptr %1106, align 4, !pc !54 + %1107 = getelementptr inbounds i8, ptr %7, i64 597, !pc !54 + store i8 %1092, ptr %1107, align 1, !pc !54 + %1108 = getelementptr inbounds i8, ptr %7, i64 598, !pc !54 + store i8 %1093, ptr %1108, align 2, !pc !54 + %1109 = getelementptr inbounds i8, ptr %7, i64 599, !pc !54 + store i8 %1094, ptr %1109, align 1, !pc !54 + store i8 %1095, ptr %83, align 8, !pc !54 + %1110 = getelementptr inbounds i8, ptr %7, i64 601, !pc !54 + store i8 %1096, ptr %1110, align 1, !pc !54 + %1111 = getelementptr inbounds i8, ptr %7, i64 602, !pc !54 + store i8 %1097, ptr %1111, align 2, !pc !54 + %1112 = getelementptr inbounds i8, ptr %7, i64 603, !pc !54 + store i8 %1098, ptr %1112, align 1, !pc !54 + %1113 = getelementptr inbounds i8, ptr %7, i64 604, !pc !54 + store i8 %1099, ptr %1113, align 4, !pc !54 + %1114 = getelementptr inbounds i8, ptr %7, i64 605, !pc !54 + store i8 %1100, ptr %1114, align 1, !pc !54 + %1115 = getelementptr inbounds i8, ptr %7, i64 606, !pc !54 + store i8 %1101, ptr %1115, align 2, !pc !54 + %1116 = getelementptr inbounds i8, 
ptr %7, i64 607, !pc !54 + store i8 %1102, ptr %1116, align 1, !pc !54 + %1117 = load i8, ptr @__anvill_reg_XMM10, align 1, !pc !54 + %1118 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 1), align 1, !pc !54 + %1119 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 2), align 1, !pc !54 + %1120 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 3), align 1, !pc !54 + %1121 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 4), align 1, !pc !54 + %1122 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 5), align 1, !pc !54 + %1123 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 6), align 1, !pc !54 + %1124 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 7), align 1, !pc !54 + %1125 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 8), align 1, !pc !54 + %1126 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 9), align 1, !pc !54 + %1127 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 10), align 1, !pc !54 + %1128 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 11), align 1, !pc !54 + %1129 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 12), align 1, !pc !54 + %1130 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 13), align 1, !pc !54 + %1131 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 14), align 1, !pc !54 + %1132 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM10, i64 0, i64 15), align 1, !pc !54 + store i8 %1117, ptr %90, align 8, !pc !54 + %1133 = getelementptr inbounds i8, ptr %7, i64 657, !pc !54 + store i8 %1118, ptr %1133, 
align 1, !pc !54 + %1134 = getelementptr inbounds i8, ptr %7, i64 658, !pc !54 + store i8 %1119, ptr %1134, align 2, !pc !54 + %1135 = getelementptr inbounds i8, ptr %7, i64 659, !pc !54 + store i8 %1120, ptr %1135, align 1, !pc !54 + %1136 = getelementptr inbounds i8, ptr %7, i64 660, !pc !54 + store i8 %1121, ptr %1136, align 4, !pc !54 + %1137 = getelementptr inbounds i8, ptr %7, i64 661, !pc !54 + store i8 %1122, ptr %1137, align 1, !pc !54 + %1138 = getelementptr inbounds i8, ptr %7, i64 662, !pc !54 + store i8 %1123, ptr %1138, align 2, !pc !54 + %1139 = getelementptr inbounds i8, ptr %7, i64 663, !pc !54 + store i8 %1124, ptr %1139, align 1, !pc !54 + store i8 %1125, ptr %91, align 8, !pc !54 + %1140 = getelementptr inbounds i8, ptr %7, i64 665, !pc !54 + store i8 %1126, ptr %1140, align 1, !pc !54 + %1141 = getelementptr inbounds i8, ptr %7, i64 666, !pc !54 + store i8 %1127, ptr %1141, align 2, !pc !54 + %1142 = getelementptr inbounds i8, ptr %7, i64 667, !pc !54 + store i8 %1128, ptr %1142, align 1, !pc !54 + %1143 = getelementptr inbounds i8, ptr %7, i64 668, !pc !54 + store i8 %1129, ptr %1143, align 4, !pc !54 + %1144 = getelementptr inbounds i8, ptr %7, i64 669, !pc !54 + store i8 %1130, ptr %1144, align 1, !pc !54 + %1145 = getelementptr inbounds i8, ptr %7, i64 670, !pc !54 + store i8 %1131, ptr %1145, align 2, !pc !54 + %1146 = getelementptr inbounds i8, ptr %7, i64 671, !pc !54 + store i8 %1132, ptr %1146, align 1, !pc !54 + %1147 = load i8, ptr @__anvill_reg_XMM11, align 1, !pc !54 + %1148 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 1), align 1, !pc !54 + %1149 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 2), align 1, !pc !54 + %1150 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 3), align 1, !pc !54 + %1151 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 4), align 1, !pc !54 + %1152 = 
load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 5), align 1, !pc !54 + %1153 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 6), align 1, !pc !54 + %1154 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 7), align 1, !pc !54 + %1155 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 8), align 1, !pc !54 + %1156 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 9), align 1, !pc !54 + %1157 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 10), align 1, !pc !54 + %1158 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 11), align 1, !pc !54 + %1159 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 12), align 1, !pc !54 + %1160 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 13), align 1, !pc !54 + %1161 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 14), align 1, !pc !54 + %1162 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM11, i64 0, i64 15), align 1, !pc !54 + store i8 %1147, ptr %98, align 8, !pc !54 + %1163 = getelementptr inbounds i8, ptr %7, i64 721, !pc !54 + store i8 %1148, ptr %1163, align 1, !pc !54 + %1164 = getelementptr inbounds i8, ptr %7, i64 722, !pc !54 + store i8 %1149, ptr %1164, align 2, !pc !54 + %1165 = getelementptr inbounds i8, ptr %7, i64 723, !pc !54 + store i8 %1150, ptr %1165, align 1, !pc !54 + %1166 = getelementptr inbounds i8, ptr %7, i64 724, !pc !54 + store i8 %1151, ptr %1166, align 4, !pc !54 + %1167 = getelementptr inbounds i8, ptr %7, i64 725, !pc !54 + store i8 %1152, ptr %1167, align 1, !pc !54 + %1168 = getelementptr inbounds i8, ptr %7, i64 726, !pc !54 + store i8 %1153, ptr %1168, align 2, !pc !54 + %1169 = getelementptr inbounds i8, ptr %7, 
i64 727, !pc !54 + store i8 %1154, ptr %1169, align 1, !pc !54 + store i8 %1155, ptr %99, align 8, !pc !54 + %1170 = getelementptr inbounds i8, ptr %7, i64 729, !pc !54 + store i8 %1156, ptr %1170, align 1, !pc !54 + %1171 = getelementptr inbounds i8, ptr %7, i64 730, !pc !54 + store i8 %1157, ptr %1171, align 2, !pc !54 + %1172 = getelementptr inbounds i8, ptr %7, i64 731, !pc !54 + store i8 %1158, ptr %1172, align 1, !pc !54 + %1173 = getelementptr inbounds i8, ptr %7, i64 732, !pc !54 + store i8 %1159, ptr %1173, align 4, !pc !54 + %1174 = getelementptr inbounds i8, ptr %7, i64 733, !pc !54 + store i8 %1160, ptr %1174, align 1, !pc !54 + %1175 = getelementptr inbounds i8, ptr %7, i64 734, !pc !54 + store i8 %1161, ptr %1175, align 2, !pc !54 + %1176 = getelementptr inbounds i8, ptr %7, i64 735, !pc !54 + store i8 %1162, ptr %1176, align 1, !pc !54 + %1177 = load i8, ptr @__anvill_reg_XMM12, align 1, !pc !54 + %1178 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 1), align 1, !pc !54 + %1179 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 2), align 1, !pc !54 + %1180 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 3), align 1, !pc !54 + %1181 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 4), align 1, !pc !54 + %1182 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 5), align 1, !pc !54 + %1183 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 6), align 1, !pc !54 + %1184 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 7), align 1, !pc !54 + %1185 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 8), align 1, !pc !54 + %1186 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 9), align 1, !pc !54 + %1187 = load i8, ptr getelementptr 
inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 10), align 1, !pc !54 + %1188 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 11), align 1, !pc !54 + %1189 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 12), align 1, !pc !54 + %1190 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 13), align 1, !pc !54 + %1191 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 14), align 1, !pc !54 + %1192 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM12, i64 0, i64 15), align 1, !pc !54 + store i8 %1177, ptr %106, align 8, !pc !54 + %1193 = getelementptr inbounds i8, ptr %7, i64 785, !pc !54 + store i8 %1178, ptr %1193, align 1, !pc !54 + %1194 = getelementptr inbounds i8, ptr %7, i64 786, !pc !54 + store i8 %1179, ptr %1194, align 2, !pc !54 + %1195 = getelementptr inbounds i8, ptr %7, i64 787, !pc !54 + store i8 %1180, ptr %1195, align 1, !pc !54 + %1196 = getelementptr inbounds i8, ptr %7, i64 788, !pc !54 + store i8 %1181, ptr %1196, align 4, !pc !54 + %1197 = getelementptr inbounds i8, ptr %7, i64 789, !pc !54 + store i8 %1182, ptr %1197, align 1, !pc !54 + %1198 = getelementptr inbounds i8, ptr %7, i64 790, !pc !54 + store i8 %1183, ptr %1198, align 2, !pc !54 + %1199 = getelementptr inbounds i8, ptr %7, i64 791, !pc !54 + store i8 %1184, ptr %1199, align 1, !pc !54 + store i8 %1185, ptr %107, align 8, !pc !54 + %1200 = getelementptr inbounds i8, ptr %7, i64 793, !pc !54 + store i8 %1186, ptr %1200, align 1, !pc !54 + %1201 = getelementptr inbounds i8, ptr %7, i64 794, !pc !54 + store i8 %1187, ptr %1201, align 2, !pc !54 + %1202 = getelementptr inbounds i8, ptr %7, i64 795, !pc !54 + store i8 %1188, ptr %1202, align 1, !pc !54 + %1203 = getelementptr inbounds i8, ptr %7, i64 796, !pc !54 + store i8 %1189, ptr %1203, align 4, !pc !54 + %1204 = getelementptr inbounds i8, ptr %7, i64 797, !pc !54 
+ store i8 %1190, ptr %1204, align 1, !pc !54 + %1205 = getelementptr inbounds i8, ptr %7, i64 798, !pc !54 + store i8 %1191, ptr %1205, align 2, !pc !54 + %1206 = getelementptr inbounds i8, ptr %7, i64 799, !pc !54 + store i8 %1192, ptr %1206, align 1, !pc !54 + %1207 = load i8, ptr @__anvill_reg_XMM13, align 1, !pc !54 + %1208 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 1), align 1, !pc !54 + %1209 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 2), align 1, !pc !54 + %1210 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 3), align 1, !pc !54 + %1211 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 4), align 1, !pc !54 + %1212 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 5), align 1, !pc !54 + %1213 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 6), align 1, !pc !54 + %1214 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 7), align 1, !pc !54 + %1215 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 8), align 1, !pc !54 + %1216 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 9), align 1, !pc !54 + %1217 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 10), align 1, !pc !54 + %1218 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 11), align 1, !pc !54 + %1219 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 12), align 1, !pc !54 + %1220 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 13), align 1, !pc !54 + %1221 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM13, i64 0, i64 14), align 1, !pc !54 + %1222 = load i8, ptr getelementptr inbounds ([16 x i8], ptr 
@__anvill_reg_XMM13, i64 0, i64 15), align 1, !pc !54 + store i8 %1207, ptr %114, align 8, !pc !54 + %1223 = getelementptr inbounds i8, ptr %7, i64 849, !pc !54 + store i8 %1208, ptr %1223, align 1, !pc !54 + %1224 = getelementptr inbounds i8, ptr %7, i64 850, !pc !54 + store i8 %1209, ptr %1224, align 2, !pc !54 + %1225 = getelementptr inbounds i8, ptr %7, i64 851, !pc !54 + store i8 %1210, ptr %1225, align 1, !pc !54 + %1226 = getelementptr inbounds i8, ptr %7, i64 852, !pc !54 + store i8 %1211, ptr %1226, align 4, !pc !54 + %1227 = getelementptr inbounds i8, ptr %7, i64 853, !pc !54 + store i8 %1212, ptr %1227, align 1, !pc !54 + %1228 = getelementptr inbounds i8, ptr %7, i64 854, !pc !54 + store i8 %1213, ptr %1228, align 2, !pc !54 + %1229 = getelementptr inbounds i8, ptr %7, i64 855, !pc !54 + store i8 %1214, ptr %1229, align 1, !pc !54 + store i8 %1215, ptr %115, align 8, !pc !54 + %1230 = getelementptr inbounds i8, ptr %7, i64 857, !pc !54 + store i8 %1216, ptr %1230, align 1, !pc !54 + %1231 = getelementptr inbounds i8, ptr %7, i64 858, !pc !54 + store i8 %1217, ptr %1231, align 2, !pc !54 + %1232 = getelementptr inbounds i8, ptr %7, i64 859, !pc !54 + store i8 %1218, ptr %1232, align 1, !pc !54 + %1233 = getelementptr inbounds i8, ptr %7, i64 860, !pc !54 + store i8 %1219, ptr %1233, align 4, !pc !54 + %1234 = getelementptr inbounds i8, ptr %7, i64 861, !pc !54 + store i8 %1220, ptr %1234, align 1, !pc !54 + %1235 = getelementptr inbounds i8, ptr %7, i64 862, !pc !54 + store i8 %1221, ptr %1235, align 2, !pc !54 + %1236 = getelementptr inbounds i8, ptr %7, i64 863, !pc !54 + store i8 %1222, ptr %1236, align 1, !pc !54 + %1237 = load i8, ptr @__anvill_reg_XMM14, align 1, !pc !54 + %1238 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 1), align 1, !pc !54 + %1239 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 2), align 1, !pc !54 + %1240 = load i8, ptr getelementptr inbounds ([16 x 
i8], ptr @__anvill_reg_XMM14, i64 0, i64 3), align 1, !pc !54 + %1241 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 4), align 1, !pc !54 + %1242 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 5), align 1, !pc !54 + %1243 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 6), align 1, !pc !54 + %1244 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 7), align 1, !pc !54 + %1245 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 8), align 1, !pc !54 + %1246 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 9), align 1, !pc !54 + %1247 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 10), align 1, !pc !54 + %1248 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 11), align 1, !pc !54 + %1249 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 12), align 1, !pc !54 + %1250 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 13), align 1, !pc !54 + %1251 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 14), align 1, !pc !54 + %1252 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM14, i64 0, i64 15), align 1, !pc !54 + store i8 %1237, ptr %122, align 8, !pc !54 + %1253 = getelementptr inbounds i8, ptr %7, i64 913, !pc !54 + store i8 %1238, ptr %1253, align 1, !pc !54 + %1254 = getelementptr inbounds i8, ptr %7, i64 914, !pc !54 + store i8 %1239, ptr %1254, align 2, !pc !54 + %1255 = getelementptr inbounds i8, ptr %7, i64 915, !pc !54 + store i8 %1240, ptr %1255, align 1, !pc !54 + %1256 = getelementptr inbounds i8, ptr %7, i64 916, !pc !54 + store i8 %1241, ptr %1256, align 4, !pc !54 + %1257 = getelementptr inbounds i8, ptr %7, i64 917, !pc !54 + store i8 
%1242, ptr %1257, align 1, !pc !54 + %1258 = getelementptr inbounds i8, ptr %7, i64 918, !pc !54 + store i8 %1243, ptr %1258, align 2, !pc !54 + %1259 = getelementptr inbounds i8, ptr %7, i64 919, !pc !54 + store i8 %1244, ptr %1259, align 1, !pc !54 + store i8 %1245, ptr %123, align 8, !pc !54 + %1260 = getelementptr inbounds i8, ptr %7, i64 921, !pc !54 + store i8 %1246, ptr %1260, align 1, !pc !54 + %1261 = getelementptr inbounds i8, ptr %7, i64 922, !pc !54 + store i8 %1247, ptr %1261, align 2, !pc !54 + %1262 = getelementptr inbounds i8, ptr %7, i64 923, !pc !54 + store i8 %1248, ptr %1262, align 1, !pc !54 + %1263 = getelementptr inbounds i8, ptr %7, i64 924, !pc !54 + store i8 %1249, ptr %1263, align 4, !pc !54 + %1264 = getelementptr inbounds i8, ptr %7, i64 925, !pc !54 + store i8 %1250, ptr %1264, align 1, !pc !54 + %1265 = getelementptr inbounds i8, ptr %7, i64 926, !pc !54 + store i8 %1251, ptr %1265, align 2, !pc !54 + %1266 = getelementptr inbounds i8, ptr %7, i64 927, !pc !54 + store i8 %1252, ptr %1266, align 1, !pc !54 + %1267 = load i8, ptr @__anvill_reg_XMM15, align 1, !pc !54 + %1268 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 1), align 1, !pc !54 + %1269 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 2), align 1, !pc !54 + %1270 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 3), align 1, !pc !54 + %1271 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 4), align 1, !pc !54 + %1272 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 5), align 1, !pc !54 + %1273 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 6), align 1, !pc !54 + %1274 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 7), align 1, !pc !54 + %1275 = load i8, ptr getelementptr inbounds ([16 x i8], ptr 
@__anvill_reg_XMM15, i64 0, i64 8), align 1, !pc !54 + %1276 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 9), align 1, !pc !54 + %1277 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 10), align 1, !pc !54 + %1278 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 11), align 1, !pc !54 + %1279 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 12), align 1, !pc !54 + %1280 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 13), align 1, !pc !54 + %1281 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 14), align 1, !pc !54 + %1282 = load i8, ptr getelementptr inbounds ([16 x i8], ptr @__anvill_reg_XMM15, i64 0, i64 15), align 1, !pc !54 + store i8 %1267, ptr %130, align 8, !pc !54 + %1283 = getelementptr inbounds i8, ptr %7, i64 977, !pc !54 + store i8 %1268, ptr %1283, align 1, !pc !54 + %1284 = getelementptr inbounds i8, ptr %7, i64 978, !pc !54 + store i8 %1269, ptr %1284, align 2, !pc !54 + %1285 = getelementptr inbounds i8, ptr %7, i64 979, !pc !54 + store i8 %1270, ptr %1285, align 1, !pc !54 + %1286 = getelementptr inbounds i8, ptr %7, i64 980, !pc !54 + store i8 %1271, ptr %1286, align 4, !pc !54 + %1287 = getelementptr inbounds i8, ptr %7, i64 981, !pc !54 + store i8 %1272, ptr %1287, align 1, !pc !54 + %1288 = getelementptr inbounds i8, ptr %7, i64 982, !pc !54 + store i8 %1273, ptr %1288, align 2, !pc !54 + %1289 = getelementptr inbounds i8, ptr %7, i64 983, !pc !54 + store i8 %1274, ptr %1289, align 1, !pc !54 + store i8 %1275, ptr %131, align 8, !pc !54 + %1290 = getelementptr inbounds i8, ptr %7, i64 985, !pc !54 + store i8 %1276, ptr %1290, align 1, !pc !54 + %1291 = getelementptr inbounds i8, ptr %7, i64 986, !pc !54 + store i8 %1277, ptr %1291, align 2, !pc !54 + %1292 = getelementptr inbounds i8, ptr %7, i64 987, !pc !54 + store i8 
%1278, ptr %1292, align 1, !pc !54 + %1293 = getelementptr inbounds i8, ptr %7, i64 988, !pc !54 + store i8 %1279, ptr %1293, align 4, !pc !54 + %1294 = getelementptr inbounds i8, ptr %7, i64 989, !pc !54 + store i8 %1280, ptr %1294, align 1, !pc !54 + %1295 = getelementptr inbounds i8, ptr %7, i64 990, !pc !54 + store i8 %1281, ptr %1295, align 2, !pc !54 + %1296 = getelementptr inbounds i8, ptr %7, i64 991, !pc !54 + store i8 %1282, ptr %1296, align 1, !pc !54 + %1297 = load x86_fp80, ptr @__anvill_reg_ST0, align 16, !pc !54 + store x86_fp80 %1297, ptr %347, align 16, !pc !54 + %1298 = load x86_fp80, ptr @__anvill_reg_ST1, align 16, !pc !54 + store x86_fp80 %1298, ptr %363, align 16, !pc !54 + %1299 = load x86_fp80, ptr @__anvill_reg_ST2, align 16, !pc !54 + store x86_fp80 %1299, ptr %379, align 16, !pc !54 + %1300 = load x86_fp80, ptr @__anvill_reg_ST3, align 16, !pc !54 + store x86_fp80 %1300, ptr %395, align 16, !pc !54 + %1301 = load x86_fp80, ptr @__anvill_reg_ST4, align 16, !pc !54 + store x86_fp80 %1301, ptr %411, align 16, !pc !54 + %1302 = load x86_fp80, ptr @__anvill_reg_ST5, align 16, !pc !54 + store x86_fp80 %1302, ptr %427, align 16, !pc !54 + %1303 = load x86_fp80, ptr @__anvill_reg_ST6, align 16, !pc !54 + store x86_fp80 %1303, ptr %443, align 16, !pc !54 + %1304 = load x86_fp80, ptr @__anvill_reg_ST7, align 16, !pc !54 + store x86_fp80 %1304, ptr %459, align 16, !pc !54 + %1305 = load i64, ptr @__anvill_reg_MM0, align 8, !pc !54 + store i64 %1305, ptr %470, align 8, !pc !54 + %1306 = load i64, ptr @__anvill_reg_MM1, align 8, !pc !54 + store i64 %1306, ptr %472, align 8, !pc !54 + %1307 = load i64, ptr @__anvill_reg_MM2, align 8, !pc !54 + store i64 %1307, ptr %474, align 8, !pc !54 + %1308 = load i64, ptr @__anvill_reg_MM3, align 8, !pc !54 + store i64 %1308, ptr %476, align 8, !pc !54 + %1309 = load i64, ptr @__anvill_reg_MM4, align 8, !pc !54 + store i64 %1309, ptr %478, align 8, !pc !54 + %1310 = load i64, ptr @__anvill_reg_MM5, align 8, !pc 
!54 + store i64 %1310, ptr %480, align 8, !pc !54 + %1311 = load i64, ptr @__anvill_reg_MM6, align 8, !pc !54 + store i64 %1311, ptr %482, align 8, !pc !54 + %1312 = load i64, ptr @__anvill_reg_MM7, align 8, !pc !54 + store i64 %1312, ptr %484, align 8, !pc !54 + %1313 = load i8, ptr @__anvill_reg_AF, align 1, !pc !54 + store i8 %1313, ptr %271, align 1, !pc !54 + %1314 = load i8, ptr @__anvill_reg_CF, align 1, !pc !54 + store i8 %1314, ptr %267, align 1, !pc !54 + %1315 = load i8, ptr @__anvill_reg_DF, align 1, !pc !54 + store i8 %1315, ptr %277, align 1, !pc !54 + %1316 = load i8, ptr @__anvill_reg_OF, align 1, !pc !54 + store i8 %1316, ptr %279, align 1, !pc !54 + %1317 = load i8, ptr @__anvill_reg_PF, align 1, !pc !54 + store i8 %1317, ptr %269, align 1, !pc !54 + %1318 = load i8, ptr @__anvill_reg_SF, align 1, !pc !54 + store i8 %1318, ptr %275, align 1, !pc !54 + %1319 = load i8, ptr @__anvill_reg_ZF, align 1, !pc !54 + store i8 %1319, ptr %273, align 1, !pc !54 + store i64 ptrtoint (ptr addrspacecast (ptr addrspace(256) null to ptr) to i64), ptr %300, align 8, !pc !54 + store i64 ptrtoint (ptr addrspacecast (ptr addrspace(257) null to ptr) to i64), ptr %302, align 8, !pc !54 + store i64 %4, ptr %320, align 8, !pc !54 + store i64 %3, ptr %return_address_loc, align 8, !pc !54 + store i32 %0, ptr %318, align 8, !pc !54 + %1320 = ptrtoint ptr %1 to i64, !pc !54 + store i64 %1320, ptr %316, align 8, !pc !54 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %6), !pc !54 + store i64 ptrtoint (ptr @sub_401270__AI_SI_B_64 to i64), ptr %6, align 8, !pc !54 + store i64 ptrtoint (ptr @sub_401270__AI_SI_B_64 to i64), ptr %340, align 8, !pc !54 + %1321 = call fastcc ptr @basic_block_func4199024(ptr nonnull %7, i64 ptrtoint (ptr @sub_401270__AI_SI_B_64 to i64), ptr null, ptr nonnull %6) #7, !pc !54 + %1322 = load i64, ptr %6, align 8, !pc !54 + switch i64 %1322, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199049, label %inst_401289.i + i64 4199074, label 
%inst_4012a2.i + ], !pc !54 + +inst_401289.i: ; preds = %2 + %1323 = call fastcc ptr @basic_block_func4199049(ptr nonnull %7, i64 4199049, ptr %1321, ptr nonnull %6) #7, !pc !54 + %1324 = load i64, ptr %6, align 8, !pc !54 + %1325 = icmp eq i64 %1324, 4199922, !pc !54 + call void @llvm.assume(i1 %1325), !pc !54 + br label %inst_4015f2.i, !pc !54 + +inst_4015f2.i: ; preds = %inst_401289.i, %inst_4015ee.i + %1326 = phi ptr [ %1347, %inst_4015ee.i ], [ %1323, %inst_401289.i ], !pc !54 + %1327 = call fastcc ptr @basic_block_func4199922(ptr nonnull %7, i64 4199922, ptr %1326, ptr nonnull %6) #7, !pc !54 + unreachable, !pc !54 + +inst_401306.i: ; preds = %inst_4012a2.i + %1328 = call fastcc ptr @basic_block_func4199174(ptr nonnull %7, i64 4199174, ptr %1369, ptr nonnull %6) #7, !pc !54 + %1329 = load i64, ptr %6, align 8, !pc !54 + %1330 = icmp eq i64 %1329, 4199184, !pc !54 + call void @llvm.assume(i1 %1330), !pc !54 + br label %inst_401310.i, !pc !54 + +inst_401310.i: ; preds = %inst_401306.i, %inst_401310.i + %1331 = phi ptr [ %1328, %inst_401306.i ], [ %1332, %inst_401310.i ], !pc !54 + %1332 = call fastcc ptr @basic_block_func4199184(ptr nonnull %7, i64 4199184, ptr %1331, ptr nonnull %6) #7, !pc !54 + %1333 = load i64, ptr %6, align 8, !pc !54 + switch i64 %1333, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199219, label %inst_401333.i + i64 4199184, label %inst_401310.i + ], !pc !54 + +inst_4014f9.i: ; preds = %inst_4014f1.i + %1334 = call fastcc ptr @basic_block_func4199673(ptr nonnull %7, i64 4199673, ptr %1352, ptr nonnull %6) #7, !pc !54 + %1335 = load i64, ptr %6, align 8, !pc !54 + %1336 = icmp eq i64 %1335, 4199701, !pc !54 + call void @llvm.assume(i1 %1336), !pc !54 + br label %inst_401515.i, !pc !54 + +inst_401515.i: ; preds = %inst_4014f9.i, %inst_401508.i, %inst_4014f1.i, %inst_4013e0.i + %1337 = phi ptr [ %1344, %inst_4013e0.i ], [ %1354, %inst_401508.i ], [ %1352, %inst_4014f1.i ], [ %1334, %inst_4014f9.i ], !pc !54 + %1338 = call fastcc ptr 
@basic_block_func4199701(ptr nonnull %7, i64 4199701, ptr %1337, ptr nonnull %6) #7, !pc !54 + %1339 = load i64, ptr %6, align 8, !pc !54 + switch i64 %1339, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199888, label %inst_4015d0.i + i64 4199392, label %inst_4013e0.i + ], !pc !54 + +inst_4015d0.i: ; preds = %inst_401515.i + %1340 = call fastcc ptr @basic_block_func4199888(ptr nonnull %7, i64 4199888, ptr %1338, ptr nonnull %6) #7, !pc !54 + %1341 = load i64, ptr %6, align 8, !pc !54 + %1342 = icmp eq i64 %1341, 4199918, !pc !54 + call void @llvm.assume(i1 %1342), !pc !54 + br label %inst_4015ee.i, !pc !54 + +inst_4013e0.i: ; preds = %inst_401381.i, %inst_4015d2.i, %inst_401515.i + %1343 = phi ptr [ %1357, %inst_401381.i ], [ %1338, %inst_401515.i ], [ %1371, %inst_4015d2.i ], !pc !54 + %1344 = call fastcc ptr @basic_block_func4199392(ptr nonnull %7, i64 4199392, ptr %1343, ptr nonnull %6) #7, !pc !54 + %1345 = load i64, ptr %6, align 8, !pc !54 + switch i64 %1345, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199470, label %inst_40142e.i + i64 4199701, label %inst_401515.i + ], !pc !54 + +inst_4015ee.i: ; preds = %inst_4015d0.i, %inst_4015d2.i + %1346 = phi ptr [ %1340, %inst_4015d0.i ], [ %1371, %inst_4015d2.i ], !pc !54 + %1347 = call fastcc ptr @basic_block_func4199918(ptr nonnull %7, i64 4199918, ptr %1346, ptr nonnull %6) #7, !pc !54 + %1348 = load i64, ptr %6, align 8, !pc !54 + %1349 = icmp eq i64 %1348, 4199922, !pc !54 + call void @llvm.assume(i1 %1349), !pc !54 + br label %inst_4015f2.i, !pc !54 + +inst_401449.i: ; preds = %inst_40142e.i + %1350 = call fastcc ptr @basic_block_func4199497(ptr nonnull %7, i64 4199497, ptr %1367, ptr nonnull %6) #7, !pc !54 + %1351 = load i64, ptr %6, align 8, !pc !54 + switch i64 %1351, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199665, label %inst_4014f1.i + i64 4199688, label %inst_401508.i + ], !pc !54 + +inst_4014f1.i: ; preds = %inst_401449.i + %1352 = call fastcc ptr @basic_block_func4199665(ptr 
nonnull %7, i64 4199665, ptr %1350, ptr nonnull %6) #7, !pc !54 + %1353 = load i64, ptr %6, align 8, !pc !54 + switch i64 %1353, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199673, label %inst_4014f9.i + i64 4199701, label %inst_401515.i + ], !pc !54 + +inst_401508.i: ; preds = %inst_401449.i + %1354 = call fastcc ptr @basic_block_func4199688(ptr nonnull %7, i64 4199688, ptr %1350, ptr nonnull %6) #7, !pc !54 + %1355 = load i64, ptr %6, align 8, !pc !54 + %1356 = icmp eq i64 %1355, 4199701, !pc !54 + call void @llvm.assume(i1 %1356), !pc !54 + br label %inst_401515.i, !pc !54 + +inst_401381.i: ; preds = %inst_401350.i + %1357 = call fastcc ptr @basic_block_func4199297(ptr nonnull %7, i64 4199297, ptr %1365, ptr nonnull %6) #7, !pc !54 + %1358 = load i64, ptr %6, align 8, !pc !54 + %1359 = icmp eq i64 %1358, 4199392, !pc !54 + call void @llvm.assume(i1 %1359), !pc !54 + br label %inst_4013e0.i, !pc !54 + +inst_401333.i: ; preds = %inst_4012a2.i, %inst_401310.i + %1360 = phi ptr [ %1369, %inst_4012a2.i ], [ %1332, %inst_401310.i ], !pc !54 + %1361 = call fastcc ptr @basic_block_func4199219(ptr nonnull %7, i64 4199219, ptr %1360, ptr nonnull %6) #7, !pc !54 + %1362 = load i64, ptr %6, align 8, !pc !54 + %1363 = icmp eq i64 %1362, 4199248, !pc !54 + call void @llvm.assume(i1 %1363), !pc !54 + br label %inst_401350.i, !pc !54 + +inst_401350.i: ; preds = %inst_401333.i, %inst_401350.i + %1364 = phi ptr [ %1361, %inst_401333.i ], [ %1365, %inst_401350.i ], !pc !54 + %1365 = call fastcc ptr @basic_block_func4199248(ptr nonnull %7, i64 4199248, ptr %1364, ptr nonnull %6) #7, !pc !54 + %1366 = load i64, ptr %6, align 8, !pc !54 + switch i64 %1366, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199297, label %inst_401381.i + i64 4199248, label %inst_401350.i + ], !pc !54 + +inst_40142e.i: ; preds = %inst_4013e0.i + %1367 = call fastcc ptr @basic_block_func4199470(ptr nonnull %7, i64 4199470, ptr %1344, ptr nonnull %6) #7, !pc !54 + %1368 = load i64, ptr %6, align 
8, !pc !54 + switch i64 %1368, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199497, label %inst_401449.i + i64 4199890, label %inst_4015d2.i + ], !pc !54 + +inst_4012a2.i: ; preds = %2 + %1369 = call fastcc ptr @basic_block_func4199074(ptr nonnull %7, i64 4199074, ptr %1321, ptr nonnull %6) #7, !pc !54 + %1370 = load i64, ptr %6, align 8, !pc !54 + switch i64 %1370, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199174, label %inst_401306.i + i64 4199219, label %inst_401333.i + ], !pc !54 + +inst_4015d2.i: ; preds = %inst_40142e.i + %1371 = call fastcc ptr @basic_block_func4199890(ptr nonnull %7, i64 4199890, ptr %1367, ptr nonnull %6) #7, !pc !54 + %1372 = load i64, ptr %6, align 8, !pc !54 + switch i64 %1372, label %sub_401270__AI_SI_B_64.lifted.exit [ + i64 4199918, label %inst_4015ee.i + i64 4199392, label %inst_4013e0.i + ], !pc !54 + +sub_401270__AI_SI_B_64.lifted.exit: ; preds = %inst_4015d2.i, %inst_4012a2.i, %inst_40142e.i, %inst_401350.i, %inst_4014f1.i, %inst_401449.i, %inst_4013e0.i, %inst_401515.i, %inst_401310.i, %2 + unreachable, !pc !54 +} + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #4 + +; Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn +declare void @llvm.assume(i1 noundef) #5 + +; Function Attrs: nocallback nofree nosync nounwind readnone willreturn +declare ptr @llvm.returnaddress(i32 immarg) #6 + +attributes #0 = { noinline } +attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +attributes #2 = { mustprogress noduplicate nofree noinline nosync nounwind optnone readnone willreturn "frame-pointer"="all" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="generic" } +attributes #3 = { noduplicate noinline nounwind optnone "frame-pointer"="all" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="generic" } 
+attributes #4 = { argmemonly nocallback nofree nosync nounwind willreturn } +attributes #5 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +attributes #6 = { nocallback nofree nosync nounwind readnone willreturn } +attributes #7 = { nounwind } + +!0 = !{[4 x i8] c"EAX\00"} +!1 = !{[4 x i8] c"RDI\00"} +!2 = !{[4 x i8] c"RBX\00"} +!3 = !{[4 x i8] c"RSI\00"} +!4 = !{[3 x i8] c"PC\00"} +!5 = !{!6, !6, i64 0} +!6 = !{!"long long", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = !{!10, !7, i64 2065} +!10 = !{!"_ZTS8X86State", !11, i64 0, !7, i64 16, !14, i64 2064, !7, i64 2080, !15, i64 2088, !17, i64 2112, !19, i64 2208, !20, i64 2480, !21, i64 2608, !22, i64 2736, !7, i64 2760, !7, i64 2768, !23, i64 3280, !25, i64 3376} +!11 = !{!"_ZTS9ArchState", !12, i64 0, !13, i64 4, !7, i64 8} +!12 = !{!"_ZTSN14AsyncHyperCall4NameE", !7, i64 0} +!13 = !{!"int", !7, i64 0} +!14 = !{!"_ZTS10ArithFlags", !7, i64 0, !7, i64 1, !7, i64 2, !7, i64 3, !7, i64 4, !7, i64 5, !7, i64 6, !7, i64 7, !7, i64 8, !7, i64 9, !7, i64 10, !7, i64 11, !7, i64 12, !7, i64 13, !7, i64 14, !7, i64 15} +!15 = !{!"_ZTS8Segments", !16, i64 0, !7, i64 2, !16, i64 4, !7, i64 6, !16, i64 8, !7, i64 10, !16, i64 12, !7, i64 14, !16, i64 16, !7, i64 18, !16, i64 20, !7, i64 22} +!16 = !{!"short", !7, i64 0} +!17 = !{!"_ZTS12AddressSpace", !6, i64 0, !18, i64 8, !6, i64 16, !18, i64 24, !6, i64 32, !18, i64 40, !6, i64 48, !18, i64 56, !6, i64 64, !18, i64 72, !6, i64 80, !18, i64 88} +!18 = !{!"_ZTS3Reg", !7, i64 0} +!19 = !{!"_ZTS3GPR", !6, i64 0, !18, i64 8, !6, i64 16, !18, i64 24, !6, i64 32, !18, i64 40, !6, i64 48, !18, i64 56, !6, i64 64, !18, i64 72, !6, i64 80, !18, i64 88, !6, i64 96, !18, i64 104, !6, i64 112, !18, i64 120, !6, i64 128, !18, i64 136, !6, i64 144, !18, i64 152, !6, i64 160, !18, i64 168, !6, i64 176, !18, i64 184, !6, i64 192, !18, i64 200, !6, i64 208, !18, i64 216, !6, i64 224, !18, i64 232, !6, i64 240, !18, i64 248, !6, 
i64 256, !18, i64 264} +!20 = !{!"_ZTS8X87Stack", !7, i64 0} +!21 = !{!"_ZTS3MMX", !7, i64 0} +!22 = !{!"_ZTS14FPUStatusFlags", !7, i64 0, !7, i64 1, !7, i64 2, !7, i64 3, !7, i64 4, !7, i64 5, !7, i64 6, !7, i64 7, !7, i64 8, !7, i64 9, !7, i64 10, !7, i64 11, !7, i64 12, !7, i64 13, !7, i64 14, !7, i64 15, !7, i64 16, !7, i64 17, !7, i64 18, !7, i64 19, !7, i64 20} +!23 = !{!"_ZTS13SegmentCaches", !24, i64 0, !24, i64 16, !24, i64 32, !24, i64 48, !24, i64 64, !24, i64 80} +!24 = !{!"_ZTS13SegmentShadow", !7, i64 0, !13, i64 8, !13, i64 12} +!25 = !{!"_ZTS5K_REG", !7, i64 0} +!26 = !{i8 0, i8 9} +!27 = !{!10, !7, i64 2067} +!28 = !{!10, !7, i64 2071} +!29 = !{!10, !7, i64 2073} +!30 = !{!10, !7, i64 2077} +!31 = !{!10, !7, i64 2069} +!32 = !{!7, !7, i64 0} +!33 = !{[4 x i8] c"RBP\00"} +!34 = !{[4 x i8] c"RSP\00"} +!35 = !{[4 x i8] c"R14\00"} +!36 = !{[4 x i8] c"R15\00"} +!37 = !{[4 x i8] c"R13\00"} +!38 = !{[4 x i8] c"R12\00"} +!39 = !{[3 x i8] c"AL\00"} +!40 = !{[3 x i8] c"R8\00"} +!41 = !{[4 x i8] c"RCX\00"} +!42 = !{[4 x i8] c"RDX\00"} +!43 = !{[4 x i8] c"EBP\00"} +!44 = !{[4 x i8] c"RAX\00"} +!45 = !{[4 x i8] c"R11\00"} +!46 = !{[4 x i8] c"R10\00"} +!47 = !{[3 x i8] c"R9\00"} +!48 = !{[5 x i8] c"R12D\00"} +!49 = !{[4 x i8] c"ECX\00"} +!50 = !{[4 x i8] c"EDX\00"} +!51 = !{[4 x i8] c"ESI\00"} +!52 = !{!10, !7, i64 2075} +!53 = !{[4 x i8] c"EDI\00"} +!54 = !{i64 4199024} +!55 = !{i64 0} diff --git a/tests/anvill_passes/src/TestAbstractStackBB.cpp b/tests/anvill_passes/src/TestAbstractStackBB.cpp new file mode 100644 index 000000000..8d532a971 --- /dev/null +++ b/tests/anvill_passes/src/TestAbstractStackBB.cpp @@ -0,0 +1,21 @@ + + +// basic_block_func4199701 + +#include +#include +#include + +#include "Utils.h" + + +namespace anvill { +TEST_SUITE("Basic Block tests") { + TEST_CASE("Convert parameters") { + auto llvm_context = anvill::CreateContextWithOpaquePointers(); + auto module = LoadTestData(*llvm_context, "MainBasicBlocks.ll"); + auto bb_func = 
module->getFunction("basic_block_func4199701"); + bb_func->dump(); + } +} +} // namespace anvill \ No newline at end of file From c5a37b46eabdbd50a59e229ccc19ff2a6109117e Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 21 Nov 2022 16:14:15 -0500 Subject: [PATCH 016/163] add contexts --- data_specifications/specification.proto | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 8d8fdddb8..f6e557c22 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -158,6 +158,27 @@ message Callable { ReturnStackPointer return_stack_pointer = 10; } + +// These arent quite affine relations we just store +// relationships of Reg=Reg'+Offset where Reg' is the register value at entry +// to the function. +// Maybe we want to get more complicated when doing the analysis, +// but this is kinda +// what we want at the end to determine stack relationships. +// This very closely matches ghidra's SymbolicPropogator: +// https://ghidra.re/ghidra_docs/api/ghidra/program/util/SymbolicPropogator.Value.html#getRelativeRegister() +message OffsetDomain { + string target_reg = 1; + // An empty base indicates a constant. 
+ optional string base = 2; + int64 offset = 3; +} + +message BlockContext { + repeated OffsetDomain symvals = 1; +} + + message CodeBlock { uint64 address = 1; string name = 2; @@ -173,6 +194,7 @@ message Function { Callable callable = 4; map blocks = 5; map local_variables = 6; + map block_context = 7; } message GlobalVariable { From d7bcb86dba746254e670aa5cad7a4364c4ba6055 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 21 Nov 2022 22:47:08 -0500 Subject: [PATCH 017/163] add start of param pass --- .../anvill_passes/src/TestAbstractStackBB.cpp | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tests/anvill_passes/src/TestAbstractStackBB.cpp b/tests/anvill_passes/src/TestAbstractStackBB.cpp index 8d532a971..cdd28250c 100644 --- a/tests/anvill_passes/src/TestAbstractStackBB.cpp +++ b/tests/anvill_passes/src/TestAbstractStackBB.cpp @@ -2,14 +2,93 @@ // basic_block_func4199701 +#include #include #include +#include #include +#include +#include +#include +#include +#include +#include +#include + +#include +#include #include "Utils.h" namespace anvill { + + +class BasicBlockContext { + public: + virtual const std::vector &GetAvailableVariables() const = 0; +}; + +struct AnvillBasicBlock { + llvm::Function *func; + const BasicBlockContext &context; +}; + + +class InitializeRegisterParameterPass { + private: + AnvillBasicBlock basic_block; + remill::Arch::ArchPtr arch; + + + public: + InitializeRegisterParameterPass(AnvillBasicBlock basic_block_) + : basic_block(basic_block_) {} + + + llvm::Function *BuildNewFunc() { + + std::vector args( + basic_block.func->getFunctionType()->param_begin(), + basic_block.func->getFunctionType()->param_end()); + + auto num_bb_params = args.size(); + auto vars = this->basic_block.context.GetAvailableVariables(); + for (const auto &v : vars) { + args.push_back(v.type); + } + + auto ntype = llvm::FunctionType::get( + this->basic_block.func->getReturnType(), args, false); + + auto nfunc = llvm::Function::Create( + 
ntype, llvm::GlobalValue::ExternalLinkage, + this->basic_block.func->getName(), this->basic_block.func->getParent()); + + + llvm::ValueToValueMapTy mp; + llvm::SmallVector rets; + llvm::CloneFunctionInto(nfunc, this->basic_block.func, mp, + llvm::CloneFunctionChangeType::LocalChangesOnly, + rets); + + nfunc->dump(); + + return nfunc; + } + + + void run() {} +}; + + +class MockBasicBlockContext : BasicBlockContext { + std::vector paramdecls; + + public: + virtual const std::vector &GetAvailableVariables() const = 0; +}; + TEST_SUITE("Basic Block tests") { TEST_CASE("Convert parameters") { auto llvm_context = anvill::CreateContextWithOpaquePointers(); From 88fa7f1a40a7c19cefdf491fc56c997a06554135 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 21 Nov 2022 22:57:10 -0500 Subject: [PATCH 018/163] add pass plans --- tests/anvill_passes/src/TestAbstractStackBB.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/anvill_passes/src/TestAbstractStackBB.cpp b/tests/anvill_passes/src/TestAbstractStackBB.cpp index cdd28250c..d55f56728 100644 --- a/tests/anvill_passes/src/TestAbstractStackBB.cpp +++ b/tests/anvill_passes/src/TestAbstractStackBB.cpp @@ -81,6 +81,21 @@ class InitializeRegisterParameterPass { void run() {} }; +/* +Register pass plan: +1. Iterate through all available parameter decls declaring them in the signature. +2. Call StoreNativeValue to store the argument representing each parameter into its physical location in the state +3. Apply SROA to the new clone +4. Replace all calls to the basic block function with the clone (should just be one but whatever) +5. When calling the basic block function we now need to call LoadLiftedValue on the parameter decl for each physical location + +Stack pass plan: +1. Add a stack parameter that’s just a byte array created in the parent that is the stack size. +2. Identify remill_reads and writes and call something that looks like the xref resolver on them. 
The only trick is you need to basically record out when you hit a register and then check if that register is holding some stack offset, take the register+stack_offset_in_that_register+the offset computed on the path to finding that register (ie. the xref resolver will be calculating the total displacement along the way) +3. Then we redirect the remill_read to a load from the stack variable at the computed stack offset +4. This could get arbitrarily more complicated when handling expressions built up over multiple registers and array indexing with multiplication over an index register, so there is stuff to work on here (maybe propagating the abstract domain forward as a separate affine analysis) +*/ + class MockBasicBlockContext : BasicBlockContext { std::vector paramdecls; From d12d4380555f98d0bd15a6ca5b58e9f3d6c5e784 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Tue, 22 Nov 2022 15:39:57 +0100 Subject: [PATCH 019/163] First attempt at recovering params/locals --- lib/Lifters/FunctionLifter.cpp | 43 ++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 1ed2a2aaf..813edf51a 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1557,7 +1557,7 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { bb_lifted_func.func->addFnAttr(llvm::Attribute::NoInline); this->LiftBasicBlockIntoFunction(bb_lifted_func, blk); - std::array args; + std::vector args(remill::kNumBlockArgs + 1); args[remill::kStatePointerArgNum] = state_ptr; args[remill::kPCArgNum] = options.program_counter_init_procedure(builder, pc_reg, blk.addr); @@ -1566,6 +1566,19 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { args[remill::kNumBlockArgs] = remill::LoadNextProgramCounterRef(llvm_blk); + for (auto &param : curr_decl->params) { + args.push_back(LoadLiftedValue(param, type_specifier.Dictionary(), + intrinsics, llvm_blk, state_ptr, + 
args[remill::kMemoryPointerArgNum])); + } + for (auto &[name, local] : curr_decl->locals) { + if (local.values.size() == 1) { + args.push_back(LoadLiftedValue( + local.values[0], type_specifier.Dictionary(), intrinsics, llvm_blk, + state_ptr, args[remill::kMemoryPointerArgNum])); + } + } + auto new_mem_ptr = builder.CreateCall(bb_lifted_func.func, args); auto mem_ptr_ref = remill::LoadMemoryPointerRef(llvm_blk); @@ -1604,6 +1617,15 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { std::vector params = std::vector( lifted_func_type->param_begin(), lifted_func_type->param_end()); params.push_back(llvm::PointerType::get(context, 0)); + size_t first_param_arg = params.size(); + for (auto &param : curr_decl->params) { + params.push_back(param.type); + } + for (auto &[name, local] : curr_decl->locals) { + if (local.values.size() == 1) { + params.push_back(local.values[0].type); + } + } llvm::FunctionType *func_type = llvm::FunctionType::get(lifted_func_type->getReturnType(), params, false); @@ -1614,7 +1636,7 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, 0u, name, this->semantics_module.get()); - auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); + llvm::Value *memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); auto state = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); @@ -1625,6 +1647,23 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { options.arch->InitializeEmptyLiftedFunction(func); + auto &blk = func->getEntryBlock(); + for (auto &param : curr_decl->params) { + auto arg = func->getArg(first_param_arg++); + arg->setName(param.name); + memory = StoreNativeValue(arg, param, type_provider.Dictionary(), + intrinsics, &blk, state, memory); + } + for (auto &[name, local] : 
curr_decl->locals) { + if (local.values.size() == 1) { + auto arg = func->getArg(first_param_arg++); + arg->setName(name); + memory = + StoreNativeValue(arg, local.values[0], type_provider.Dictionary(), + intrinsics, &blk, state, memory); + } + } + auto state_ptr = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); auto mem_arg = remill::NthArgument(func, remill::kMemoryPointerArgNum); From 888b3ee333b67772ce28a5195a41d99eed6d9ad6 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 28 Nov 2022 18:55:11 +0100 Subject: [PATCH 020/163] Keep track of register offsets? --- include/anvill/Declarations.h | 9 +++++ include/anvill/Utils.h | 12 +++++-- lib/Lifters/FunctionLifter.cpp | 14 ++++++++ lib/Protobuf.cpp | 15 ++++++++- lib/Utils.cpp | 60 ++++++++++++++++++++-------------- 5 files changed, 82 insertions(+), 28 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 894c8fd71..7d6c79b0b 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -42,6 +42,14 @@ struct Register; } // namespace remill namespace anvill { +struct RegisterOffset { + const remill::Register *target; + + // A null base indicates a pure offset + const remill::Register *base; + std::int64_t offset; +}; + struct CodeBlock { uint64_t addr; uint32_t size; @@ -50,6 +58,7 @@ struct CodeBlock { // A block may have specific decoding context properties such as "TM=1" (the thumb bit is set) // So we declare the context assignments that occur at the entry point to a block. 
std::unordered_map context_assignments; + std::vector register_offsets; }; class TypeDictionary; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 5c1eec6ec..e75a3262f 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -8,6 +8,8 @@ #pragma once +#include + #include #include #include @@ -79,12 +81,18 @@ bool CanBeAliased(llvm::Value *val); // Produce one or more instructions in `in_block` to load and return // the lifted value associated with `decl`. -llvm::Value *LoadLiftedValue(const ValueDecl &decl, - const TypeDictionary &types, +llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, const remill::IntrinsicTable &intrinsics, llvm::BasicBlock *in_block, llvm::Value *state_ptr, llvm::Value *mem_ptr); +void StoreNativeValueToRegister(llvm::Value *native_val, + const remill::Register *reg, + const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::BasicBlock *in_block, + llvm::Value *state_ptr); + // Produce one or more instructions in `in_block` to store the // native value `native_val` into the lifted state associated // with `decl`. 
diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 813edf51a..5d514a088 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1648,6 +1648,20 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { options.arch->InitializeEmptyLiftedFunction(func); auto &blk = func->getEntryBlock(); + for (auto &reg_off : block.register_offsets) { + llvm::Value *new_value = + llvm::ConstantInt::get(pc_reg->type, reg_off.offset, true); + if (reg_off.base) { + new_value = llvm::BinaryOperator::Create( + llvm::BinaryOperator::Add, new_value, + op_lifter->LoadRegValue(&blk, state, reg_off.base->name), + llvm::Twine(), &blk); + } + StoreNativeValueToRegister(new_value, reg_off.target, + type_provider.Dictionary(), intrinsics, &blk, + state); + } + for (auto &param : curr_decl->params) { auto arg = func->getArg(first_param_arg++); arg->setName(param.name); diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index be1e634f7..d0975b383 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -540,9 +540,22 @@ void ProtobufTranslator::ParseCFGIntoFunction( {blk.second.outgoing_blocks().begin(), blk.second.outgoing_blocks().end()}, {blk.second.context_assignments().begin(), - blk.second.context_assignments().end()}}; + blk.second.context_assignments().end()}, + {}}; decl.cfg.emplace(blk.first, std::move(nblk)); } + + for (auto &[blk_addr, ctx] : obj.block_context()) { + auto blk = decl.cfg[blk_addr]; + for (auto &symval : ctx.symvals()) { + RegisterOffset reg_off{}; + reg_off.offset = symval.offset(); + reg_off.target = arch->RegisterByName(symval.target_reg()); + if (symval.has_base()) { + reg_off.base = arch->RegisterByName(symval.base()); + } + } + } } diff --git a/lib/Utils.cpp b/lib/Utils.cpp index c59f65b93..39ae05524 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -240,6 +240,39 @@ void CopyMetadataTo(llvm::Value *src, llvm::Value *dst) { } } +void StoreNativeValueToRegister(llvm::Value *native_val, + const 
remill::Register *reg, + const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::BasicBlock *in_block, + llvm::Value *state_ptr) { + auto func = in_block->getParent(); + auto module = func->getParent(); + auto &context = module->getContext(); + + auto reg_type = remill::RecontextualizeType(reg->type, context); + auto ptr_to_reg = reg->AddressOf(state_ptr, in_block); + llvm::IRBuilder<> ir(in_block); + + llvm::StoreInst *store = nullptr; + + auto ipoint = ir.GetInsertPoint(); + auto iblock = ir.GetInsertBlock(); + auto adapted_val = types.ConvertValueToType(ir, native_val, reg_type); + ir.SetInsertPoint(iblock, ipoint); + + if (adapted_val) { + store = ir.CreateStore(adapted_val, ptr_to_reg); + + } else { + auto ptr = ir.CreateBitCast(ptr_to_reg, + llvm::PointerType::get(ir.getContext(), 0)); + CopyMetadataTo(native_val, ptr); + store = ir.CreateStore(native_val, ptr); + } + CopyMetadataTo(native_val, store); +} + // Produce one or more instructions in `in_block` to store the // native value `native_val` into the lifted state associated // with `decl`. @@ -260,31 +293,8 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, // Store it to a register. 
if (decl.reg) { - auto reg_type = remill::RecontextualizeType(decl.reg->type, context); - auto ptr_to_reg = decl.reg->AddressOf(state_ptr, in_block); - llvm::IRBuilder<> ir(in_block); - if (decl_type != reg_type) { - ir.CreateStore(llvm::Constant::getNullValue(reg_type), ptr_to_reg); - } - - llvm::StoreInst *store = nullptr; - - auto ipoint = ir.GetInsertPoint(); - auto iblock = ir.GetInsertBlock(); - auto adapted_val = types.ConvertValueToType(ir, native_val, reg_type); - ir.SetInsertPoint(iblock, ipoint); - - if (adapted_val) { - store = ir.CreateStore(adapted_val, ptr_to_reg); - - } else { - auto ptr = ir.CreateBitCast(ptr_to_reg, - llvm::PointerType::get(ir.getContext(), 0)); - CopyMetadataTo(native_val, ptr); - store = ir.CreateStore(native_val, ptr); - } - CopyMetadataTo(native_val, store); - + StoreNativeValueToRegister(native_val, decl.reg, types, intrinsics, + in_block, state_ptr); return mem_ptr; // Store it to memory. From 4943211ba84ccd0c729e974e0cda12dba0fb423e Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 28 Nov 2022 07:17:54 -0500 Subject: [PATCH 021/163] Revert "First attempt at recovering params/locals" This reverts commit d12d4380555f98d0bd15a6ca5b58e9f3d6c5e784. 
--- lib/Lifters/FunctionLifter.cpp | 42 ++-------------------------------- 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 5d514a088..b9e70a57d 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1557,7 +1557,7 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { bb_lifted_func.func->addFnAttr(llvm::Attribute::NoInline); this->LiftBasicBlockIntoFunction(bb_lifted_func, blk); - std::vector args(remill::kNumBlockArgs + 1); + std::array args; args[remill::kStatePointerArgNum] = state_ptr; args[remill::kPCArgNum] = options.program_counter_init_procedure(builder, pc_reg, blk.addr); @@ -1566,19 +1566,6 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { args[remill::kNumBlockArgs] = remill::LoadNextProgramCounterRef(llvm_blk); - for (auto &param : curr_decl->params) { - args.push_back(LoadLiftedValue(param, type_specifier.Dictionary(), - intrinsics, llvm_blk, state_ptr, - args[remill::kMemoryPointerArgNum])); - } - for (auto &[name, local] : curr_decl->locals) { - if (local.values.size() == 1) { - args.push_back(LoadLiftedValue( - local.values[0], type_specifier.Dictionary(), intrinsics, llvm_blk, - state_ptr, args[remill::kMemoryPointerArgNum])); - } - } - auto new_mem_ptr = builder.CreateCall(bb_lifted_func.func, args); auto mem_ptr_ref = remill::LoadMemoryPointerRef(llvm_blk); @@ -1617,15 +1604,6 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { std::vector params = std::vector( lifted_func_type->param_begin(), lifted_func_type->param_end()); params.push_back(llvm::PointerType::get(context, 0)); - size_t first_param_arg = params.size(); - for (auto &param : curr_decl->params) { - params.push_back(param.type); - } - for (auto &[name, local] : curr_decl->locals) { - if (local.values.size() == 1) { - params.push_back(local.values[0].type); - } - } llvm::FunctionType *func_type = llvm::FunctionType::get(lifted_func_type->getReturnType(), params, 
false); @@ -1636,7 +1614,7 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, 0u, name, this->semantics_module.get()); - llvm::Value *memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); + auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); auto state = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); @@ -1662,22 +1640,6 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { state); } - for (auto &param : curr_decl->params) { - auto arg = func->getArg(first_param_arg++); - arg->setName(param.name); - memory = StoreNativeValue(arg, param, type_provider.Dictionary(), - intrinsics, &blk, state, memory); - } - for (auto &[name, local] : curr_decl->locals) { - if (local.values.size() == 1) { - auto arg = func->getArg(first_param_arg++); - arg->setName(name); - memory = - StoreNativeValue(arg, local.values[0], type_provider.Dictionary(), - intrinsics, &blk, state, memory); - } - } - auto state_ptr = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); auto mem_arg = remill::NthArgument(func, remill::kMemoryPointerArgNum); From b33c574244efa1c71adfd0aef23d4af0281fd6a3 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 28 Nov 2022 17:05:12 -0500 Subject: [PATCH 022/163] working on transforms --- include/anvill/ABI.h | 2 + include/anvill/Declarations.h | 32 +++++++ include/anvill/Lifters.h | 29 ++++--- include/anvill/Utils.h | 3 + lib/ABI.cpp | 14 ++-- lib/CMakeLists.txt | 2 + lib/Declarations.cpp | 27 ++++++ lib/Lifters/BasicBlockTransform.cpp | 79 +++++++++++++++++ lib/Lifters/BasicBlockTransform.h | 50 +++++++++++ lib/Lifters/FunctionLifter.cpp | 84 ++++++++++++++++--- lib/Lifters/FunctionLifter.h | 13 +++ lib/Utils.cpp | 13 +++ 
.../anvill_passes/src/TestAbstractStackBB.cpp | 64 -------------- 13 files changed, 317 insertions(+), 95 deletions(-) create mode 100644 lib/Lifters/BasicBlockTransform.cpp create mode 100644 lib/Lifters/BasicBlockTransform.h diff --git a/include/anvill/ABI.h b/include/anvill/ABI.h index ef3c4689a..17b7c78ad 100644 --- a/include/anvill/ABI.h +++ b/include/anvill/ABI.h @@ -82,4 +82,6 @@ extern const std::string kAnvillDataProvenanceFunc; // `alloca`. extern const std::string kAnvillStackZero; +extern const std::string kBasicBlockMetadata; + } // namespace anvill diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 7d6c79b0b..ec56cea7a 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -61,6 +61,17 @@ struct CodeBlock { std::vector register_offsets; }; + +struct OffsetDomain { + remill::Register *target_register; + std::optional base_register; + std::int64_t offset; +}; +struct SpecStackOffsets { + std::vector affine_equalities; +}; + + class TypeDictionary; // A value, such as a parameter or a return value. Values are resident @@ -195,6 +206,21 @@ struct LocalVariableDecl { std::vector values; }; +class BasicBlockContext { + public: + virtual std::vector GetAvailableVariables() const = 0; +}; + +struct FunctionDecl; +class SpecBlockContext : public BasicBlockContext { + private: + const FunctionDecl &decl; + + public: + SpecBlockContext(const FunctionDecl &decl) : decl(decl) {} + virtual std::vector GetAvailableVariables() const override; +}; + // A function decl, as represented at a "near ABI" level. To be specific, // not all C, and most C++ decls, as written would be directly translatable // to this. This ought nearly represent how LLVM represents a C/C++ function @@ -210,6 +236,8 @@ struct LocalVariableDecl { // Thumb code in an Arm program, or x86 code in a bootloader that // brings up amd64 code, etc.). 
struct FunctionDecl : public CallableDecl { + friend class SpecBlockContext; + public: // Address of this function in memory. std::uint64_t address{0}; @@ -227,6 +255,8 @@ struct FunctionDecl : public CallableDecl { std::unordered_map locals; + std::unordered_map stack_offsets; + // Declare this function in an LLVM module. llvm::Function *DeclareInModule(std::string_view name, llvm::Module &) const; @@ -240,6 +270,8 @@ struct FunctionDecl : public CallableDecl { // Create a function declaration from an LLVM function. static Result Create(llvm::Function &func, const remill::Arch *arch); + + SpecBlockContext GetBlockContext(std::uint64_t addr) const; }; // A call site decl, as represented at a "near ABI" level. This is like a diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index 2f021cda0..67a357365 100644 --- a/include/anvill/Lifters.h +++ b/include/anvill/Lifters.h @@ -155,31 +155,33 @@ class LifterOptions { // // (ptrtoint __anvill_sp) // - static llvm::Value *SymbolicStackPointerInit( - llvm::IRBuilderBase &ir, const remill::Register *sp_reg, - uint64_t func_address); + static llvm::Value *SymbolicStackPointerInit(llvm::IRBuilderBase &ir, + const remill::Register *sp_reg, + uint64_t func_address); // Initialize the program counter with a constant expression of the form: // // (add (ptrtoint __anvill_pc), ) // - static llvm::Value *SymbolicProgramCounterInit( - llvm::IRBuilderBase &ir, const remill::Register *pc_reg, - uint64_t func_address); + static llvm::Value *SymbolicProgramCounterInit(llvm::IRBuilderBase &ir, + const remill::Register *pc_reg, + uint64_t func_address); // Initialize the return address with a constant expression of the form: // // (ptrtoint __anvill_ra) // - static llvm::Value *SymbolicReturnAddressInit( - llvm::IRBuilderBase &ir, llvm::IntegerType *type, uint64_t func_address); + static llvm::Value *SymbolicReturnAddressInit(llvm::IRBuilderBase &ir, + llvm::IntegerType *type, + uint64_t func_address); // Initialize the return 
address with the result of: // // call llvm.returnaddress(0) // - static llvm::Value *ConcreteReturnAddressInit( - llvm::IRBuilderBase &ir, llvm::IntegerType *type, uint64_t func_address); + static llvm::Value *ConcreteReturnAddressInit(llvm::IRBuilderBase &ir, + llvm::IntegerType *type, + uint64_t func_address); inline explicit LifterOptions( @@ -200,7 +202,8 @@ class LifterOptions { add_breakpoints(false), track_provenance(false), //TODO(ian): This should be initialized by an OS + arch pair - stack_pointer_is_signed(false), should_remove_anvill_pc(true) { + stack_pointer_is_signed(false), + should_remove_anvill_pc(true) { CheckModuleContextMatchesArch(); } @@ -276,7 +279,7 @@ class LifterOptions { // Should we treat the stack pointer as signed when simplifying sign flags. bool stack_pointer_is_signed : 1; - bool should_remove_anvill_pc: 1; + bool should_remove_anvill_pc : 1; private: LifterOptions(void) = delete; @@ -358,7 +361,7 @@ class ValueLifter { // Returns an `llvm::Constant *` if the pointer is associated with a // known or plausible entity, and an `nullptr` otherwise. 
llvm::Constant *Lift(std::uint64_t ea, llvm::Type *value_type, - unsigned address_space=0u) const; + unsigned address_space = 0u) const; private: std::shared_ptr impl; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index e75a3262f..eaa42abfc 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include @@ -102,4 +103,6 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, llvm::BasicBlock *in_block, llvm::Value *state_ptr, llvm::Value *mem_ptr); +std::optional GetBasicBlockAddr(llvm::Function *func); + } // namespace anvill diff --git a/lib/ABI.cpp b/lib/ABI.cpp index ba862ee42..082527208 100644 --- a/lib/ABI.cpp +++ b/lib/ABI.cpp @@ -65,21 +65,23 @@ const std::string kGlobalAliasNamePrefix("data_"); const std::string kSymbolicStackFrameValuePrefix(kAnvillNamePrefix + "stack_"); // The anvill function used to handle complete switch cases -const std::string kAnvillSwitchCompleteFunc( - kAnvillNamePrefix + "complete_switch"); +const std::string kAnvillSwitchCompleteFunc(kAnvillNamePrefix + + "complete_switch"); // The anvill function used to handle incomplete switch cases -const std::string kAnvillSwitchIncompleteFunc( - kAnvillNamePrefix + "incomplete_switch"); +const std::string kAnvillSwitchIncompleteFunc(kAnvillNamePrefix + + "incomplete_switch"); // The name of the uninterpreted function that implements data provenance // tracking. -const std::string kAnvillDataProvenanceFunc( - kAnvillNamePrefix + "data_provenance"); +const std::string kAnvillDataProvenanceFunc(kAnvillNamePrefix + + "data_provenance"); // Metadata ID for annotating stack frame `alloca` instructions, and telling // us that what the logical "zero offset" is away from the beginning of the // `alloca`. 
const std::string kAnvillStackZero(kAnvillNamePrefix + "stack_zero"); +const std::string kBasicBlockMetadata(kAnvillNamePrefix + "basic_block_md"); + } // namespace anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 8ad387299..3acd4bb71 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -84,6 +84,7 @@ set(anvill_lifters_HEADERS "Lifters/EntityLifter.h" "Lifters/FunctionLifter.h" "Lifters/ValueLifter.h" + "Lifters/BasicBlockTransform.h" ) set(anvill_lifters_SOURCES @@ -92,6 +93,7 @@ set(anvill_lifters_SOURCES "Lifters/FunctionLifter.cpp" "Lifters/Options.cpp" "Lifters/ValueLifter.cpp" + "Lifters/BasicBlockTransform.cpp" ) set(anvill_providers_SOURCES diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 44c83be3f..9986dd7c9 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -29,6 +29,8 @@ #include #include +#include + #include "Arch/Arch.h" #include "Protobuf.h" @@ -92,6 +94,26 @@ FunctionDecl::DeclareInModule(std::string_view name, return func; } +std::vector SpecBlockContext::GetAvailableVariables() const { + std::vector decls; + for (auto p : this->decl.params) { + decls.push_back(p); + } + + for (auto [nm, l] : this->decl.locals) { + if (l.values.size() == 1) { + + ParameterDecl d = { + {l.values[0].reg, l.values[0].mem_reg, l.values[0].mem_offset, + l.values[0].spec_type, l.values[0].type}, + nm}; + decls.push_back(std::move(d)); + } + } + + return decls; +} + // Interpret `target` as being the function to call, and call it from within // a basic block in a lifted bitcode function. Returns the new value of the // memory pointer. 
@@ -282,4 +304,9 @@ void CallableDecl::OverrideFunctionTypeWithABIReturnLayout() { } } +SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { + return SpecBlockContext(*this); +} + + } // namespace anvill diff --git a/lib/Lifters/BasicBlockTransform.cpp b/lib/Lifters/BasicBlockTransform.cpp new file mode 100644 index 000000000..d0df0c8cf --- /dev/null +++ b/lib/Lifters/BasicBlockTransform.cpp @@ -0,0 +1,79 @@ +#include "BasicBlockTransform.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "anvill/ABI.h" +#include "anvill/Lifters.h" + +namespace anvill { +Transformed +CallAndInitializeParameters::TransformInternal(const AnvillBasicBlock &bb) { + std::vector args( + bb.basic_block_repr_func->getFunctionType()->param_begin(), + bb.basic_block_repr_func->getFunctionType()->param_end()); + + + auto num_bb_func_pars = args.size(); + auto vars = bb.context.GetAvailableVariables(); + for (const auto &v : vars) { + args.push_back(v.type); + } + + auto ntype = llvm::FunctionType::get( + bb.basic_block_repr_func->getReturnType(), args, false); + + auto nfunc = llvm::Function::Create(ntype, llvm::GlobalValue::ExternalLinkage, + bb.basic_block_repr_func->getName(), + bb.basic_block_repr_func->getParent()); + + + llvm::ValueToValueMapTy mp; + llvm::SmallVector rets; + llvm::CloneFunctionInto(nfunc, bb.basic_block_repr_func, mp, + llvm::CloneFunctionChangeType::LocalChangesOnly, + rets); + + llvm::IRBuilder<> ir(&nfunc->getEntryBlock()); + + llvm::Value *mem_ptr = nfunc->getArg(remill::kMemoryPointerArgNum); + auto state_ptr = nfunc->getArg(remill::kStatePointerArgNum); + + auto dummy = new llvm::GlobalVariable( + mem_ptr->getType(), false, + llvm::GlobalValue::LinkageTypes::ExternalLinkage); + + mem_ptr->replaceAllUsesWith(dummy); + + for (size_t i = 0; i < vars.size(); i++) { + llvm::Value *native_val = nfunc->getArg(i + num_bb_func_pars); + auto decl = vars[i]; + mem_ptr = 
StoreNativeValue(native_val, decl, this->types, this->intrinsics, + ir.GetInsertBlock(), state_ptr, mem_ptr); + } + + dummy->replaceAllUsesWith(mem_ptr); + dummy->eraseFromParent(); + + + return {nfunc, vars}; +} + +Transformed BasicBlockTransform::Transform(const AnvillBasicBlock &bb) { + auto res = this->TransformInternal(bb); + res.new_func->setMetadata( + kBasicBlockMetadata, + bb.basic_block_repr_func->getMetadata(kBasicBlockMetadata)); + return res; +} + + +} // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/BasicBlockTransform.h b/lib/Lifters/BasicBlockTransform.h new file mode 100644 index 000000000..5a4117e2a --- /dev/null +++ b/lib/Lifters/BasicBlockTransform.h @@ -0,0 +1,50 @@ + +#include +#include +#include +#include + +#include +namespace anvill { + +struct AnvillBasicBlock { + llvm::Function *basic_block_repr_func; + const BasicBlockContext &context; +}; + + +struct Transformed { + llvm::Function *new_func; + std::vector appended_args; +}; + +class BasicBlockTransform { + public: + BasicBlockTransform(const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics) + : types(types), + intrinsics(intrinsics) {} + + public: + virtual Transformed Transform(const AnvillBasicBlock &bb); + + protected: + virtual Transformed TransformInternal(const AnvillBasicBlock &bb) { + return {bb.basic_block_repr_func, {}}; + }; + + const TypeDictionary &types; + const remill::IntrinsicTable &intrinsics; +}; + + +class CallAndInitializeParameters : public BasicBlockTransform { + protected: + virtual Transformed TransformInternal(const AnvillBasicBlock &bb); + + public: + CallAndInitializeParameters(const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics) + : BasicBlockTransform(types, intrinsics) {} +}; +} // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index b9e70a57d..1320e8e66 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ 
b/lib/Lifters/FunctionLifter.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -27,10 +28,13 @@ #include #include #include +#include #include #include +#include #include #include +#include #include #include #include @@ -44,6 +48,7 @@ #include #include +#include #include #include #include @@ -992,6 +997,7 @@ void FunctionLifter::VisitInstruction( curr_inst = nullptr; } + // In the process of lifting code, we may want to call another native // function, `native_func`, for which we have high-level type info. The main // lifter operates on a special three-argument form function style, and @@ -1027,6 +1033,12 @@ llvm::MDNode *FunctionLifter::GetPCAnnotation(uint64_t pc) const { } } +llvm::MDNode *FunctionLifter::GetBasicBlockAnnotation(uint64_t addr) const { + auto pc_val = llvm::ConstantInt::get(address_type, addr); + auto pc_md = llvm::ValueAsMetadata::get(pc_val); + return llvm::MDNode::get(llvm_context, pc_md); +} + // Declare the function decl `decl` and return an `llvm::Function *`. 
llvm::Function *FunctionLifter::GetOrDeclareFunction(const FunctionDecl &decl) { const auto func_type = llvm::dyn_cast( @@ -1548,6 +1560,31 @@ llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( return bb; } + +llvm::CallInst *FunctionLifter::CallBasicBlockFunction( + uint64_t block_addr, llvm::BasicBlock *add_to_llvm, llvm::Function *bb_func, + llvm::ArrayRef extra_args, llvm::Instruction *IP) const { + llvm::IRBuilder<> builder(add_to_llvm); + if (IP) { + builder.SetInsertPoint(IP); + } + std::vector args(remill::kNumBlockArgs + 1); + args[remill::kStatePointerArgNum] = state_ptr; + args[remill::kPCArgNum] = + options.program_counter_init_procedure(builder, pc_reg, block_addr); + args[remill::kMemoryPointerArgNum] = + remill::LoadMemoryPointer(add_to_llvm, this->intrinsics); + + args[remill::kNumBlockArgs] = remill::LoadNextProgramCounterRef(add_to_llvm); + + for (auto earg : extra_args) { + args.push_back(earg); + } + + return builder.CreateCall(bb_func, args); +} + + void FunctionLifter::VisitBlock(CodeBlock blk) { auto llvm_blk = this->GetOrCreateBlock(blk.addr); @@ -1557,16 +1594,10 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { bb_lifted_func.func->addFnAttr(llvm::Attribute::NoInline); this->LiftBasicBlockIntoFunction(bb_lifted_func, blk); - std::array args; - args[remill::kStatePointerArgNum] = state_ptr; - args[remill::kPCArgNum] = - options.program_counter_init_procedure(builder, pc_reg, blk.addr); - args[remill::kMemoryPointerArgNum] = - remill::LoadMemoryPointer(llvm_blk, this->intrinsics); - args[remill::kNumBlockArgs] = remill::LoadNextProgramCounterRef(llvm_blk); - auto new_mem_ptr = builder.CreateCall(bb_lifted_func.func, args); + auto new_mem_ptr = + this->CallBasicBlockFunction(blk.addr, llvm_blk, bb_lifted_func.func); auto mem_ptr_ref = remill::LoadMemoryPointerRef(llvm_blk); @@ -1614,6 +1645,9 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, 
0u, name, this->semantics_module.get()); + func->setMetadata(anvill::kBasicBlockMetadata, + GetBasicBlockAnnotation(block.addr)); + auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); auto state = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); @@ -1782,13 +1816,38 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // functions into `native_func`. RecursivelyInlineLiftedFunctionIntoNativeFunction(); - - this->native_func->dump(); - - + CallAndInitializeParameters param_pass(options.TypeDictionary(), intrinsics); + this->ApplyBasicBlockTransform(param_pass); return native_func; } +void FunctionLifter::ApplyBasicBlockTransform(BasicBlockTransform &transform) { + for (auto &insn : llvm::instructions(this->native_func)) { + if (llvm::CallInst *call = llvm::dyn_cast(&insn)) { + auto addr = GetBasicBlockAddr(call->getCalledFunction()); + if (addr) { + auto cont = this->curr_decl->GetBlockContext(*addr); + AnvillBasicBlock block = {call->getCalledFunction(), cont}; + auto res = transform.Transform(block); + std::vector lifted_values; + + for (auto arg : res.appended_args) { + lifted_values.push_back(LoadLiftedValue( + arg, this->options.TypeDictionary(), this->intrinsics, + call->getParent(), + call->getArgOperand(remill::kStatePointerArgNum), + call->getArgOperand(remill::kMemoryPointerArgNum))); + } + auto new_call = this->CallBasicBlockFunction( + *addr, call->getParent(), res.new_func, lifted_values, call); + call->replaceAllUsesWith(new_call); + // TODO(Ian): need to setup metadata in transform + } + }; + } +} + + // Returns the address of a named function. 
std::optional FunctionLifter::AddressOfNamedFunction(const std::string &func_name) const { @@ -1868,6 +1927,7 @@ llvm::Function *EntityLifter::LiftEntity(const FunctionDecl &decl) const { } } + return func_in_target_module; } diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 50ec595a1..218f8abe4 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -29,6 +29,8 @@ #include #include +#include "BasicBlockTransform.h" + namespace llvm { class Constant; class Function; @@ -93,6 +95,12 @@ class FunctionLifter { const FunctionDecl &decl, EntityLifterImpl &lifter_context) const; + llvm::CallInst * + CallBasicBlockFunction(uint64_t block_addr, llvm::BasicBlock *add_to_llvm, + llvm::Function *bb_func, + llvm::ArrayRef extra_args = {}, + llvm::Instruction *IP = {}) const; + private: const LifterOptions &options; const MemoryProvider &memory_provider; @@ -203,6 +211,9 @@ class FunctionLifter { // not doing annotations. llvm::MDNode *GetPCAnnotation(uint64_t pc) const; + // A metadata node that communicates that this value (should be a function represents the basic block at address x) + llvm::MDNode *GetBasicBlockAnnotation(uint64_t addr) const; + // Declare the function decl `decl` and return an `llvm::Function *`. The // returned function is a "high-level" function. llvm::Function *GetOrDeclareFunction(const FunctionDecl &decl); @@ -218,6 +229,8 @@ class FunctionLifter { // instruction. 
llvm::BasicBlock *GetOrCreateBlock(uint64_t addr); + void ApplyBasicBlockTransform(BasicBlockTransform &transform); + // Attempts to lookup any redirection of the given address, and then // calls GetOrCreateBlock llvm::BasicBlock * diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 39ae05524..d7dc05129 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -20,12 +20,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include namespace anvill { @@ -751,4 +753,15 @@ bool CanBeAliased(llvm::Value *val) { } } +std::optional GetBasicBlockAddr(llvm::Function *func) { + auto meta = func->getMetadata(kBasicBlockMetadata); + if (!meta) { + return std::nullopt; + } + + auto v = llvm::cast(meta->getOperand(0))->getValue(); + + return llvm::cast(v)->getLimitedValue(); +} + } // namespace anvill diff --git a/tests/anvill_passes/src/TestAbstractStackBB.cpp b/tests/anvill_passes/src/TestAbstractStackBB.cpp index d55f56728..3c79eb930 100644 --- a/tests/anvill_passes/src/TestAbstractStackBB.cpp +++ b/tests/anvill_passes/src/TestAbstractStackBB.cpp @@ -24,63 +24,6 @@ namespace anvill { -class BasicBlockContext { - public: - virtual const std::vector &GetAvailableVariables() const = 0; -}; - -struct AnvillBasicBlock { - llvm::Function *func; - const BasicBlockContext &context; -}; - - -class InitializeRegisterParameterPass { - private: - AnvillBasicBlock basic_block; - remill::Arch::ArchPtr arch; - - - public: - InitializeRegisterParameterPass(AnvillBasicBlock basic_block_) - : basic_block(basic_block_) {} - - - llvm::Function *BuildNewFunc() { - - std::vector args( - basic_block.func->getFunctionType()->param_begin(), - basic_block.func->getFunctionType()->param_end()); - - auto num_bb_params = args.size(); - auto vars = this->basic_block.context.GetAvailableVariables(); - for (const auto &v : vars) { - args.push_back(v.type); - } - - auto ntype = llvm::FunctionType::get( - this->basic_block.func->getReturnType(), args, false); - - auto nfunc 
= llvm::Function::Create( - ntype, llvm::GlobalValue::ExternalLinkage, - this->basic_block.func->getName(), this->basic_block.func->getParent()); - - - llvm::ValueToValueMapTy mp; - llvm::SmallVector rets; - llvm::CloneFunctionInto(nfunc, this->basic_block.func, mp, - llvm::CloneFunctionChangeType::LocalChangesOnly, - rets); - - nfunc->dump(); - - return nfunc; - } - - - void run() {} -}; - /* Register pass plan: 1. iterate through all available paramater decls declaring them in the signature. @@ -97,13 +40,6 @@ Stack pass plan: */ -class MockBasicBlockContext : BasicBlockContext { - std::vector paramdecls; - - public: - virtual const std::vector &GetAvailableVariables() const = 0; -}; - TEST_SUITE("Basic Block tests") { TEST_CASE("Convert parameters") { auto llvm_context = anvill::CreateContextWithOpaquePointers(); From d016dc87fbef8b95f4f6c42a4baff74bf5048ac5 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 29 Nov 2022 07:52:49 -0500 Subject: [PATCH 023/163] fix cloning and attach gv to module --- lib/Lifters/BasicBlockTransform.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/Lifters/BasicBlockTransform.cpp b/lib/Lifters/BasicBlockTransform.cpp index d0df0c8cf..107d88fa0 100644 --- a/lib/Lifters/BasicBlockTransform.cpp +++ b/lib/Lifters/BasicBlockTransform.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,12 @@ CallAndInitializeParameters::TransformInternal(const AnvillBasicBlock &bb) { llvm::ValueToValueMapTy mp; + + for (size_t i = 0; + i < bb.basic_block_repr_func->getFunctionType()->getNumParams(); i++) { + mp.insert({bb.basic_block_repr_func->getArg(i), nfunc->getArg(i)}); + } + llvm::SmallVector rets; llvm::CloneFunctionInto(nfunc, bb.basic_block_repr_func, mp, llvm::CloneFunctionChangeType::LocalChangesOnly, @@ -47,9 +54,10 @@ CallAndInitializeParameters::TransformInternal(const AnvillBasicBlock &bb) { llvm::Value *mem_ptr = 
nfunc->getArg(remill::kMemoryPointerArgNum); auto state_ptr = nfunc->getArg(remill::kStatePointerArgNum); - auto dummy = new llvm::GlobalVariable( - mem_ptr->getType(), false, - llvm::GlobalValue::LinkageTypes::ExternalLinkage); + + llvm::GlobalVariable *dummy = new llvm::GlobalVariable( + *bb.basic_block_repr_func->getParent(), mem_ptr->getType(), false, + llvm::GlobalValue::ExternalLinkage, nullptr, ""); mem_ptr->replaceAllUsesWith(dummy); From 908c1356fb425069137f7e31bca51d36bc2cd8ff Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 29 Nov 2022 08:41:43 -0500 Subject: [PATCH 024/163] do sig pass before opt --- lib/Lifters/FunctionLifter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 1320e8e66..15ebb6ec2 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1812,12 +1812,12 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { CallLiftedFunctionFromNativeFunction(decl); + CallAndInitializeParameters param_pass(options.TypeDictionary(), intrinsics); + this->ApplyBasicBlockTransform(param_pass); // The last stage is that we need to recursively inline all calls to semantics // functions into `native_func`. 
RecursivelyInlineLiftedFunctionIntoNativeFunction(); - CallAndInitializeParameters param_pass(options.TypeDictionary(), intrinsics); - this->ApplyBasicBlockTransform(param_pass); return native_func; } From 7cd6bcd57ddff468527b0ca2605287c03c5f4bc9 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 29 Nov 2022 09:02:09 -0500 Subject: [PATCH 025/163] update spec --- data_specifications/specification.proto | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index f6e557c22..dd5f3057a 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -188,13 +188,28 @@ message CodeBlock { map context_assignments = 6; } +message Variables { + repeated Variable vars = 1; +} + +message StackEffects { + map allocations = 1; + map frees = 2; + repeated Variable missed_allocs = 3; + repeated Variable missed_frees = 4; +} + message Function { uint64 entry_address = 1; FunctionLinkage func_linkage = 3; Callable callable = 4; map blocks = 5; map local_variables = 6; + + // Keys are addresses of code blocks, each block + // may have a corresponding context map block_context = 7; + StackEffects stack_effects = 8; } message GlobalVariable { From c3473225009a733890147ff3bf24756dfd0353b9 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 29 Nov 2022 11:33:04 -0500 Subject: [PATCH 026/163] fix ordering in basic block functions, need to fix parents --- lib/Lifters/BasicBlockTransform.cpp | 29 +++++++++++++++++------------ lib/Lifters/FunctionLifter.cpp | 16 +++++++++++++--- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/lib/Lifters/BasicBlockTransform.cpp b/lib/Lifters/BasicBlockTransform.cpp index 107d88fa0..a3b4adaa5 100644 --- a/lib/Lifters/BasicBlockTransform.cpp +++ b/lib/Lifters/BasicBlockTransform.cpp @@ -3,14 +3,18 @@ #include #include #include +#include #include #include #include +#include #include #include +#include #include 
#include #include +#include #include "anvill/ABI.h" #include "anvill/Lifters.h" @@ -50,27 +54,28 @@ CallAndInitializeParameters::TransformInternal(const AnvillBasicBlock &bb) { rets); llvm::IRBuilder<> ir(&nfunc->getEntryBlock()); + // TODO(Ian): instead of doing this allow StoreNative to take an insertion point that isnt a block end. + auto cont_block = + llvm::cast(nfunc->getEntryBlock().getTerminator()) + ->getSuccessor(0); + nfunc->getEntryBlock().getTerminator()->eraseFromParent(); - llvm::Value *mem_ptr = nfunc->getArg(remill::kMemoryPointerArgNum); auto state_ptr = nfunc->getArg(remill::kStatePointerArgNum); - - llvm::GlobalVariable *dummy = new llvm::GlobalVariable( - *bb.basic_block_repr_func->getParent(), mem_ptr->getType(), false, - llvm::GlobalValue::ExternalLinkage, nullptr, ""); - - mem_ptr->replaceAllUsesWith(dummy); + auto mem_ptr_ref = remill::LoadMemoryPointerRef(&nfunc->getEntryBlock()); + auto mem_ptr_ty = nfunc->getArg(remill::kMemoryPointerArgNum)->getType(); for (size_t i = 0; i < vars.size(); i++) { llvm::Value *native_val = nfunc->getArg(i + num_bb_func_pars); auto decl = vars[i]; - mem_ptr = StoreNativeValue(native_val, decl, this->types, this->intrinsics, - ir.GetInsertBlock(), state_ptr, mem_ptr); + auto mem_ptr = ir.CreateLoad(mem_ptr_ty, mem_ptr_ref); + auto new_mem = + StoreNativeValue(native_val, decl, this->types, this->intrinsics, + ir.GetInsertBlock(), state_ptr, mem_ptr); + ir.CreateStore(new_mem, mem_ptr_ref); } - dummy->replaceAllUsesWith(mem_ptr); - dummy->eraseFromParent(); - + llvm::BranchInst::Create(cont_block, &nfunc->getEntryBlock()); return {nfunc, vars}; } diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 15ebb6ec2..aaedc7e20 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1522,7 +1522,14 @@ void FunctionLifter::ApplyInterProceduralControlFlowOverride( llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( BasicBlockFunction 
&basic_block_function, const CodeBlock &blk) { - auto bb = &basic_block_function.func->getEntryBlock(); + auto entry_block = &basic_block_function.func->getEntryBlock(); + + auto bb = llvm::BasicBlock::Create(basic_block_function.func->getContext(), + "", basic_block_function.func); + + + llvm::BranchInst::Create(bb, entry_block); + remill::Instruction inst; auto reached_addr = blk.addr; @@ -1556,7 +1563,6 @@ llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( llvm::ReturnInst::Create(bb->getContext(), memory, bb); this->RecursivelyInlineFunctionCallees(basic_block_function.func); - bb->getParent()->dump(); return bb; } @@ -1822,12 +1828,15 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { } void FunctionLifter::ApplyBasicBlockTransform(BasicBlockTransform &transform) { - for (auto &insn : llvm::instructions(this->native_func)) { + for (auto &insn : llvm::instructions(this->lifted_func)) { if (llvm::CallInst *call = llvm::dyn_cast(&insn)) { auto addr = GetBasicBlockAddr(call->getCalledFunction()); + LOG(INFO) << "getting basic block addr " + << remill::LLVMThingToString(call); if (addr) { auto cont = this->curr_decl->GetBlockContext(*addr); AnvillBasicBlock block = {call->getCalledFunction(), cont}; + LOG(INFO) << "transforming"; auto res = transform.Transform(block); std::vector lifted_values; @@ -1840,6 +1849,7 @@ void FunctionLifter::ApplyBasicBlockTransform(BasicBlockTransform &transform) { } auto new_call = this->CallBasicBlockFunction( *addr, call->getParent(), res.new_func, lifted_values, call); + call->getParent()->dump(); call->replaceAllUsesWith(new_call); // TODO(Ian): need to setup metadata in transform } From ffb4b64519084b9e069584e056996082f8d8e75d Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 29 Nov 2022 12:48:35 -0500 Subject: [PATCH 027/163] fix iterator invalidation and referencing wrong state --- lib/Lifters/FunctionLifter.cpp | 60 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 22 
deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index aaedc7e20..e764c3798 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -1575,7 +1577,8 @@ llvm::CallInst *FunctionLifter::CallBasicBlockFunction( builder.SetInsertPoint(IP); } std::vector args(remill::kNumBlockArgs + 1); - args[remill::kStatePointerArgNum] = state_ptr; + args[remill::kStatePointerArgNum] = this->state_ptr; + args[remill::kPCArgNum] = options.program_counter_init_procedure(builder, pc_reg, block_addr); args[remill::kMemoryPointerArgNum] = @@ -1813,13 +1816,15 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // Go lift all instructions! VisitBlocks(); + + CallAndInitializeParameters param_pass(options.TypeDictionary(), intrinsics); + this->ApplyBasicBlockTransform(param_pass); + // Fill up `native_func` with a basic block and make it call `lifted_func`. // This creates things like the stack-allocated `State` structure. CallLiftedFunctionFromNativeFunction(decl); - CallAndInitializeParameters param_pass(options.TypeDictionary(), intrinsics); - this->ApplyBasicBlockTransform(param_pass); // The last stage is that we need to recursively inline all calls to semantics // functions into `native_func`. 
RecursivelyInlineLiftedFunctionIntoNativeFunction(); @@ -1828,32 +1833,43 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { } void FunctionLifter::ApplyBasicBlockTransform(BasicBlockTransform &transform) { + llvm::SmallVector, 10> calls; for (auto &insn : llvm::instructions(this->lifted_func)) { if (llvm::CallInst *call = llvm::dyn_cast(&insn)) { auto addr = GetBasicBlockAddr(call->getCalledFunction()); LOG(INFO) << "getting basic block addr " << remill::LLVMThingToString(call); if (addr) { - auto cont = this->curr_decl->GetBlockContext(*addr); - AnvillBasicBlock block = {call->getCalledFunction(), cont}; - LOG(INFO) << "transforming"; - auto res = transform.Transform(block); - std::vector lifted_values; - - for (auto arg : res.appended_args) { - lifted_values.push_back(LoadLiftedValue( - arg, this->options.TypeDictionary(), this->intrinsics, - call->getParent(), - call->getArgOperand(remill::kStatePointerArgNum), - call->getArgOperand(remill::kMemoryPointerArgNum))); - } - auto new_call = this->CallBasicBlockFunction( - *addr, call->getParent(), res.new_func, lifted_values, call); - call->getParent()->dump(); - call->replaceAllUsesWith(new_call); - // TODO(Ian): need to setup metadata in transform + calls.emplace_back(call, *addr); } - }; + } + } + + for (auto [call, addr] : calls) { + // avoid iterator invalidation + auto cont = this->curr_decl->GetBlockContext(addr); + AnvillBasicBlock block = {call->getCalledFunction(), cont}; + LOG(INFO) << "transforming"; + auto res = transform.Transform(block); + std::vector lifted_values; + auto old_block = call->getParent(); + auto new_block = llvm::SplitBlock(call->getParent(), call); + + old_block->getTerminator()->eraseFromParent(); + for (auto arg : res.appended_args) { + lifted_values.push_back(LoadLiftedValue( + arg, this->options.TypeDictionary(), this->intrinsics, old_block, + call->getArgOperand(remill::kStatePointerArgNum), + call->getArgOperand(remill::kMemoryPointerArgNum))); + } + 
auto new_call = this->CallBasicBlockFunction(addr, old_block, res.new_func, + lifted_values, call); + + llvm::BranchInst::Create(new_block, old_block); + call->replaceAllUsesWith(new_call); + call->eraseFromParent(); + llvm::MergeBlockIntoPredecessor(new_block); + // TODO(Ian): need to setup metadata in transform } } From f0315cce486879f98d62b07e480dfb0600ae726c Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 29 Nov 2022 12:54:31 -0500 Subject: [PATCH 028/163] fix func names --- lib/Lifters/FunctionLifter.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index e764c3798..6a714e277 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1867,8 +1867,13 @@ void FunctionLifter::ApplyBasicBlockTransform(BasicBlockTransform &transform) { llvm::BranchInst::Create(new_block, old_block); call->replaceAllUsesWith(new_call); + std::string fname = std::string(call->getCalledFunction()->getName()); call->eraseFromParent(); + call->getCalledFunction()->eraseFromParent(); llvm::MergeBlockIntoPredecessor(new_block); + res.new_func->setName(fname); + + // TODO(Ian): need to setup metadata in transform } } From b56c3b8040b1aa68767367d4971d666edd5aabe6 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Sat, 3 Dec 2022 14:10:10 -0500 Subject: [PATCH 029/163] insert offsets --- include/anvill/Declarations.h | 12 ++------ include/anvill/Lifters.h | 4 +++ lib/Lifters/FunctionLifter.cpp | 31 +++++++++++++------- lib/Lifters/Options.cpp | 53 ++++++++++++++++++++++------------ lib/Protobuf.cpp | 40 ++++++++++++++++++------- 5 files changed, 91 insertions(+), 49 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index ec56cea7a..ff07a593d 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -42,13 +42,6 @@ struct Register; } // namespace remill namespace anvill { -struct RegisterOffset { - const remill::Register *target; - - // A 
null base indicates a pure offset - const remill::Register *base; - std::int64_t offset; -}; struct CodeBlock { uint64_t addr; @@ -58,13 +51,12 @@ struct CodeBlock { // A block may have specific decoding context properties such as "TM=1" (the thumb bit is set) // So we declare the context assignments that occur at the entry point to a block. std::unordered_map context_assignments; - std::vector register_offsets; }; struct OffsetDomain { - remill::Register *target_register; - std::optional base_register; + const remill::Register *target_register; + std::optional base_register; std::int64_t offset; }; struct SpecStackOffsets { diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index 67a357365..8334b7e5a 100644 --- a/include/anvill/Lifters.h +++ b/include/anvill/Lifters.h @@ -155,6 +155,10 @@ class LifterOptions { // // (ptrtoint __anvill_sp) // + static llvm::Value *SymbolicStackPointerInitWithOffset( + llvm::IRBuilderBase &ir, const remill::Register *sp_reg, + uint64_t func_address, std::int64_t offset); + static llvm::Value *SymbolicStackPointerInit(llvm::IRBuilderBase &ir, const remill::Register *sp_reg, uint64_t func_address); diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 6a714e277..37eb90fcd 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -10,6 +10,7 @@ #include <_types/_uint64_t.h> #include +#include #include #include #include @@ -1669,18 +1670,26 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { options.arch->InitializeEmptyLiftedFunction(func); auto &blk = func->getEntryBlock(); - for (auto ®_off : block.register_offsets) { - llvm::Value *new_value = - llvm::ConstantInt::get(pc_reg->type, reg_off.offset, true); - if (reg_off.base) { - new_value = llvm::BinaryOperator::Create( - llvm::BinaryOperator::Add, new_value, - op_lifter->LoadRegValue(&blk, state, reg_off.base->name), - llvm::Twine(), &blk); + llvm::IRBuilder<> ir(&blk); + // Put registers that are 
referencing the stack in terms of their displacement so that we + // Can resolve these stack references later . + + + auto stack_offsets = this->curr_decl->stack_offsets.find(block.addr); + + if (stack_offsets != this->curr_decl->stack_offsets.end()) { + for (auto ®_off : stack_offsets->second.affine_equalities) { + if (reg_off.base_register && reg_off.base_register == this->sp_reg) { + auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( + ir, this->sp_reg, block.addr, reg_off.offset); + LOG(INFO) << reg_off.target_register->name; + StoreNativeValueToRegister(new_value, reg_off.target_register, + type_provider.Dictionary(), intrinsics, &blk, + state); + + blk.dump(); + } } - StoreNativeValueToRegister(new_value, reg_off.target, - type_provider.Dictionary(), intrinsics, &blk, - state); } auto state_ptr = remill::NthArgument(func, remill::kStatePointerArgNum); diff --git a/lib/Lifters/Options.cpp b/lib/Lifters/Options.cpp index 6b6a06d59..6d60edb47 100644 --- a/lib/Lifters/Options.cpp +++ b/lib/Lifters/Options.cpp @@ -6,15 +6,15 @@ * the LICENSE file found in the root directory of this source tree. 
*/ -#include - #include +#include #include #include #include -#include +#include #include #include +#include #include #include @@ -38,12 +38,10 @@ const ::anvill::TypeDictionary &LifterOptions::TypeDictionary(void) const { return type_provider.Dictionary(); } -// Initialize the stack frame with a constant expression of the form: -// -// (ptrtoint __anvill_sp) -llvm::Value *LifterOptions::SymbolicStackPointerInit( + +llvm::Value *LifterOptions::SymbolicStackPointerInitWithOffset( llvm::IRBuilderBase &ir, const remill::Register *sp_reg, - uint64_t func_address) { + uint64_t func_address, std::int64_t offset) { auto &context = ir.getContext(); auto block = ir.GetInsertBlock(); @@ -58,15 +56,32 @@ llvm::Value *LifterOptions::SymbolicStackPointerInit( llvm::Constant::getNullValue(type), kSymbolicSPName); } - return llvm::ConstantExpr::getPtrToInt(base_sp, type); + auto sp = llvm::ConstantExpr::getPtrToInt(base_sp, type); + + if (offset != 0) { + return ir.CreateAdd(sp, llvm::ConstantInt::get(type, offset, true)); + } else { + return sp; + } +} + +// Initialize the stack frame with a constant expression of the form: +// +// (ptrtoint __anvill_sp) +llvm::Value * +LifterOptions::SymbolicStackPointerInit(llvm::IRBuilderBase &ir, + const remill::Register *sp_reg, + uint64_t func_address) { + return SymbolicStackPointerInitWithOffset(ir, sp_reg, func_address, 0); } // Initialize the program counter with a constant expression of the form: // // (ptrtoint __anvill_pc) -llvm::Value *LifterOptions::SymbolicProgramCounterInit( - llvm::IRBuilderBase &ir, const remill::Register *pc_reg, - uint64_t func_address) { +llvm::Value * +LifterOptions::SymbolicProgramCounterInit(llvm::IRBuilderBase &ir, + const remill::Register *pc_reg, + uint64_t func_address) { auto &context = ir.getContext(); auto block = ir.GetInsertBlock(); @@ -88,8 +103,9 @@ llvm::Value *LifterOptions::SymbolicProgramCounterInit( // Initialize the return address with a constant expression of the form: // // (ptrtoint 
__anvill_ra) -llvm::Value *LifterOptions::SymbolicReturnAddressInit( - llvm::IRBuilderBase &ir, llvm::IntegerType *type, uint64_t func_address) { +llvm::Value *LifterOptions::SymbolicReturnAddressInit(llvm::IRBuilderBase &ir, + llvm::IntegerType *type, + uint64_t func_address) { auto &context = ir.getContext(); auto block = ir.GetInsertBlock(); auto module = block->getModule(); @@ -107,16 +123,17 @@ llvm::Value *LifterOptions::SymbolicReturnAddressInit( // Initialize the return address with the result of: // // call llvm.returnaddress() -llvm::Value *LifterOptions::ConcreteReturnAddressInit( - llvm::IRBuilderBase &ir, llvm::IntegerType *type, uint64_t) { +llvm::Value *LifterOptions::ConcreteReturnAddressInit(llvm::IRBuilderBase &ir, + llvm::IntegerType *type, + uint64_t) { auto &context = ir.getContext(); auto block = ir.GetInsertBlock(); auto module = block->getModule(); type = llvm::dyn_cast( remill::RecontextualizeType(type, context)); - auto ret_addr_func = llvm::Intrinsic::getDeclaration( - module, llvm::Intrinsic::returnaddress); + auto ret_addr_func = + llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::returnaddress); llvm::Value *args[] = { llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 0)}; diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index d0975b383..faee7b035 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -535,26 +535,46 @@ Result ProtobufTranslator::DecodeFunction( void ProtobufTranslator::ParseCFGIntoFunction( const ::specification::Function &obj, FunctionDecl &decl) const { for (auto blk : obj.blocks()) { - CodeBlock nblk = {blk.second.address(), - blk.second.size(), - {blk.second.outgoing_blocks().begin(), - blk.second.outgoing_blocks().end()}, - {blk.second.context_assignments().begin(), - blk.second.context_assignments().end()}, - {}}; + CodeBlock nblk = { + blk.second.address(), + blk.second.size(), + {blk.second.outgoing_blocks().begin(), + blk.second.outgoing_blocks().end()}, + 
{blk.second.context_assignments().begin(), + blk.second.context_assignments().end()}, + }; decl.cfg.emplace(blk.first, std::move(nblk)); } + for (auto &[blk_addr, ctx] : obj.block_context()) { + std::vector affine_equalities; auto blk = decl.cfg[blk_addr]; for (auto &symval : ctx.symvals()) { - RegisterOffset reg_off{}; + OffsetDomain reg_off; reg_off.offset = symval.offset(); - reg_off.target = arch->RegisterByName(symval.target_reg()); + reg_off.target_register = arch->RegisterByName(symval.target_reg()); + if (!reg_off.target_register) { + LOG(ERROR) << "Missing base register for affine relation: " + << symval.target_reg(); + continue; + } if (symval.has_base()) { - reg_off.base = arch->RegisterByName(symval.base()); + reg_off.base_register = arch->RegisterByName(symval.base()); + if (!reg_off.base_register) { + LOG(ERROR) << "Missing base register for affine relation: " + << symval.base(); + continue; + } + } else { + reg_off.base_register = std::nullopt; } + + affine_equalities.push_back(reg_off); } + + SpecStackOffsets off = {affine_equalities}; + decl.stack_offsets.insert({blk_addr, off}); } } From 1528b3c99b77a3750c334dc79bf251b752521db1 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 5 Dec 2022 14:25:20 +0100 Subject: [PATCH 030/163] Add API to obtain basic block functions' addresses --- lib/Lifters/FunctionLifter.cpp | 12 +++++++++++- lib/Lifters/FunctionLifter.h | 5 +++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 37eb90fcd..4af9a13c9 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1633,6 +1634,13 @@ void FunctionLifter::VisitBlocks() { } } +llvm::Function *FunctionLifter::GetBasicBlockFunction(uint64_t address) const { + auto it = addr_to_bb_func.find(address); + if (it == addr_to_bb_func.end()) { + return nullptr; + } + return 
it->second.func; +} BasicBlockFunction FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { @@ -1700,7 +1708,9 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { func->addFnAttr(llvm::Attribute::NoInline); func->setLinkage(llvm::GlobalValue::InternalLinkage); - return {func, state_ptr, pc_arg, mem_arg, next_pc_out}; + BasicBlockFunction bbf{func, state_ptr, pc_arg, mem_arg, next_pc_out}; + addr_to_bb_func[block.addr] = bbf; + return bbf; } LiftedFunction FunctionLifter::CreateLiftedFunction(const std::string &name) { diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 218f8abe4..84dc45b98 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -101,6 +101,8 @@ class FunctionLifter { llvm::ArrayRef extra_args = {}, llvm::Instruction *IP = {}) const; + llvm::Function *GetBasicBlockFunction(uint64_t address) const; + private: const LifterOptions &options; const MemoryProvider &memory_provider; @@ -207,6 +209,9 @@ class FunctionLifter { llvm::BasicBlock *invalid_successor_block{nullptr}; + // Maps basic block addresses to lifted functions + std::unordered_map addr_to_bb_func; + // Get the annotation for the program counter `pc`, or `nullptr` if we're // not doing annotations. 
llvm::MDNode *GetPCAnnotation(uint64_t pc) const; From d90e94276c929a4bd899b0cb45cab2c5560de5b5 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 5 Dec 2022 08:56:04 -0500 Subject: [PATCH 031/163] fix thumb mode --- data_specifications/specification.proto | 5 +++++ lib/Lifters/FunctionLifter.cpp | 14 +++++++++++--- lib/Lifters/FunctionLifter.h | 2 ++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index dd5f3057a..c57a417a8 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -199,6 +199,11 @@ message StackEffects { repeated Variable missed_frees = 4; } + +message StackFrame { + uint64 frame_size = 1; +} + message Function { uint64 entry_address = 1; FunctionLinkage func_linkage = 3; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 4af9a13c9..90e75746d 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1524,6 +1524,14 @@ void FunctionLifter::ApplyInterProceduralControlFlowOverride( } } + +remill::DecodingContext +FunctionLifter::CreateDecodingContext(const CodeBlock &blk) { + auto init_context = this->options.arch->CreateInitialContext(); + return this->ApplyTargetList(blk.context_assignments, + std::move(init_context)); +} + llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( BasicBlockFunction &basic_block_function, const CodeBlock &blk) { auto entry_block = &basic_block_function.func->getEntryBlock(); @@ -1538,14 +1546,14 @@ llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( auto reached_addr = blk.addr; // TODO(Ian): use a different context - auto init_context = this->options.arch->CreateInitialContext(); - ApplyTargetList(blk.context_assignments, init_context); + + auto init_context = this->CreateDecodingContext(blk); while (reached_addr < blk.addr + blk.size) { auto addr = reached_addr; auto res = 
this->DecodeInstructionInto(addr, false, &inst, init_context); if (!res) { - LOG(FATAL) << "Failed to decode insn in block"; + LOG(FATAL) << "Failed to decode insn in block " << std::hex << addr; } reached_addr += inst.bytes.size(); diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 84dc45b98..ec952cebc 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -462,6 +462,8 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, LiftBasicBlockIntoFunction(BasicBlockFunction &basic_block_function, const CodeBlock &blk); + remill::DecodingContext CreateDecodingContext(const CodeBlock &blk); + void VisitBlocks(); From 7f0f284ab84ff3d97fada83f9299a023f6accc93 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 5 Dec 2022 11:50:13 -0500 Subject: [PATCH 032/163] fix switch branch types --- lib/Lifters/FunctionLifter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 90e75746d..b2198ab35 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -1480,12 +1480,12 @@ bool FunctionLifter::DoInterProceduralControlFlow( builder.CreateStore(raddr, npc); builder.CreateStore(raddr, pc); } else { - remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics); + remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics, true); } return !cc.stop; } else if (std::holds_alternative(override)) { remill::AddTerminatingTailCall(block, intrinsics.function_return, - intrinsics); + intrinsics, true); return false; } @@ -1628,7 +1628,7 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { for (uint64_t succ : blk.outgoing_edges) { sw->addCase(llvm::ConstantInt::get( - llvm::IntegerType::get(this->llvm_context, 64), succ), + llvm::cast(this->pc_reg_type), succ), this->GetOrCreateBlock(succ)); } } From 6a748cf4ff093ac43eed60da8632599139641c60 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 6 Dec 2022 08:05:39 
-0500 Subject: [PATCH 033/163] fix interproc control flow that is terminal in a basic block function --- lib/Lifters/FunctionLifter.cpp | 35 ++++++++++++++++++++++------------ lib/Lifters/FunctionLifter.h | 3 ++- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index b2198ab35..c993ab0f8 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -1491,7 +1492,7 @@ bool FunctionLifter::DoInterProceduralControlFlow( return true; } -void FunctionLifter::ApplyInterProceduralControlFlowOverride( +bool FunctionLifter::ApplyInterProceduralControlFlowOverride( const remill::Instruction &insn, llvm::BasicBlock *&block) { @@ -1517,11 +1518,13 @@ void FunctionLifter::ApplyInterProceduralControlFlowOverride( } block = continuation; - + return true; } else { - this->DoInterProceduralControlFlow(insn, block, override); + return this->DoInterProceduralControlFlow(insn, block, override); } } + + return true; } @@ -1549,7 +1552,9 @@ llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( auto init_context = this->CreateDecodingContext(blk); - while (reached_addr < blk.addr + blk.size) { + + bool ended_on_terminal = false; + while (reached_addr < blk.addr + blk.size && !ended_on_terminal) { auto addr = reached_addr; auto res = this->DecodeInstructionInto(addr, false, &inst, init_context); if (!res) { @@ -1563,16 +1568,19 @@ llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( // to treat instruction lifting as an operation that can't fail. 
std::ignore = inst.GetLifter()->LiftIntoBlock( inst, bb, basic_block_function.state_ptr, false /* is_delayed */); - this->ApplyInterProceduralControlFlowOverride(inst, bb); - } + ended_on_terminal = + !this->ApplyInterProceduralControlFlowOverride(inst, bb); + } - llvm::IRBuilder<> builder(bb); + if (!ended_on_terminal) { + llvm::IRBuilder<> builder(bb); - builder.CreateStore(remill::LoadNextProgramCounter(bb, this->intrinsics), - basic_block_function.next_pc_out_param); - auto memory = remill::LoadMemoryPointer(bb, this->intrinsics); - llvm::ReturnInst::Create(bb->getContext(), memory, bb); + builder.CreateStore(remill::LoadNextProgramCounter(bb, this->intrinsics), + basic_block_function.next_pc_out_param); + auto memory = remill::LoadMemoryPointer(bb, this->intrinsics); + llvm::ReturnInst::Create(bb->getContext(), memory, bb); + } this->RecursivelyInlineFunctionCallees(basic_block_function.func); return bb; @@ -1614,7 +1622,7 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { this->LiftBasicBlockIntoFunction(bb_lifted_func, blk); - + CHECK(!llvm::verifyFunction(*bb_lifted_func.func, &llvm::errs())); auto new_mem_ptr = this->CallBasicBlockFunction(blk.addr, llvm_blk, bb_lifted_func.func); @@ -1640,6 +1648,8 @@ void FunctionLifter::VisitBlocks() { DLOG(INFO) << "Visiting: " << std::hex << addr; this->VisitBlock(blk); } + + CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); } llvm::Function *FunctionLifter::GetBasicBlockFunction(uint64_t address) const { @@ -1718,6 +1728,7 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { BasicBlockFunction bbf{func, state_ptr, pc_arg, mem_arg, next_pc_out}; addr_to_bb_func[block.addr] = bbf; + return bbf; } diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index ec952cebc..abc84f289 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -513,7 +513,8 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, // Manipulates the control flow to restore 
intra-procedural state when reaching an // inter-procedural effect. - void ApplyInterProceduralControlFlowOverride(const remill::Instruction &, + // Returns a boolean represnting wether decoding should continue (true = non-terminal, false=terminal) + bool ApplyInterProceduralControlFlowOverride(const remill::Instruction &, llvm::BasicBlock *&block); bool From 5daa0f3cb622961aa7f130b8aec2d6ea302da44f Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 6 Dec 2022 09:19:07 -0500 Subject: [PATCH 034/163] add frame size --- data_specifications/specification.proto | 3 ++- lib/Optimize.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index c57a417a8..a5bcca154 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -201,7 +201,7 @@ message StackEffects { message StackFrame { - uint64 frame_size = 1; + uint64 frame_size = 1; } message Function { @@ -215,6 +215,7 @@ message Function { // may have a corresponding context map block_context = 7; StackEffects stack_effects = 8; + StackFrame frame = 9; } message GlobalVariable { diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 865d5d619..9506c9707 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -212,8 +212,8 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { fpm.addPass(llvm::DCEPass()); AddRemoveStackPointerCExprs(fpm, options.stack_frame_recovery_options); - AddRecoverBasicStackFrame(fpm, options.stack_frame_recovery_options); - AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options); + //AddRecoverBasicStackFrame(fpm, options.stack_frame_recovery_options); + //AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options); fpm.addPass(llvm::SROAPass()); @@ -235,8 +235,8 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { llvm::FunctionPassManager second_fpm; 
AddTransformRemillJumpIntrinsics(second_fpm, xr); - AddRemoveRemillFunctionReturns(second_fpm, xr); - AddConvertSymbolicReturnAddressToConcreteReturnAddress(second_fpm); + //AddRemoveRemillFunctionReturns(second_fpm, xr); + //AddConvertSymbolicReturnAddressToConcreteReturnAddress(second_fpm); AddLowerRemillUndefinedIntrinsics(second_fpm); AddRemoveFailedBranchHints(second_fpm); second_fpm.addPass(llvm::NewGVNPass()); From a000f38a85393501e15209b0b65483c6db7d33fd Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 6 Dec 2022 14:40:49 -0500 Subject: [PATCH 035/163] separate out state ptr --- include/anvill/Declarations.h | 2 + lib/Lifters/FunctionLifter.cpp | 116 ++++++++++----------------------- lib/Lifters/FunctionLifter.h | 15 ++--- lib/Protobuf.cpp | 7 ++ 4 files changed, 48 insertions(+), 92 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index ff07a593d..038c65604 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -249,6 +249,8 @@ struct FunctionDecl : public CallableDecl { std::unordered_map stack_offsets; + std::uint64_t stack_depth; + // Declare this function in an LLVM module. llvm::Function *DeclareInModule(std::string_view name, llvm::Module &) const; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index c993ab0f8..77f668a13 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -948,60 +948,6 @@ void FunctionLifter::InstrumentCallBreakpointFunction(llvm::BasicBlock *block) { ir.CreateCall(func, args); } -// Visit an instruction, and lift it into a basic block. Then, based off of -// the category of the instruction, invoke one of the category-specific -// lifters to enact a change in control-flow. 
-void FunctionLifter::VisitInstruction( - remill::Instruction &inst, llvm::BasicBlock *block, - remill::DecodingContext prev_insn_context) { - curr_inst = &inst; - - std::optional delayed_inst; - - if (options.track_provenance) { - InstrumentDataflowProvenance(block); - } - - if (options.add_breakpoints) { - InstrumentCallBreakpointFunction(block); - } - - // Even when something isn't supported or is invalid, we still lift - // a call to a semantic, e.g.`INVALID_INSTRUCTION`, so we really want - // to treat instruction lifting as an operation that can't fail. - std::ignore = inst.GetLifter()->LiftIntoBlock(inst, block, state_ptr, - false /* is_delayed */); - - // Figure out if we have to decode the subsequent instruction as a delayed - // instruction. - if (options.arch->MayHaveDelaySlot(inst)) { - delayed_inst = remill::Instruction(); - - if (!DecodeInstructionInto(inst.delayed_pc, true /* is_delayed */, - &*delayed_inst, prev_insn_context)) { - LOG(ERROR) << "Unable to decode or use delayed instruction at " - << std::hex << inst.delayed_pc << std::dec << " of " - << inst.Serialize(); - } - } - - // Do an initial annotation of instructions injected by `LiftIntoBlock`, - // and prior to any lifting of a delayed instruction that might happen - // in any of the below `Visit*` calls. - pc_annotation = GetPCAnnotation(inst.pc); - AnnotateInstructions(block, pc_annotation_id, pc_annotation); - - FlowVisitor visitor = {*this, inst, block, delayed_inst, prev_insn_context}; - std::visit(visitor, inst.flows); - - - // Do a second pass of annotations to apply to the control-flow branching - // instructions added in by the above `Visit*` calls. - AnnotateInstructions(block, pc_annotation_id, pc_annotation); - - curr_inst = nullptr; -} - // In the process of lifting code, we may want to call another native // function, `native_func`, for which we have high-level type info. 
The main @@ -1090,47 +1036,51 @@ llvm::Function *FunctionLifter::GetOrDeclareFunction(const FunctionDecl &decl) { } // Allocate and initialize the state structure. -void FunctionLifter::AllocateAndInitializeStateStructure( - llvm::BasicBlock *block, const remill::Arch *arch) { +llvm::Value * +FunctionLifter::AllocateAndInitializeStateStructure(llvm::BasicBlock *block, + const remill::Arch *arch) { llvm::IRBuilder<> ir(block); const auto state_type = arch->StateStructType(); + llvm::Value *new_state_ptr = nullptr; + switch (options.state_struct_init_procedure) { case StateStructureInitializationProcedure::kNone: - state_ptr = ir.CreateAlloca(state_type); + new_state_ptr = ir.CreateAlloca(state_type); break; case StateStructureInitializationProcedure::kZeroes: - state_ptr = ir.CreateAlloca(state_type); - ir.CreateStore(llvm::Constant::getNullValue(state_type), state_ptr); + new_state_ptr = ir.CreateAlloca(state_type); + ir.CreateStore(llvm::Constant::getNullValue(state_type), new_state_ptr); break; case StateStructureInitializationProcedure::kUndef: - state_ptr = ir.CreateAlloca(state_type); - ir.CreateStore(llvm::UndefValue::get(state_type), state_ptr); + new_state_ptr = ir.CreateAlloca(state_type); + ir.CreateStore(llvm::UndefValue::get(state_type), new_state_ptr); break; case StateStructureInitializationProcedure::kGlobalRegisterVariables: - state_ptr = ir.CreateAlloca(state_type); + new_state_ptr = ir.CreateAlloca(state_type); InitializeStateStructureFromGlobalRegisterVariables(block); break; case StateStructureInitializationProcedure:: kGlobalRegisterVariablesAndZeroes: - state_ptr = ir.CreateAlloca(state_type); - ir.CreateStore(llvm::Constant::getNullValue(state_type), state_ptr); + new_state_ptr = ir.CreateAlloca(state_type); + ir.CreateStore(llvm::Constant::getNullValue(state_type), new_state_ptr); InitializeStateStructureFromGlobalRegisterVariables(block); break; case StateStructureInitializationProcedure:: kGlobalRegisterVariablesAndUndef: - state_ptr = 
ir.CreateAlloca(state_type); - ir.CreateStore(llvm::UndefValue::get(state_type), state_ptr); + new_state_ptr = ir.CreateAlloca(state_type); + ir.CreateStore(llvm::UndefValue::get(state_type), new_state_ptr); InitializeStateStructureFromGlobalRegisterVariables(block); break; } - ArchSpecificStateStructureInitialization(block); + ArchSpecificStateStructureInitialization(block, new_state_ptr); + return new_state_ptr; } // Perform architecture-specific initialization of the state structure // in `block`. void FunctionLifter::ArchSpecificStateStructureInitialization( - llvm::BasicBlock *block) { + llvm::BasicBlock *block, llvm::Value *new_state_ptr) { if (is_x86_or_amd64) { llvm::IRBuilder<> ir(block); @@ -1149,7 +1099,7 @@ void FunctionLifter::ArchSpecificStateStructureInitialization( llvm::PointerType::get(block->getContext(), 256)), llvm::PointerType::get(block->getContext(), 0)), pc_reg_type); - ir.CreateStore(gsbase_val, gsbase_reg->AddressOf(state_ptr, ir)); + ir.CreateStore(gsbase_val, gsbase_reg->AddressOf(new_state_ptr, ir)); } if (fsbase_reg) { @@ -1159,27 +1109,27 @@ void FunctionLifter::ArchSpecificStateStructureInitialization( llvm::PointerType::get(block->getContext(), 257)), llvm::PointerType::get(block->getContext(), 0)), pc_reg_type); - ir.CreateStore(fsbase_val, fsbase_reg->AddressOf(state_ptr, ir)); + ir.CreateStore(fsbase_val, fsbase_reg->AddressOf(new_state_ptr, ir)); } if (ssbase_reg) { ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), - ssbase_reg->AddressOf(state_ptr, ir)); + ssbase_reg->AddressOf(new_state_ptr, ir)); } if (dsbase_reg) { ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), - dsbase_reg->AddressOf(state_ptr, ir)); + dsbase_reg->AddressOf(new_state_ptr, ir)); } if (esbase_reg) { ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), - esbase_reg->AddressOf(state_ptr, ir)); + esbase_reg->AddressOf(new_state_ptr, ir)); } if (csbase_reg) { ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), - 
csbase_reg->AddressOf(state_ptr, ir)); + csbase_reg->AddressOf(new_state_ptr, ir)); } } } @@ -1237,7 +1187,7 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( llvm::Value *mem_ptr = llvm::Constant::getNullValue(mem_ptr_type); // Stack-allocate and initialize the state pointer. - AllocateAndInitializeStateStructure(block, decl.arch); + this->state_ptr = AllocateAndInitializeStateStructure(block, decl.arch); auto pc_ptr = pc_reg->AddressOf(state_ptr, block); auto sp_ptr = sp_reg->AddressOf(state_ptr, block); @@ -1685,18 +1635,21 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { GetBasicBlockAnnotation(block.addr)); auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); - auto state = remill::NthArgument(func, remill::kStatePointerArgNum); + auto out_state = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); memory->setName("memory"); - state->setName("state"); + out_state->setName("state_out"); pc->setName("program_counter"); next_pc_out->setName("next_pc_out"); options.arch->InitializeEmptyLiftedFunction(func); + auto &blk = func->getEntryBlock(); llvm::IRBuilder<> ir(&blk); + + auto state = this->AllocateAndInitializeStateStructure(&blk, options.arch); // Put registers that are referencing the stack in terms of their displacement so that we // Can resolve these stack references later . 
@@ -1708,17 +1661,16 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { if (reg_off.base_register && reg_off.base_register == this->sp_reg) { auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( ir, this->sp_reg, block.addr, reg_off.offset); - LOG(INFO) << reg_off.target_register->name; StoreNativeValueToRegister(new_value, reg_off.target_register, type_provider.Dictionary(), intrinsics, &blk, state); - - blk.dump(); } } } - auto state_ptr = remill::NthArgument(func, remill::kStatePointerArgNum); + blk.dump(); + + auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); auto mem_arg = remill::NthArgument(func, remill::kMemoryPointerArgNum); @@ -1726,7 +1678,7 @@ FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { func->addFnAttr(llvm::Attribute::NoInline); func->setLinkage(llvm::GlobalValue::InternalLinkage); - BasicBlockFunction bbf{func, state_ptr, pc_arg, mem_arg, next_pc_out}; + BasicBlockFunction bbf{func, state, pc_arg, mem_arg, next_pc_out}; addr_to_bb_func[block.addr] = bbf; return bbf; diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index abc84f289..0b26c1bac 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -55,7 +55,7 @@ struct ControlFlowTargetList; struct BasicBlockFunction { llvm::Function *func; - llvm::Argument *state_ptr; + llvm::Value *state_ptr; llvm::Argument *pc_arg; llvm::Argument *mem_ptr; llvm::Argument *next_pc_out_param; @@ -467,12 +467,6 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, void VisitBlocks(); - // Visit an instruction, and lift it into a basic block. Then, based off of - // the category of the instruction, invoke one of the category-specific - // lifters to enact a change in control-flow. 
- void VisitInstruction(remill::Instruction &inst, llvm::BasicBlock *block, - remill::DecodingContext prev_insn_context); - // In the process of lifting code, we may want to call another native // function, `native_func`, for which we have high-level type info. The main // lifter operates on a special three-argument form function style, and @@ -533,12 +527,13 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, // Allocate and initialize the state structure. - void AllocateAndInitializeStateStructure(llvm::BasicBlock *block, - const remill::Arch *arch); + llvm::Value *AllocateAndInitializeStateStructure(llvm::BasicBlock *block, + const remill::Arch *arch); // Perform architecture-specific initialization of the state structure // in `block`. - void ArchSpecificStateStructureInitialization(llvm::BasicBlock *block); + void ArchSpecificStateStructureInitialization(llvm::BasicBlock *block, + llvm::Value *state_ptr); // Initialize the state structure with default values, loaded from global // variables. 
The purpose of these global variables is to show that there are diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index faee7b035..ec43d9615 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -498,6 +498,13 @@ Result ProtobufTranslator::DecodeFunction( return parse_res.TakeError(); } + + if (!function.has_frame()) { + return std::string("All functions should have a frame"); + } + + decl.stack_depth = function.frame().frame_size(); + this->ParseCFGIntoFunction(function, decl); auto link = function.func_linkage(); From ba4d71c7c0f52ad1d4a77d7a9efc8d42485a5ca1 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 6 Dec 2022 17:19:13 -0500 Subject: [PATCH 036/163] remove deadcode --- lib/Lifters/BasicBlockLifter.h | 0 lib/Lifters/FunctionLifter.cpp | 711 +++------------------------------ lib/Lifters/FunctionLifter.h | 174 +------- 3 files changed, 65 insertions(+), 820 deletions(-) create mode 100644 lib/Lifters/BasicBlockLifter.h diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h new file mode 100644 index 000000000..e69de29bb diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 77f668a13..5f61f1cb6 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -287,27 +287,6 @@ bool FunctionLifter::DecodeInstructionInto(const uint64_t addr, bool is_delayed, } } -// Visit an invalid instruction. An invalid instruction is a sequence of -// bytes which cannot be decoded, or an empty byte sequence. -void FunctionLifter::VisitInvalid(const remill::Instruction &inst, - llvm::BasicBlock *block) { - MuteStateEscape( - remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics)); -} - -// Visit an error instruction. An error instruction is guaranteed to trap -// execution somehow, e.g. `ud2` on x86. 
Error instructions are treated -// similarly to invalid instructions, with the exception that they can have -// delay slots, and therefore the subsequent instruction may actually execute -// prior to the error. -void FunctionLifter::VisitError( - const remill::Instruction &inst, - std::optional &delayed_inst, llvm::BasicBlock *block) { - VisitDelayedInstruction(inst, delayed_inst, block, true); - MuteStateEscape( - remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics)); -} - void FunctionLifter::InsertError(llvm::BasicBlock *block) { llvm::IRBuilder<> ir{block}; auto tail = remill::AddTerminatingTailCall( @@ -316,75 +295,6 @@ void FunctionLifter::InsertError(llvm::BasicBlock *block) { AnnotateInstruction(tail, pc_annotation_id, pc_annotation); } -// Visit a normal instruction. Normal instructions have straight line control- -// flow semantics, i.e. after executing the instruction, execution proceeds -// to the next instruction (`inst.next_pc`). -void FunctionLifter::VisitNormal( - const remill::Instruction &inst, llvm::BasicBlock *block, - const remill::Instruction::NormalInsn &mapper) { - auto cf = options.control_flow_provider.GetControlFlowOverride(inst.pc); - bool stop = false; - - if (std::holds_alternative(cf)) { - auto spec = std::get(cf); - stop = spec.stop; - } else if (!std::holds_alternative(cf)) { - LOG(ERROR) - << "Found invalid control flow override for normal instruction at " - << std::hex << inst.pc; - } - - if (stop) { - InsertError(block); - } else { - llvm::BranchInst::Create( - GetOrCreateTargetBlock(inst, inst.next_pc, - mapper.fallthrough.fallthrough_context), - block); - } -} - -// Visit a no-op instruction. These behave identically to normal instructions -// from a control-flow perspective. -void FunctionLifter::VisitNoOp(const remill::Instruction &inst, - llvm::BasicBlock *block, - const remill::Instruction::NoOp &mapper) { - VisitNormal(inst, block, mapper.fallthrough); -} - -// Visit a direct jump control-flow instruction. 
The target of the jump is -// known at decode time, and the target address is available in -// `inst.branch_taken_pc`. Execution thus needs to transfer to the instruction -// (and thus `llvm::BasicBlock`) associated with `inst.branch_taken_pc`. -void FunctionLifter::VisitDirectJump( - const remill::Instruction &inst, - std::optional &delayed_inst, llvm::BasicBlock *block, - const remill::Instruction::DirectJump &mapper) { - auto cf = options.control_flow_provider.GetControlFlowOverride(inst.pc); - if (std::holds_alternative(cf)) { - auto jmp_spec = std::get(cf); - - if (jmp_spec.targets.size() != 1) { - LOG(FATAL) << "Invalid number of targets for direct jump at " << std::hex - << inst.pc; - } - - CHECK_EQ(mapper.taken_flow.known_target, jmp_spec.targets[0].address) - << "Spec and remill don't agree on jump target at " << std::hex - << inst.pc; - VisitDelayedInstruction(inst, delayed_inst, block, true); - llvm::BranchInst::Create( - GetOrCreateTargetBlock(inst, mapper.taken_flow.known_target, - mapper.taken_flow.static_context), - block); - } else if (std::holds_alternative(cf)) { - VisitDelayedInstruction(inst, delayed_inst, block, true); - CallFunction(inst, block, inst.branch_taken_pc); - InsertError(block); - } else { - LOG(FATAL) << "Invalid spec for direct jump at " << std::hex << inst.pc; - } -} remill::DecodingContext FunctionLifter::ApplyTargetList( const std::unordered_map &assignments, @@ -395,194 +305,6 @@ remill::DecodingContext FunctionLifter::ApplyTargetList( return prev_context; } -// Visit an indirect jump that is a jump table. -void FunctionLifter::DoSwitchBasedIndirectJump( - const remill::Instruction &inst, llvm::BasicBlock *block, - const std::vector &target_list, - const remill::Instruction::IndirectJump &mapper, - const remill::DecodingContext &prev_context) { - - auto add_remill_jump{true}; - llvm::BasicBlock *current_bb = block; - - // This is a list of the possibilities we want to cover: - // - // 1. 
No target: AddTerminatingTailCall - // 2. Single target, complete: normal jump - // 3. Multiple targets, complete: switch with no default case - // 4. Single or multiple targets, not complete: switch with default case - // containing AddTerminatingTailCall - - - // If the target list is complete and has only one destination, then we - // can handle it as normal jump - if (target_list.size() == 1U) { - add_remill_jump = false; - - auto destination = target_list[0]; - - - llvm::BranchInst::Create( - GetOrCreateTargetBlock(inst, destination.address, prev_context), block); - - // We have multiple destinations. Handle this with a switch. If the target - // list is not marked as complete, then we'll still add __remill_jump - // inside the default block - } else { - llvm::BasicBlock *default_case{nullptr}; - - // Create a default case that is not reachable - add_remill_jump = false; - default_case = llvm::BasicBlock::Create(llvm_context, "", lifted_func); - - llvm::IRBuilder<> builder(default_case); - builder.CreateUnreachable(); - - // Create the parameters for the special anvill switch - auto pc = this->op_lifter->LoadRegValue( - block, state_ptr, options.arch->ProgramCounterRegisterName()); - - std::vector switch_parameters; - switch_parameters.push_back(pc); - - for (auto destination : target_list) { - switch_parameters.push_back( - llvm::ConstantInt::get(pc_reg->type, destination.address)); - } - - // Invoke the anvill switch - auto &module = *block->getModule(); - auto anvill_switch_func = GetAnvillSwitchFunc(module, address_type); - - llvm::IRBuilder<> ir(block); - auto next_pc = ir.CreateCall(anvill_switch_func, switch_parameters); - - // Now use the anvill switch output with a SwitchInst, mapping cases - // by index - auto dest_count = target_list.size(); - auto switch_inst = ir.CreateSwitch(next_pc, default_case, dest_count); - auto dest_id{0u}; - - for (auto dest : target_list) { - auto dest_block = - GetOrCreateTargetBlock(inst, dest.address, prev_context); - 
auto dest_case = llvm::ConstantInt::get(address_type, dest_id++); - switch_inst->addCase(dest_case, dest_block); - } - - AnnotateInstruction(next_pc, pc_annotation_id, pc_annotation); - AnnotateInstruction(switch_inst, pc_annotation_id, pc_annotation); - } - - if (add_remill_jump) { - - // Either we didn't find any target list from the control flow provider, or - // we did but it wasn't marked as `complete`. - auto jump = - remill::AddTerminatingTailCall(current_bb, intrinsics.jump, intrinsics); - AnnotateInstruction(jump, pc_annotation_id, pc_annotation); - } -} - -// Visit an indirect jump control-flow instruction. This may be register- or -// memory-indirect, e.g. `jmp rax` or `jmp [rax]` on x86. Thus, the target is -// not know a priori and our default mechanism for handling this is to perform -// a tail-call to the `__remill_jump` function, whose role is to be a stand-in -// something that enacts the effect of "transfer to target." -void FunctionLifter::VisitIndirectJump( - const remill::Instruction &inst, - std::optional &delayed_inst, llvm::BasicBlock *block, - const remill::Instruction::IndirectJump &mapper, - const remill::DecodingContext &prev_context) { - auto cf = options.control_flow_provider.GetControlFlowOverride(inst.pc); - if (std::holds_alternative(cf)) { - auto jmp_spec = std::get(cf); - - VisitDelayedInstruction(inst, delayed_inst, block, true); - - // Try to get the target type given the source. This is like a tail-call, - // e.g. `jmp [fseek]`. - if (auto maybe_decl = - type_provider.TryGetCalledFunctionType(func_address, inst)) { - llvm::IRBuilder<> ir(block); - llvm::Value *dest_addr = ir.CreateLoad(pc_reg_type, pc_reg_ref); - AnnotateInstruction(dest_addr, pc_annotation_id, pc_annotation); - auto new_mem_ptr = - CallCallableDecl(block, dest_addr, std::move(maybe_decl.value())); - ir.CreateRet(new_mem_ptr); - - // Attempt to get the target list for this control flow instruction - // so that we can handle this jump in a less generic way. 
- } else if (jmp_spec.targets.size() > 0) { - - DoSwitchBasedIndirectJump(inst, block, jmp_spec.targets, mapper, - prev_context); - - // No good info; do an indirect jump. - } else { - auto jump = - remill::AddTerminatingTailCall(block, intrinsics.jump, intrinsics); - AnnotateInstruction(jump, pc_annotation_id, pc_annotation); - } - } else if (std::holds_alternative(cf)) { - VisitDelayedInstruction(inst, delayed_inst, block, true); - CallFunction(inst, block, std::nullopt); - InsertError(block); - } else if (std::holds_alternative(cf)) { - // TODO(Ian): It feels like we should be able to handle overrides/control flow much more uniformally, I think it would be good to do so in - // a separate PR. - this->VisitFunctionReturn(inst, delayed_inst, block); - } else { - LOG(FATAL) << "Invalid spec for indirect jump at " << std::hex << inst.pc; - } -} - -void FunctionLifter::VisitConditionalInstruction( - const remill::Instruction &inst, - std::optional &delayed_inst, llvm::BasicBlock *block, - const remill::Instruction::ConditionalInstruction &conditional_insn, - const remill::DecodingContext &prev_context) { - - const auto lifted_func = block->getParent(); - const auto cond = remill::LoadBranchTaken(block); - const auto taken_block = - llvm::BasicBlock::Create(llvm_context, "", lifted_func); - const auto not_taken_block = - llvm::BasicBlock::Create(llvm_context, "", lifted_func); - - auto cond_jump_fallthrough_br = - llvm::BranchInst::Create(taken_block, not_taken_block, cond, block); - - FlowVisitor visitor = {*this, inst, taken_block, delayed_inst, prev_context}; - std::visit(visitor, conditional_insn.taken_branch); - - VisitDelayedInstruction(inst, delayed_inst, not_taken_block, false); - - - auto fallthrough_br = llvm::BranchInst::Create( - GetOrCreateTargetBlock(inst, inst.next_pc, - conditional_insn.fall_through.fallthrough_context), - not_taken_block); - - AnnotateInstruction(cond_jump_fallthrough_br, pc_annotation_id, - pc_annotation); - 
AnnotateInstruction(fallthrough_br, pc_annotation_id, pc_annotation); -} - -// Visit a function return control-flow instruction, which is a form of -// indirect control-flow, but with a certain semantic associated with -// returning from a function. This is treated similarly to indirect jumps, -// except the `__remill_function_return` function is tail-called. -void FunctionLifter::VisitFunctionReturn( - const remill::Instruction &inst, - std::optional &delayed_inst, llvm::BasicBlock *block) { - VisitDelayedInstruction(inst, delayed_inst, block, true); - auto func_return = remill::AddTerminatingTailCall( - block, intrinsics.function_return, intrinsics); - AnnotateInstruction(func_return, pc_annotation_id, pc_annotation); - MuteStateEscape(func_return); -} - std::optional FunctionLifter::TryGetTargetFunctionType(const remill::Instruction &from_inst, std::uint64_t address) { @@ -593,127 +315,6 @@ FunctionLifter::TryGetTargetFunctionType(const remill::Instruction &from_inst, return opt_callable_decl; } -// Call `pc` in `block`, treating it as a callable declaration `decl`. 
-llvm::Value *FunctionLifter::CallCallableDecl(llvm::BasicBlock *block, - llvm::Value *pc, - CallableDecl decl) { - llvm::IRBuilder<> ir(block); - CHECK_NOTNULL(decl.type); - CHECK_EQ(decl.arch, options.arch); - - auto &context = block->getContext(); - - auto dest_func = - ir.CreateBitOrPointerCast(pc, llvm::PointerType::get(context, 0)); - - auto mem_ptr = ir.CreateLoad(mem_ptr_type, mem_ptr_ref); - auto new_mem_ptr = - decl.CallFromLiftedBlock(dest_func, type_specifier.Dictionary(), - intrinsics, block, state_ptr, mem_ptr); - auto store = ir.CreateStore(new_mem_ptr, mem_ptr_ref); - - AnnotateInstruction(dest_func, pc_annotation_id, pc_annotation); - AnnotateInstruction(mem_ptr, pc_annotation_id, pc_annotation); - AnnotateInstruction(new_mem_ptr, pc_annotation_id, pc_annotation); - AnnotateInstruction(store, pc_annotation_id, pc_annotation); - - return new_mem_ptr; -} - -// Try to resolve `inst.branch_taken_pc` to a lifted function, and introduce -// a function call to that address in `block`. Failing this, add a call -// to `__remill_function_call`. -bool FunctionLifter::CallFunction(const remill::Instruction &inst, - llvm::BasicBlock *block, - std::optional target_pc) { - auto cf = options.control_flow_provider.GetControlFlowOverride(inst.pc); - if (!std::holds_alternative(cf)) { - LOG(FATAL) << "Invalid spec for call at " << std::hex << inst.pc; - } - auto call_spec = std::get(cf); - std::optional maybe_decl; - - if (call_spec.target_address.has_value()) { - // First, try to see if it's actually related to another function. This is - // equivalent to a tail-call in the original code. - auto redirected_addr = *call_spec.target_address; - - // Now, get the type of the target given the source and destination. 
- maybe_decl = TryGetTargetFunctionType(inst, redirected_addr); - target_pc = redirected_addr; - } else if (target_pc.has_value()) { - - maybe_decl = - type_provider.TryGetCalledFunctionType(func_address, inst, *target_pc); - } else { - - // If we don't know a concrete target address, then just try to get the - // target given the source. - maybe_decl = type_provider.TryGetCalledFunctionType(func_address, inst); - } - - if (!maybe_decl) { - LOG(ERROR) << "Missing type information for function called at address " - << std::hex << inst.pc << " in function at address " - << func_address << std::dec; - - // If we do not have a function declaration, treat this as a call - // to an unknown address. - auto call = remill::AddCall(block, intrinsics.function_call, intrinsics); - AnnotateInstruction(call, pc_annotation_id, pc_annotation); - return true; - } - - - llvm::IRBuilder<> ir(block); - llvm::Value *dest_addr = nullptr; - - if (target_pc) { - dest_addr = - options.program_counter_init_procedure(ir, pc_reg, target_pc.value()); - } else { - dest_addr = ir.CreateLoad(pc_reg_type, pc_reg_ref); - } - - AnnotateInstruction(dest_addr, pc_annotation_id, pc_annotation); - auto is_noreturn = maybe_decl->is_noreturn; - (void) CallCallableDecl(block, dest_addr, std::move(maybe_decl.value())); - return !is_noreturn; -} - -// Visit a direct function call control-flow instruction. The target is known -// at decode time, and its realized address is stored in -// `inst.branch_taken_pc`. In practice, what we do in this situation is try -// to call the lifted function function at the target address. 
-void FunctionLifter::VisitDirectFunctionCall( - const remill::Instruction &inst, - std::optional &delayed_inst, llvm::BasicBlock *block, - const remill::Instruction::DirectFunctionCall &dcall, - const remill::DecodingContext &prev_context) { - VisitDelayedInstruction(inst, delayed_inst, block, true); - bool can_return = CallFunction(inst, block, inst.branch_taken_pc); - VisitAfterFunctionCall(inst, block, dcall, can_return, prev_context); -} - - -// Visit an indirect function call control-flow instruction. Similar to -// indirect jumps, we invoke an intrinsic function, `__remill_function_call`; -// however, unlike indirect jumps, we do not tail-call this intrinsic, and -// we continue lifting at the instruction where execution will resume after -// the callee returns. Thus, lifted bitcode maintains the call graph structure -// as it presents itself in the binary. -void FunctionLifter::VisitIndirectFunctionCall( - const remill::Instruction &inst, - std::optional &delayed_inst, llvm::BasicBlock *block, - const remill::Instruction::IndirectFunctionCall &icall, - const remill::DecodingContext &prev_context) { - - VisitDelayedInstruction(inst, delayed_inst, block, true); - bool can_return = CallFunction(inst, block, std::nullopt); - VisitAfterFunctionCall(inst, block, icall, can_return, prev_context); -} - - // Helper to figure out the address where execution will resume after a // function call. In practice this is the instruction following the function // call, encoded in `inst.branch_not_taken_pc`. However, SPARC has a terrible @@ -724,7 +325,8 @@ void FunctionLifter::VisitIndirectFunctionCall( // should resume after a `call`. 
std::pair FunctionLifter::LoadFunctionReturnAddress(const remill::Instruction &inst, - llvm::BasicBlock *block) { + llvm::BasicBlock *block, + llvm::Value *state_ptr) { const auto pc = inst.branch_not_taken_pc; @@ -804,173 +406,6 @@ FunctionLifter::LoadFunctionReturnAddress(const remill::Instruction &inst, } } -// Enact relevant control-flow changes after a function call. This figures -// out the return address targeted by the callee and links it into the -// control-flow graph. -void FunctionLifter::VisitAfterFunctionCall( - const remill::Instruction &inst, llvm::BasicBlock *block, - const std::variant &call, - bool can_return, const remill::DecodingContext &prev_context) { - const auto [ret_pc, ret_pc_val] = LoadFunctionReturnAddress(inst, block); - - llvm::IRBuilder<> ir(block); - if (can_return) { - auto update_pc = ir.CreateStore(ret_pc_val, pc_reg_ref, false); - auto update_next_pc = ir.CreateStore(ret_pc_val, next_pc_reg_ref, false); - auto branch_to_next_pc = - ir.CreateBr(GetOrCreateTargetBlock(inst, ret_pc, prev_context)); - - AnnotateInstruction(update_pc, pc_annotation_id, pc_annotation); - AnnotateInstruction(update_next_pc, pc_annotation_id, pc_annotation); - AnnotateInstruction(branch_to_next_pc, pc_annotation_id, pc_annotation); - } else { - auto tail = remill::AddTerminatingTailCall( - ir.GetInsertBlock(), intrinsics.error, this->intrinsics); - AnnotateInstruction(tail, pc_annotation_id, pc_annotation); - AnnotateInstruction(tail, pc_annotation_id, pc_annotation); - } -} - - -// Visit an asynchronous hyper call control-flow instruction. These are non- -// local control-flow transfers, such as system calls. We treat them like -// indirect function calls. 
-void FunctionLifter::VisitAsyncHyperCall( - const remill::Instruction &inst, - std::optional &delayed_inst, llvm::BasicBlock *block) { - VisitDelayedInstruction(inst, delayed_inst, block, true); - remill::AddTerminatingTailCall(block, intrinsics.async_hyper_call, - intrinsics); -} - -// Visit (and thus lift) a delayed instruction. When lifting a delayed -// instruction, we need to know if we're one the taken path of a control-flow -// edge, or on the not-taken path. Delayed instructions appear physically -// after some instructions, but execute logically before them in the -// CPU pipeline. They are basically a way for hardware designers to push -// the effort of keeping the pipeline full to compiler developers. -void FunctionLifter::VisitDelayedInstruction( - const remill::Instruction &inst, - std::optional &delayed_inst, llvm::BasicBlock *block, - bool on_taken_path) { - if (delayed_inst && options.arch->NextInstructionIsDelayed( - inst, *delayed_inst, on_taken_path)) { - const auto prev_pc_annotation = pc_annotation; - pc_annotation = GetPCAnnotation(delayed_inst->pc); - inst.GetLifter()->LiftIntoBlock(*delayed_inst, block, state_ptr, true); - AnnotateInstructions(block, pc_annotation_id, pc_annotation); - pc_annotation = prev_pc_annotation; - } -} - -// Instrument an instruction. This inject a `printf`-like function call just -// before a lifted instruction to aid in tracking the provenance of register -// values, and relating them back to original instructions. -// -// TODO(pag): In future, this mechanism should be used to provide a feedback -// loop, or to provide information to the `TypeProvider` for future -// re-lifting of code. -// -// TODO(pag): Right now, this feature is enabled by a command-line flag, and -// that flag is tested in `VisitInstruction`; we should move -// lifting configuration decisions out of here so that we can pass -// in a kind of `LiftingOptions` type that changes the lifter's -// behavior. 
-void FunctionLifter::InstrumentDataflowProvenance(llvm::BasicBlock *block) { - if (!data_provenance_function) { - data_provenance_function = - semantics_module->getFunction(kAnvillDataProvenanceFunc); - - if (!data_provenance_function) { - llvm::Type *args[] = {mem_ptr_type, pc_reg_type}; - auto fty = llvm::FunctionType::get(mem_ptr_type, args, true); - data_provenance_function = - llvm::Function::Create(fty, llvm::GlobalValue::ExternalLinkage, - kAnvillDataProvenanceFunc, *semantics_module); - } - } - - std::vector args; - llvm::IRBuilder<> ir(block); - args.push_back(ir.CreateLoad(mem_ptr_type, mem_ptr_ref)); - args.push_back(llvm::ConstantInt::get(pc_reg_type, curr_inst->pc)); - options.arch->ForEachRegister([&](const remill::Register *reg) { - if (reg != pc_reg && reg != sp_reg && reg->EnclosingRegister() == reg) { - args.push_back( - this->op_lifter->LoadRegValue(block, state_ptr, reg->name)); - } - }); - - ir.CreateStore(ir.CreateCall(data_provenance_function, args), mem_ptr_ref); -} - -// Adds a 'breakpoint' instrumentation, which calls functions that are named -// with an instruction's address just before that instruction executes. These -// are nifty to spot checking bitcode. This function is used like: -// -// mem = breakpoint_(mem, PC, NEXT_PC) -// -// That way, we can look at uses and compare the second argument to the -// hex address encoded in the function name, and also look at the third argument -// and see if it corresponds to the subsequent instruction address. 
-void FunctionLifter::InstrumentCallBreakpointFunction(llvm::BasicBlock *block) { - std::stringstream ss; - ss << "breakpoint_" << std::hex << curr_inst->pc; - - const auto func_name = ss.str(); - auto module = block->getModule(); - auto func = module->getFunction(func_name); - if (!func) { - llvm::Type *const params[] = {mem_ptr_type, address_type, address_type}; - const auto fty = llvm::FunctionType::get(mem_ptr_type, params, false); - func = llvm::Function::Create(fty, llvm::GlobalValue::ExternalLinkage, - func_name, module); - - // Make sure to keep this function around (along with `ExternalLinkage`). - func->addFnAttr(llvm::Attribute::OptimizeNone); - func->removeFnAttr(llvm::Attribute::AlwaysInline); - func->removeFnAttr(llvm::Attribute::InlineHint); - func->addFnAttr(llvm::Attribute::NoInline); - func->addFnAttr(llvm::Attribute::ReadNone); - - llvm::IRBuilder<> ir(llvm::BasicBlock::Create(llvm_context, "", func)); - ir.CreateRet(remill::NthArgument(func, 0)); - } - - llvm::Value *args[] = { - new llvm::LoadInst(mem_ptr_type, mem_ptr_ref, llvm::Twine::createNull(), - block), - this->op_lifter->LoadRegValue(block, state_ptr, remill::kPCVariableName), - this->op_lifter->LoadRegValue(block, state_ptr, - remill::kNextPCVariableName)}; - llvm::IRBuilder<> ir(block); - ir.CreateCall(func, args); -} - - -// In the process of lifting code, we may want to call another native -// function, `native_func`, for which we have high-level type info. The main -// lifter operates on a special three-argument form function style, and -// operating on this style is actually to our benefit, as it means that as -// long as we can put data into the emulated `State` structure and pull it -// out, then calling one native function from another doesn't require /us/ -// to know how to adapt one native return type into another native return -// type, and instead we let LLVM's optimizations figure it out later during -// scalar replacement of aggregates (SROA). 
-llvm::Value *FunctionLifter::TryCallNativeFunction(FunctionDecl decl, - llvm::Function *native_func, - llvm::BasicBlock *block) { - llvm::IRBuilder<> irb(block); - - llvm::Value *mem_ptr = irb.CreateLoad(mem_ptr_type, mem_ptr_ref); - mem_ptr = decl.CallFromLiftedBlock(native_func, type_specifier.Dictionary(), - intrinsics, block, state_ptr, mem_ptr); - irb.SetInsertPoint(block); - irb.CreateStore(mem_ptr, mem_ptr_ref); - return mem_ptr; -} - // Get the annotation for the program counter `pc`, or `nullptr` if we're // not doing annotations. @@ -1057,19 +492,19 @@ FunctionLifter::AllocateAndInitializeStateStructure(llvm::BasicBlock *block, break; case StateStructureInitializationProcedure::kGlobalRegisterVariables: new_state_ptr = ir.CreateAlloca(state_type); - InitializeStateStructureFromGlobalRegisterVariables(block); + InitializeStateStructureFromGlobalRegisterVariables(block, new_state_ptr); break; case StateStructureInitializationProcedure:: kGlobalRegisterVariablesAndZeroes: new_state_ptr = ir.CreateAlloca(state_type); ir.CreateStore(llvm::Constant::getNullValue(state_type), new_state_ptr); - InitializeStateStructureFromGlobalRegisterVariables(block); + InitializeStateStructureFromGlobalRegisterVariables(block, new_state_ptr); break; case StateStructureInitializationProcedure:: kGlobalRegisterVariablesAndUndef: new_state_ptr = ir.CreateAlloca(state_type); ir.CreateStore(llvm::UndefValue::get(state_type), new_state_ptr); - InitializeStateStructureFromGlobalRegisterVariables(block); + InitializeStateStructureFromGlobalRegisterVariables(block, new_state_ptr); break; } @@ -1138,7 +573,7 @@ void FunctionLifter::ArchSpecificStateStructureInitialization( // variables. The purpose of these global variables is to show that there are // some unmodelled external dependencies inside of a lifted function. 
void FunctionLifter::InitializeStateStructureFromGlobalRegisterVariables( - llvm::BasicBlock *block) { + llvm::BasicBlock *block, llvm::Value *state_ptr) { // Get or create globals for all top-level registers. The idea here is that // the spec could feasibly miss some dependencies, and so after optimization, @@ -1187,10 +622,10 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( llvm::Value *mem_ptr = llvm::Constant::getNullValue(mem_ptr_type); // Stack-allocate and initialize the state pointer. - this->state_ptr = AllocateAndInitializeStateStructure(block, decl.arch); + auto native_state_ptr = AllocateAndInitializeStateStructure(block, decl.arch); - auto pc_ptr = pc_reg->AddressOf(state_ptr, block); - auto sp_ptr = sp_reg->AddressOf(state_ptr, block); + auto pc_ptr = pc_reg->AddressOf(native_state_ptr, block); + auto sp_ptr = sp_reg->AddressOf(native_state_ptr, block); llvm::IRBuilder<> ir(block); @@ -1214,7 +649,7 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( options.return_address_init_procedure(ir, address_type, func_address); mem_ptr = StoreNativeValue(ra, decl.return_address, types, intrinsics, - block, state_ptr, mem_ptr); + block, native_state_ptr, mem_ptr); } // Store the function parameters either into the state struct @@ -1223,11 +658,11 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( for (auto &arg : native_func->args()) { const auto ¶m_decl = decl.params[arg_index++]; mem_ptr = StoreNativeValue(&arg, param_decl, types, intrinsics, block, - state_ptr, mem_ptr); + native_state_ptr, mem_ptr); } llvm::Value *lifted_func_args[remill::kNumBlockArgs] = {}; - lifted_func_args[remill::kStatePointerArgNum] = state_ptr; + lifted_func_args[remill::kStatePointerArgNum] = native_state_ptr; lifted_func_args[remill::kMemoryPointerArgNum] = mem_ptr; lifted_func_args[remill::kPCArgNum] = pc; auto call_to_lifted_func = ir.CreateCall(lifted_func->getFunctionType(), @@ -1250,7 +685,7 @@ void 
FunctionLifter::CallLiftedFunctionFromNativeFunction( if (decl.returns.size() == 1) { ret_val = LoadLiftedValue(decl.returns.front(), types, intrinsics, block, - state_ptr, mem_ptr); + native_state_ptr, mem_ptr); ir.SetInsertPoint(block); } else if (1 < decl.returns.size()) { @@ -1258,7 +693,7 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( auto index = 0u; for (auto &ret_decl : decl.returns) { auto partial_ret_val = LoadLiftedValue(ret_decl, types, intrinsics, block, - state_ptr, mem_ptr); + native_state_ptr, mem_ptr); ir.SetInsertPoint(block); unsigned indexes[] = {index}; ret_val = ir.CreateInsertValue(ret_val, partial_ret_val, indexes); @@ -1417,7 +852,7 @@ FunctionLifter::AddTerminatingTailCallFromBasicBlockFunctionToLifted( bool FunctionLifter::DoInterProceduralControlFlow( const remill::Instruction &insn, llvm::BasicBlock *block, - const anvill::ControlFlowOverride &override) { + const anvill::ControlFlowOverride &override, llvm::Value *state_ptr) { // only handle inter-proc since intra-proc are handled implicitly by the CFG. 
llvm::IRBuilder<> builder(block); if (std::holds_alternative(override)) { @@ -1425,7 +860,7 @@ bool FunctionLifter::DoInterProceduralControlFlow( this->AddCallFromBasicBlockFunctionToLifted( block, this->intrinsics.function_call, this->intrinsics); if (!cc.stop) { - auto [_, raddr] = this->LoadFunctionReturnAddress(insn, block); + auto [_, raddr] = this->LoadFunctionReturnAddress(insn, block, state_ptr); auto npc = remill::LoadNextProgramCounterRef(block); auto pc = remill::LoadProgramCounterRef(block); builder.CreateStore(raddr, npc); @@ -1443,7 +878,8 @@ bool FunctionLifter::DoInterProceduralControlFlow( return true; } bool FunctionLifter::ApplyInterProceduralControlFlowOverride( - const remill::Instruction &insn, llvm::BasicBlock *&block) { + const remill::Instruction &insn, llvm::BasicBlock *&block, + llvm::Value *state_ptr) { // if this instruction is conditional and interprocedural then we are going to split the block into a case were we do take it and a branch where we dont and then rejoin @@ -1463,14 +899,16 @@ bool FunctionLifter::ApplyInterProceduralControlFlowOverride( builder.CreateCondBr(btaken, do_control_flow, continuation); // if the interprocedural control flow block isnt terminal link it back up - if (this->DoInterProceduralControlFlow(insn, do_control_flow, override)) { + if (this->DoInterProceduralControlFlow(insn, do_control_flow, override, + state_ptr)) { llvm::BranchInst::Create(continuation, do_control_flow); } block = continuation; return true; } else { - return this->DoInterProceduralControlFlow(insn, block, override); + return this->DoInterProceduralControlFlow(insn, block, override, + state_ptr); } } @@ -1516,11 +954,17 @@ llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( // Even when something isn't supported or is invalid, we still lift // a call to a semantic, e.g.`INVALID_INSTRUCTION`, so we really want // to treat instruction lifting as an operation that can't fail. 
+ + CHECK(llvm::isa_and_nonnull( + basic_block_function.state_ptr) && + llvm::cast(basic_block_function.state_ptr) + ->getParent() + ->getParent() == basic_block_function.func); std::ignore = inst.GetLifter()->LiftIntoBlock( inst, bb, basic_block_function.state_ptr, false /* is_delayed */); - ended_on_terminal = - !this->ApplyInterProceduralControlFlowOverride(inst, bb); + ended_on_terminal = !this->ApplyInterProceduralControlFlowOverride( + inst, bb, basic_block_function.state_ptr); } if (!ended_on_terminal) { @@ -1539,13 +983,14 @@ llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( llvm::CallInst *FunctionLifter::CallBasicBlockFunction( uint64_t block_addr, llvm::BasicBlock *add_to_llvm, llvm::Function *bb_func, - llvm::ArrayRef extra_args, llvm::Instruction *IP) const { + llvm::Value *parent_state, llvm::ArrayRef extra_args, + llvm::Instruction *IP) const { llvm::IRBuilder<> builder(add_to_llvm); if (IP) { builder.SetInsertPoint(IP); } std::vector args(remill::kNumBlockArgs + 1); - args[remill::kStatePointerArgNum] = this->state_ptr; + args[remill::kStatePointerArgNum] = parent_state; args[remill::kPCArgNum] = options.program_counter_init_procedure(builder, pc_reg, block_addr); @@ -1562,8 +1007,8 @@ llvm::CallInst *FunctionLifter::CallBasicBlockFunction( } -void FunctionLifter::VisitBlock(CodeBlock blk) { - +void FunctionLifter::VisitBlock(CodeBlock blk, + llvm::Value *lifted_function_state) { auto llvm_blk = this->GetOrCreateBlock(blk.addr); llvm::IRBuilder<> builder(llvm_blk); auto bb_lifted_func = this->CreateBasicBlockFunction(blk); @@ -1573,8 +1018,8 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { this->LiftBasicBlockIntoFunction(bb_lifted_func, blk); CHECK(!llvm::verifyFunction(*bb_lifted_func.func, &llvm::errs())); - auto new_mem_ptr = - this->CallBasicBlockFunction(blk.addr, llvm_blk, bb_lifted_func.func); + auto new_mem_ptr = this->CallBasicBlockFunction( + blk.addr, llvm_blk, bb_lifted_func.func, lifted_function_state); auto mem_ptr_ref 
= remill::LoadMemoryPointerRef(llvm_blk); @@ -1591,12 +1036,12 @@ void FunctionLifter::VisitBlock(CodeBlock blk) { } } -void FunctionLifter::VisitBlocks() { +void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state) { DLOG(INFO) << "Num blocks for func " << std::hex << this->curr_decl->address << ": " << this->curr_decl->cfg.size(); for (const auto &[addr, blk] : this->curr_decl->cfg) { DLOG(INFO) << "Visiting: " << std::hex << addr; - this->VisitBlock(blk); + this->VisitBlock(blk, lifted_function_state); } CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); @@ -1709,7 +1154,6 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { this->op_lifter->ClearCache(); curr_decl = &decl; curr_inst = nullptr; - state_ptr = nullptr; mem_ptr_ref = nullptr; func_address = decl.address; native_func = DeclareFunction(decl); @@ -1755,8 +1199,6 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { this->CreateLiftedFunction(native_func->getName().str() + ".lifted"); lifted_func = lifted_func_st.func; - state_ptr = lifted_func_st.state_ptr; - invalid_successor_block = llvm::BasicBlock::Create(lifted_func_st.func->getContext(), @@ -1768,14 +1210,16 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { const auto pc = lifted_func_st.pc_arg; const auto entry_block = &(lifted_func->getEntryBlock()); pc_reg_ref = - this->op_lifter->LoadRegAddress(entry_block, state_ptr, pc_reg->name) - .first; - next_pc_reg_ref = this->op_lifter - ->LoadRegAddress(entry_block, state_ptr, remill::kNextPCVariableName) + ->LoadRegAddress(entry_block, lifted_func_st.state_ptr, pc_reg->name) .first; + next_pc_reg_ref = this->op_lifter + ->LoadRegAddress(entry_block, lifted_func_st.state_ptr, + remill::kNextPCVariableName) + .first; sp_reg_ref = - this->op_lifter->LoadRegAddress(entry_block, state_ptr, sp_reg->name) + this->op_lifter + ->LoadRegAddress(entry_block, lifted_func_st.state_ptr, sp_reg->name) .first; 
mem_ptr_ref = remill::LoadMemoryPointerRef(entry_block); @@ -1804,11 +1248,11 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { DLOG(INFO) << "Visiting insns"; // Go lift all instructions! - VisitBlocks(); + VisitBlocks(lifted_func_st.state_ptr); CallAndInitializeParameters param_pass(options.TypeDictionary(), intrinsics); - this->ApplyBasicBlockTransform(param_pass); + this->ApplyBasicBlockTransform(param_pass, lifted_func_st.state_ptr); // Fill up `native_func` with a basic block and make it call `lifted_func`. // This creates things like the stack-allocated `State` structure. @@ -1822,7 +1266,8 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { return native_func; } -void FunctionLifter::ApplyBasicBlockTransform(BasicBlockTransform &transform) { +void FunctionLifter::ApplyBasicBlockTransform( + BasicBlockTransform &transform, llvm::Value *lifted_function_state) { llvm::SmallVector, 10> calls; for (auto &insn : llvm::instructions(this->lifted_func)) { if (llvm::CallInst *call = llvm::dyn_cast(&insn)) { @@ -1853,6 +1298,7 @@ void FunctionLifter::ApplyBasicBlockTransform(BasicBlockTransform &transform) { call->getArgOperand(remill::kMemoryPointerArgNum))); } auto new_call = this->CallBasicBlockFunction(addr, old_block, res.new_func, + lifted_function_state, lifted_values, call); llvm::BranchInst::Create(new_block, old_block); @@ -2118,59 +1564,4 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, return new_version; } - -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::NormalInsn &normal) { - this->lifter.VisitNormal(this->inst, this->block, normal); -} -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::InvalidInsn &) { - this->lifter.VisitInvalid(inst, block); -} - - -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::ErrorInsn &) { - this->lifter.VisitError(inst, delayed_inst, block); -} -void 
FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::DirectJump &djump) { - this->lifter.VisitDirectJump(inst, delayed_inst, block, djump); -} - -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::IndirectJump &ijump) { - this->lifter.VisitIndirectJump(inst, delayed_inst, block, ijump, - prev_context); -} - -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::IndirectFunctionCall &icall) { - this->lifter.VisitIndirectFunctionCall(inst, delayed_inst, block, icall, - prev_context); -} -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::DirectFunctionCall &dcall) { - this->lifter.VisitDirectFunctionCall(inst, delayed_inst, block, dcall, - prev_context); -} -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::FunctionReturn &) { - this->lifter.VisitFunctionReturn(inst, delayed_inst, block); -} -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::AsyncHyperCall &async_hcall) { - this->lifter.VisitAsyncHyperCall(inst, delayed_inst, block); -} -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::ConditionalInstruction &cond_insn) { - this->lifter.VisitConditionalInstruction(inst, delayed_inst, block, cond_insn, - this->prev_context); -} - -void FunctionLifter::FlowVisitor::operator()( - const remill::Instruction::NoOp &noop) { - this->lifter.VisitNoOp(inst, block, noop); -} - } // namespace anvill diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 0b26c1bac..ba50be00b 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -97,7 +97,7 @@ class FunctionLifter { llvm::CallInst * CallBasicBlockFunction(uint64_t block_addr, llvm::BasicBlock *add_to_llvm, - llvm::Function *bb_func, + llvm::Function *bb_func, llvm::Value *state_ptr, llvm::ArrayRef extra_args = {}, llvm::Instruction *IP = {}) const; @@ -158,8 +158,6 @@ class FunctionLifter { // Three-argument Remill 
function into which instructions are lifted. llvm::Function *lifted_func{nullptr}; - // State pointer in `lifted_func`. - llvm::Value *state_ptr{nullptr}; // Pointer to the `Memory *` in `lifted_func`. llvm::Value *mem_ptr_ref{nullptr}; @@ -234,7 +232,8 @@ class FunctionLifter { // instruction. llvm::BasicBlock *GetOrCreateBlock(uint64_t addr); - void ApplyBasicBlockTransform(BasicBlockTransform &transform); + void ApplyBasicBlockTransform(BasicBlockTransform &transform, + llvm::Value *lifted_function_state); // Attempts to lookup any redirection of the given address, and then // calls GetOrCreateBlock @@ -244,92 +243,6 @@ class FunctionLifter { void InsertError(llvm::BasicBlock *block); - /* -NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, - IndirectJump, IndirectFunctionCall, DirectFunctionCall, - FunctionReturn, AsyncHyperCall, ConditionalInstruction>*/ - - struct FlowVisitor { - FunctionLifter &lifter; - const remill::Instruction &inst; - llvm::BasicBlock *block; - std::optional &delayed_inst; - const remill::DecodingContext &prev_context; - - - void operator()(const remill::Instruction::NormalInsn &); - void operator()(const remill::Instruction::NoOp &); - void operator()(const remill::Instruction::InvalidInsn &); - void operator()(const remill::Instruction::ErrorInsn &); - void operator()(const remill::Instruction::DirectJump &); - void operator()(const remill::Instruction::IndirectJump &); - void operator()(const remill::Instruction::IndirectFunctionCall &); - void operator()(const remill::Instruction::DirectFunctionCall &); - void operator()(const remill::Instruction::FunctionReturn &); - void operator()(const remill::Instruction::AsyncHyperCall &); - void operator()(const remill::Instruction::ConditionalInstruction &); - }; - - // The following `Visit*` methods exist to orchestrate control flow. 
The way - // lifting works in Remill is that the mechanics of an instruction are - // simulated by a single-entry, single-exit function, called a semantics - // function. A `remill::Instruction` is basically a fancy package of - // information describing what to pass to that function. However, many - // instructions affect control-flow, and so that means that in order to - // enact the control-flow changes that are implied by an instruction, we must - // "orchestrate" lifting of control flow at a higher level, introduction - // conditional branches and such between these called to semantics functions. - - // Visit an invalid instruction. An invalid instruction is a sequence of - // bytes which cannot be decoded, or an empty byte sequence. - void VisitInvalid(const remill::Instruction &inst, llvm::BasicBlock *block); - - // Visit an error instruction. An error instruction is guaranteed to trap - // execution somehow, e.g. `ud2` on x86. Error instructions are treated - // similarly to invalid instructions, with the exception that they can have - // delay slots, and therefore the subsequent instruction may actually execute - // prior to the error. - void VisitError(const remill::Instruction &inst, - std::optional &delayed_inst, - llvm::BasicBlock *block); - - // Visit a normal instruction. Normal instructions have straight line control- - // flow semantics, i.e. after executing the instruction, execution proceeds - // to the next instruction (`inst.next_pc`). - void VisitNormal(const remill::Instruction &inst, llvm::BasicBlock *block, - const remill::Instruction::NormalInsn &norm); - - // Visit a no-op instruction. These behave identically to normal instructions - // from a control-flow perspective. - void VisitNoOp(const remill::Instruction &inst, llvm::BasicBlock *block, - const remill::Instruction::NoOp &noop); - - // Visit a direct jump control-flow instruction. 
The target of the jump is - // known at decode time, and the target address is available in - // `inst.branch_taken_pc`. Execution thus needs to transfer to the instruction - // (and thus `llvm::BasicBlock`) associated with `inst.branch_taken_pc`. - void VisitDirectJump(const remill::Instruction &inst, - std::optional &delayed_inst, - llvm::BasicBlock *block, - const remill::Instruction::DirectJump &norm); - - // Visit an indirect jump control-flow instruction. This may be register- or - // memory-indirect, e.g. `jmp rax` or `jmp [rax]` on x86. Thus, the target is - // not know a priori and our default mechanism for handling this is to perform - // a tail-call to the `__remill_jump` function, whose role is to be a stand-in - // something that enacts the effect of "transfer to target." - void VisitIndirectJump(const remill::Instruction &inst, - std::optional &delayed_inst, - llvm::BasicBlock *block, - const remill::Instruction::IndirectJump &ijump, - const remill::DecodingContext &prev_context); - - // Visit an indirect jump that is a jump table. - void DoSwitchBasedIndirectJump(const remill::Instruction &inst, - llvm::BasicBlock *block, - const std::vector &target_list, - const remill::Instruction::IndirectJump &norm, - const remill::DecodingContext &prev_context); remill::DecodingContext ApplyTargetList(const std::unordered_map &assignments, @@ -404,54 +317,9 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, // should resume after a `call`. std::pair LoadFunctionReturnAddress(const remill::Instruction &inst, - llvm::BasicBlock *block); - - // Enact relevant control-flow changed after a function call. This figures - // out the return address targeted by the callee and links it into the - // control-flow graph. - void VisitAfterFunctionCall( - const remill::Instruction &inst, llvm::BasicBlock *block, - const std::variant &, - bool can_return, const remill::DecodingContext &prev_context); - - // Visit an asynchronous hyper call control-flow instruction. 
These are non- - // local control-flow transfers, such as system calls. We treat them like - // indirect function calls. - void VisitAsyncHyperCall(const remill::Instruction &inst, - std::optional &delayed_inst, - llvm::BasicBlock *block); - - // Visit (and thus lift) a delayed instruction. When lifting a delayed - // instruction, we need to know if we're one the taken path of a control-flow - // edge, or on the not-taken path. Delayed instructions appear physically - // after some instructions, but execute logically before them in the - // CPU pipeline. They are basically a way for hardware designers to push - // the effort of keeping the pipeline full to compiler developers. - void VisitDelayedInstruction(const remill::Instruction &inst, - std::optional &delayed_inst, - llvm::BasicBlock *block, bool on_taken_path); + llvm::BasicBlock *block, llvm::Value *state_ptr); - // Instrument an instruction. This inject a `printf` call just before a - // lifted instruction to aid in debugging. - // - // TODO(pag): In future, this mechanism should be used to provide a feedback - // loop, or to provide information to the `TypeProvider` for future - // re-lifting of code. - // - // TODO(pag): Right now, this feature is enabled by a command-line flag, and - // that flag is tested in `VisitInstruction`; we should move - // lifting configuration decisions out of here so that we can pass - // in a kind of `LiftingOptions` type that changes the lifter's - // behavior. - void InstrumentDataflowProvenance(llvm::BasicBlock *block); - - // Adds a 'breakpoint' instrumentation, which calls functions that are named - // with an instruction's address just before that instruction executes. These - // are nifty to spot checking bitcode. 
- void InstrumentCallBreakpointFunction(llvm::BasicBlock *block); - - void VisitBlock(CodeBlock entry_context); + void VisitBlock(CodeBlock entry_context, llvm::Value *lifted_function_state); LiftedFunction CreateLiftedFunction(const std::string &name); @@ -465,23 +333,7 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, remill::DecodingContext CreateDecodingContext(const CodeBlock &blk); - void VisitBlocks(); - - // In the process of lifting code, we may want to call another native - // function, `native_func`, for which we have high-level type info. The main - // lifter operates on a special three-argument form function style, and - // operating on this style is actually to our benefit, as it means that as - // long as we can put data into the emulated `State` structure and pull it - // out, then calling one native function from another doesn't require /us/ - // to know how to adapt one native return type into another native return - // type, and instead we let LLVM's optimizations figure it out later during - // scalar replacement of aggregates (SROA). - llvm::Value *TryCallNativeFunction(FunctionDecl decl, - llvm::Function *native_func, - llvm::BasicBlock *block); - - // Visit all instructions. This runs the work list and lifts instructions. - void VisitInstructions(uint64_t address); + void VisitBlocks(llvm::Value *lifted_function_state); // Try to decode an instruction at address `addr` into `*inst_out`. Returns // a context map if sueccessful and std::nullopt otherwise. `is_delayed` tells the decoder @@ -509,12 +361,13 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, // inter-procedural effect. 
// Returns a boolean represnting wether decoding should continue (true = non-terminal, false=terminal) bool ApplyInterProceduralControlFlowOverride(const remill::Instruction &, - llvm::BasicBlock *&block); + llvm::BasicBlock *&block, + llvm::Value *state_ptr); - bool - DoInterProceduralControlFlow(const remill::Instruction &insn, - llvm::BasicBlock *block, - const anvill::ControlFlowOverride &override); + bool DoInterProceduralControlFlow(const remill::Instruction &insn, + llvm::BasicBlock *block, + const anvill::ControlFlowOverride &override, + llvm::Value *state_ptr); // Same addcall machinery from remill except allows for the 4 argument basic block functio (state, program_counter, memory, next_pc_ref). llvm::CallInst *AddCallFromBasicBlockFunctionToLifted( @@ -539,7 +392,8 @@ NormalInsn, NoOp, InvalidInsn, ErrorInsn, DirectJump, // variables. The purpose of these global variables is to show that there are // some unmodelled external dependencies inside of a lifted function. void - InitializeStateStructureFromGlobalRegisterVariables(llvm::BasicBlock *block); + InitializeStateStructureFromGlobalRegisterVariables(llvm::BasicBlock *block, + llvm::Value *state_ptr); }; } // namespace anvill From f8c61426fe7fdf3753676f68b4485abdcf82f0df Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 7 Dec 2022 13:40:07 -0500 Subject: [PATCH 037/163] shared functionality into codelifter --- lib/CMakeLists.txt | 4 + lib/Lifters/BasicBlockLifter.cpp | 382 +++++++++++++++++++++++++ lib/Lifters/BasicBlockLifter.h | 75 +++++ lib/Lifters/CodeLifter.cpp | 121 ++++++++ lib/Lifters/CodeLifter.h | 43 +++ lib/Lifters/FunctionLifter.cpp | 465 ------------------------------- lib/Lifters/FunctionLifter.h | 6 +- 7 files changed, 627 insertions(+), 469 deletions(-) create mode 100644 lib/Lifters/BasicBlockLifter.cpp create mode 100644 lib/Lifters/CodeLifter.cpp create mode 100644 lib/Lifters/CodeLifter.h diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 3acd4bb71..1f45682e1 100644 --- 
a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -83,6 +83,8 @@ set(anvill_lifters_HEADERS "Lifters/DataLifter.h" "Lifters/EntityLifter.h" "Lifters/FunctionLifter.h" + "Lifters/BasicBlockLifter.h" + "Lifters/CodeLifter.h" "Lifters/ValueLifter.h" "Lifters/BasicBlockTransform.h" ) @@ -91,6 +93,8 @@ set(anvill_lifters_SOURCES "Lifters/DataLifter.cpp" "Lifters/EntityLifter.cpp" "Lifters/FunctionLifter.cpp" + "Lifters/CodeLifter.cpp" + "Lifters/BasicBlockLifter.cpp" "Lifters/Options.cpp" "Lifters/ValueLifter.cpp" "Lifters/BasicBlockTransform.cpp" diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp new file mode 100644 index 000000000..e186ce71e --- /dev/null +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -0,0 +1,382 @@ +#include "BasicBlockLifter.h" + +#include +#include +#include +#include + +namespace anvill { + +llvm::Function *BasicBlockLifter::LiftBasicBlockFunction() { + auto bbfunc = this->CreateBasicBlockFunction(); + this->LiftBasicBlockIntoFunction(bbfunc); + return bbfunc.func; +} + + +remill::DecodingContext BasicBlockLifter::ApplyContextAssignments( + const std::unordered_map &assignments, + remill::DecodingContext prev_context) { + for (const auto &[k, v] : assignments) { + prev_context.UpdateContextReg(k, v); + } + return prev_context; +} + + +llvm::CallInst *BasicBlockLifter::AddCallFromBasicBlockFunctionToLifted( + llvm::BasicBlock *source_block, llvm::Function *dest_func, + const remill::IntrinsicTable &intrinsics) { + auto func = source_block->getParent(); + llvm::IRBuilder<> ir(source_block); + std::array args; + args[remill::kMemoryPointerArgNum] = + NthArgument(func, remill::kMemoryPointerArgNum); + args[remill::kStatePointerArgNum] = + NthArgument(func, remill::kStatePointerArgNum); + args[remill::kPCArgNum] = NthArgument(func, remill::kPCArgNum); + return ir.CreateCall(dest_func, args); +} + + +// Helper to figure out the address where execution will resume after a +// function call. 
In practice this is the instruction following the function +// call, encoded in `inst.branch_not_taken_pc`. However, SPARC has a terrible +// ABI where they inject an invalid instruction following some calls as a way +// of communicating to the callee that they should return an object of a +// particular, hard-coded size. Thus, we want to actually identify then ignore +// that instruction, and present the following address for where execution +// should resume after a `call`. +std::pair +BasicBlockLifter::LoadFunctionReturnAddress(const remill::Instruction &inst, + llvm::BasicBlock *block) { + + const auto pc = inst.branch_not_taken_pc; + + // The semantics for handling a call save the expected return program counter + // into a local variable. + auto ret_pc = this->op_lifter->LoadRegValue(block, state_ptr, + remill::kReturnPCVariableName); + if (!is_sparc) { + return {pc, ret_pc}; + } + + uint8_t bytes[4] = {}; + + for (auto i = 0u; i < 4u; ++i) { + auto [byte, accessible, perms] = memory_provider.Query(pc + i); + switch (accessible) { + case ByteAvailability::kUnknown: + case ByteAvailability::kUnavailable: + LOG(ERROR) + << "Byte at address " << std::hex << (pc + i) + << " is not available for inspection to figure out return address " + << " of call instruction at address " << pc << std::dec; + return {pc, ret_pc}; + + default: bytes[i] = byte; break; + } + + switch (perms) { + case BytePermission::kUnknown: + case BytePermission::kReadableExecutable: + case BytePermission::kReadableWritableExecutable: break; + case BytePermission::kReadable: + case BytePermission::kReadableWritable: + LOG(ERROR) + << "Byte at address " << std::hex << (pc + i) << " being inspected " + << "to figure out return address of call instruction at address " + << pc << " is not executable" << std::dec; + return {pc, ret_pc}; + } + } + + union Format0a { + uint32_t flat; + struct { + uint32_t imm22 : 22; + uint32_t op2 : 3; + uint32_t rd : 5; + uint32_t op : 2; + } u 
__attribute__((packed)); + } __attribute__((packed)) enc = {}; + static_assert(sizeof(Format0a) == 4, " "); + + enc.flat |= bytes[0]; + enc.flat <<= 8; + enc.flat |= bytes[1]; + enc.flat <<= 8; + enc.flat |= bytes[2]; + enc.flat <<= 8; + enc.flat |= bytes[3]; + + // This looks like an `unimp ` instruction, where the `imm22` encodes + // the size of the value to return. See "Specificationming Note" in v8 manual, + // B.31, p 137. + // + // TODO(pag, kumarak): Does a zero value in `enc.u.imm22` imply a no-return + // function? Try this on Compiler Explorer! + if (!enc.u.op && !enc.u.op2) { + LOG(INFO) << "Found structure return of size " << enc.u.imm22 << " to " + << std::hex << pc << " at " << inst.pc << std::dec; + + llvm::IRBuilder<> ir(block); + return {pc + 4u, + ir.CreateAdd(ret_pc, llvm::ConstantInt::get(ret_pc->getType(), 4))}; + + } else { + return {pc, ret_pc}; + } +} + + +bool BasicBlockLifter::DoInterProceduralControlFlow( + const remill::Instruction &insn, llvm::BasicBlock *block, + const anvill::ControlFlowOverride &override) { + // only handle inter-proc since intra-proc are handled implicitly by the CFG. 
+ llvm::IRBuilder<> builder(block); + if (std::holds_alternative(override)) { + auto cc = std::get(override); + this->AddCallFromBasicBlockFunctionToLifted( + block, this->intrinsics.function_call, this->intrinsics); + if (!cc.stop) { + auto [_, raddr] = this->LoadFunctionReturnAddress(insn, block); + auto npc = remill::LoadNextProgramCounterRef(block); + auto pc = remill::LoadProgramCounterRef(block); + builder.CreateStore(raddr, npc); + builder.CreateStore(raddr, pc); + } else { + remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics, true); + } + return !cc.stop; + } else if (std::holds_alternative(override)) { + remill::AddTerminatingTailCall(block, intrinsics.function_return, + intrinsics, true); + return false; + } + + return true; +} + + +bool BasicBlockLifter::ApplyInterProceduralControlFlowOverride( + const remill::Instruction &insn, llvm::BasicBlock *&block) { + + + // if this instruction is conditional and interprocedural then we are going to split the block into a case were we do take it and a branch where we dont and then rejoin + + auto override = options.control_flow_provider.GetControlFlowOverride(insn.pc); + + if ((std::holds_alternative(override) || + std::holds_alternative(override))) { + if (std::holds_alternative( + insn.flows)) { + auto btaken = remill::LoadBranchTaken(block); + llvm::IRBuilder<> builder(block); + auto do_control_flow = + llvm::BasicBlock::Create(block->getContext(), "", block->getParent()); + auto continuation = + llvm::BasicBlock::Create(block->getContext(), "", block->getParent()); + builder.CreateCondBr(btaken, do_control_flow, continuation); + + // if the interprocedural control flow block isnt terminal link it back up + if (this->DoInterProceduralControlFlow(insn, do_control_flow, override)) { + llvm::BranchInst::Create(continuation, do_control_flow); + } + + block = continuation; + return true; + } else { + return this->DoInterProceduralControlFlow(insn, block, override); + } + } + + return true; +} + 
+remill::DecodingContext +BasicBlockLifter::CreateDecodingContext(const CodeBlock &blk) { + auto init_context = this->options.arch->CreateInitialContext(); + return this->ApplyContextAssignments(blk.context_assignments, + std::move(init_context)); +} + +// Try to decode an instruction at address `addr` into `*inst_out`. Returns +// the context map of the decoded instruction if successful and std::nullopt otherwise. `is_delayed` tells the decoder +// whether or not the instruction being decoded is being decoded inside of a +// delay slot of another instruction. +bool BasicBlockLifter::DecodeInstructionInto(const uint64_t addr, + bool is_delayed, + remill::Instruction *inst_out, + remill::DecodingContext context) { + static const auto max_inst_size = options.arch->MaxInstructionSize(context); + inst_out->Reset(); + + // Read the maximum number of bytes possible for instructions on this + // architecture. For x86(-64), this is 15 bytes, whereas for fixed-width + // architectures like AArch32/AArch64 and SPARC32/SPARC64, this is 4 bytes. 
+ inst_out->bytes.reserve(max_inst_size); + + auto accumulate_inst_byte = [=](auto byte, auto accessible, auto perms) { + switch (accessible) { + case ByteAvailability::kUnknown: + case ByteAvailability::kUnavailable: return false; + default: + switch (perms) { + case BytePermission::kUnknown: + case BytePermission::kReadableExecutable: + case BytePermission::kReadableWritableExecutable: + inst_out->bytes.push_back(static_cast(byte)); + return true; + case BytePermission::kReadable: + case BytePermission::kReadableWritable: return false; + } + } + }; + + for (auto i = 0u; i < max_inst_size; ++i) { + if (!std::apply(accumulate_inst_byte, memory_provider.Query(addr + i))) { + break; + } + } + + if (is_delayed) { + return options.arch->DecodeDelayedInstruction( + addr, inst_out->bytes, *inst_out, std::move(context)); + } else { + DLOG(INFO) << "Ops emplace: " << inst_out->operands.size(); + return options.arch->DecodeInstruction(addr, inst_out->bytes, *inst_out, + std::move(context)); + } +} + + +void BasicBlockLifter::LiftBasicBlockIntoFunction( + BasicBlockFunction &basic_block_function) { + auto entry_block = &basic_block_function.func->getEntryBlock(); + + auto bb = llvm::BasicBlock::Create(basic_block_function.func->getContext(), + "", basic_block_function.func); + + + llvm::BranchInst::Create(bb, entry_block); + + remill::Instruction inst; + + auto reached_addr = this->block_def.addr; + // TODO(Ian): use a different context + + auto init_context = this->CreateDecodingContext(this->block_def); + + + bool ended_on_terminal = false; + while (reached_addr < this->block_def.addr + this->block_def.size && + !ended_on_terminal) { + auto addr = reached_addr; + auto res = this->DecodeInstructionInto(addr, false, &inst, init_context); + if (!res) { + LOG(FATAL) << "Failed to decode insn in block " << std::hex << addr; + } + + reached_addr += inst.bytes.size(); + + // Even when something isn't supported or is invalid, we still lift + // a call to a semantic, 
e.g.`INVALID_INSTRUCTION`, so we really want + // to treat instruction lifting as an operation that can't fail. + + CHECK(llvm::isa_and_nonnull( + basic_block_function.state_ptr) && + llvm::cast(basic_block_function.state_ptr) + ->getParent() + ->getParent() == basic_block_function.func); + std::ignore = inst.GetLifter()->LiftIntoBlock( + inst, bb, basic_block_function.state_ptr, false /* is_delayed */); + + ended_on_terminal = + !this->ApplyInterProceduralControlFlowOverride(inst, bb); + } + + if (!ended_on_terminal) { + llvm::IRBuilder<> builder(bb); + + builder.CreateStore(remill::LoadNextProgramCounter(bb, this->intrinsics), + basic_block_function.next_pc_out_param); + auto memory = remill::LoadMemoryPointer(bb, this->intrinsics); + llvm::ReturnInst::Create(bb->getContext(), memory, bb); + } + this->RecursivelyInlineFunctionCallees(basic_block_function.func); +} + + +BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { + std::string name_ = "basic_block_func" + std::to_string(this->block_def.addr); + auto &context = this->semantics_module->getContext(); + llvm::FunctionType *lifted_func_type = + llvm::dyn_cast(remill::RecontextualizeType( + this->options.arch->LiftedFunctionType(), context)); + + std::vector params = std::vector( + lifted_func_type->param_begin(), lifted_func_type->param_end()); + params.push_back(llvm::PointerType::get(context, 0)); + + llvm::FunctionType *func_type = + llvm::FunctionType::get(lifted_func_type->getReturnType(), params, false); + + + llvm::StringRef name(name_.data(), name_.size()); + auto func = + llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, 0u, + name, this->semantics_module.get()); + + func->setMetadata(anvill::kBasicBlockMetadata, + GetBasicBlockAnnotation(block.addr)); + + auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); + auto out_state = remill::NthArgument(func, remill::kStatePointerArgNum); + auto pc = remill::NthArgument(func, remill::kPCArgNum); + auto 
next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); + memory->setName("memory"); + out_state->setName("state_out"); + pc->setName("program_counter"); + next_pc_out->setName("next_pc_out"); + + options.arch->InitializeEmptyLiftedFunction(func); + + + auto &blk = func->getEntryBlock(); + llvm::IRBuilder<> ir(&blk); + + auto state = this->AllocateAndInitializeStateStructure(&blk, options.arch); + // Put registers that are referencing the stack in terms of their displacement so that we + // Can resolve these stack references later . + + + auto stack_offsets = this->curr_decl->stack_offsets.find(block.addr); + + if (stack_offsets != this->curr_decl->stack_offsets.end()) { + for (auto ®_off : stack_offsets->second.affine_equalities) { + if (reg_off.base_register && reg_off.base_register == this->sp_reg) { + auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( + ir, this->sp_reg, block.addr, reg_off.offset); + StoreNativeValueToRegister(new_value, reg_off.target_register, + type_provider.Dictionary(), intrinsics, &blk, + state); + } + } + } + + + auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); + auto mem_arg = remill::NthArgument(func, remill::kMemoryPointerArgNum); + + + func->addFnAttr(llvm::Attribute::NoInline); + func->setLinkage(llvm::GlobalValue::InternalLinkage); + + BasicBlockFunction bbf{func, state, pc_arg, mem_arg, next_pc_out}; + addr_to_bb_func[block.addr] = bbf; + + return bbf; +} + +} // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index e69de29bb..827bf27a8 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -0,0 +1,75 @@ + +#include +#include +#include +#include +#include +#include + +#include "CodeLifter.h" +#include "anvill/Declarations.h" + +namespace anvill { + + +struct BasicBlockFunction { + llvm::Function *func; + llvm::Value *state_ptr; + llvm::Argument *pc_arg; + llvm::Argument *mem_ptr; + 
llvm::Argument *next_pc_out_param; +}; + +/** + * @brief A BasicBlockLifter lifts a basic block as a native function that takes + * in scope variables and returns in scope variables (essentially an SSAed form of the entire block) + * In addition to variables a basic block also returns the successor of this block (if it exists, ie. function returns are terminating tail calls) to the caller given the input state. + */ +class BasicBlockLifter : public CodeLifter { + private: + const BasicBlockContext &block_context; + const CodeBlock &block_def; + const LifterOptions &options; + llvm::Module *semantics_module; + + // The allocated state ptr for the function. + llvm::Value *state_ptr; + + remill::DecodingContext ApplyContextAssignments( + const std::unordered_map &assignments, + remill::DecodingContext prev_context); + + remill::DecodingContext CreateDecodingContext(const CodeBlock &blk); + + BasicBlockLifter(const BasicBlockContext &block_context, + const CodeBlock &block_def, const LifterOptions &options_); + + void LiftBasicBlockIntoFunction(BasicBlockFunction &basic_block_function); + + BasicBlockFunction CreateBasicBlockFunction(); + + bool ApplyInterProceduralControlFlowOverride(const remill::Instruction &insn, + llvm::BasicBlock *&block); + + bool + DoInterProceduralControlFlow(const remill::Instruction &insn, + llvm::BasicBlock *block, + const anvill::ControlFlowOverride &override); + + llvm::CallInst *AddCallFromBasicBlockFunctionToLifted( + llvm::BasicBlock *source_block, llvm::Function *dest_func, + const remill::IntrinsicTable &intrinsics); + + std::pair + LoadFunctionReturnAddress(const remill::Instruction &inst, + llvm::BasicBlock *block); + + bool DecodeInstructionInto(const uint64_t addr, bool is_delayed, + remill::Instruction *inst_out, + remill::DecodingContext context); + + public: + llvm::Function *LiftBasicBlockFunction(); +}; + +} // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp 
new file mode 100644 index 000000000..daf85962b --- /dev/null +++ b/lib/Lifters/CodeLifter.cpp @@ -0,0 +1,121 @@ +#include "CodeLifter.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace anvill { +namespace { +// Clear out LLVM variable names. They're usually not helpful. +static void ClearVariableNames(llvm::Function *func) { + for (auto &block : *func) { + // block.setName(llvm::Twine::createNull()); + for (auto &inst : block) { + if (inst.hasName()) { + inst.setName(llvm::Twine::createNull()); + } + } + } +} +} // namespace + + +void CodeLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { + std::vector calls_to_inline; + + // Set of instructions that we should not annotate because we can't tie them + // to a particular instruction address. + std::unordered_set insts_without_provenance; + if (options.pc_metadata_name) { + for (auto &inst : llvm::instructions(*inf)) { + if (!inst.getMetadata(pc_annotation_id)) { + insts_without_provenance.insert(&inst); + } + } + } + + for (auto changed = true; changed; changed = !calls_to_inline.empty()) { + calls_to_inline.clear(); + + for (auto &inst : llvm::instructions(*inf)) { + if (auto call_inst = llvm::dyn_cast(&inst); call_inst) { + if (auto called_func = call_inst->getCalledFunction(); + called_func && !called_func->isDeclaration() && + !called_func->hasFnAttribute(llvm::Attribute::NoInline)) { + calls_to_inline.push_back(call_inst); + } + } + } + + for (llvm::CallInst *call_inst : calls_to_inline) { + llvm::MDNode *call_pc = nullptr; + if (options.pc_metadata_name) { + call_pc = call_inst->getMetadata(pc_annotation_id); + } + + llvm::InlineFunctionInfo info; + auto res = llvm::InlineFunction(*call_inst, info); + + CHECK(res.isSuccess()); + + // Propagate PC metadata from call sites into inlined call bodies. 
+ if (options.pc_metadata_name) { + for (auto &inst : llvm::instructions(*inf)) { + if (!inst.getMetadata(pc_annotation_id)) { + if (insts_without_provenance.count(&inst)) { + continue; + + // This call site had no associated PC metadata, and so we want + // to exclude any inlined code from accidentally being associated + // with other PCs on future passes. + } else if (!call_pc) { + insts_without_provenance.insert(&inst); + + // We can propagate the annotation. + } else { + inst.setMetadata(pc_annotation_id, call_pc); + } + } + } + } + } + } + + // Initialize cleanup optimizations + + + if (llvm::verifyFunction(*inf, &llvm::errs())) { + + LOG(FATAL) << "Function verification failed: " << inf->getName().str() + << " " << remill::LLVMThingToString(inf->getType()); + } + + llvm::legacy::FunctionPassManager fpm(inf->getParent()); + fpm.add(llvm::createCFGSimplificationPass()); + fpm.add(llvm::createPromoteMemoryToRegisterPass()); + fpm.add(llvm::createReassociatePass()); + fpm.add(llvm::createDeadStoreEliminationPass()); + fpm.add(llvm::createDeadCodeEliminationPass()); + fpm.add(llvm::createSROAPass()); + fpm.add(llvm::createDeadCodeEliminationPass()); + fpm.add(llvm::createInstructionCombiningPass()); + fpm.doInitialization(); + fpm.run(*inf); + fpm.doFinalization(); + + ClearVariableNames(inf); +} +} // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h new file mode 100644 index 000000000..4587bd80e --- /dev/null +++ b/lib/Lifters/CodeLifter.h @@ -0,0 +1,43 @@ +#include +#include +#include + +#include "anvill/Lifters.h" + +namespace anvill { +/** + * @brief A class that lifts machine level semantics to llvm + * + */ +class CodeLifter { + protected: + const LifterOptions &options; + + // Remill intrinsics inside of `module`. + remill::IntrinsicTable intrinsics; + + remill::OperandLifter::OpLifterPtr op_lifter; + + // Are we lifting SPARC code? 
This affects whether or not we need to do + // double checking on function return addresses; + const bool is_sparc; + + // Are we lifting x86(-64) code? + const bool is_x86_or_amd64; + + + const MemoryProvider &memory_provider; + const TypeProvider &type_provider; + const TypeTranslator type_specifier; + + + void RecursivelyInlineFunctionCallees(llvm::Function *inf); + + + unsigned pc_annotation_id; + + public: + CodeLifter(const LifterOptions &options); +}; + +} // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 5f61f1cb6..7cc830a10 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -8,7 +8,6 @@ #include "FunctionLifter.h" -#include <_types/_uint64_t.h> #include #include #include @@ -69,18 +68,6 @@ namespace anvill { namespace { -// Clear out LLVM variable names. They're usually not helpful. -static void ClearVariableNames(llvm::Function *func) { - for (auto &block : *func) { - // block.setName(llvm::Twine::createNull()); - for (auto &inst : block) { - if (inst.hasName()) { - inst.setName(llvm::Twine::createNull()); - } - } - } -} - // A function that ensures that the memory pointer escapes, and thus none of // the memory writes at the end of a function are lost. @@ -239,53 +226,6 @@ FunctionLifter::GetOrCreateTargetBlock(const remill::Instruction &from_inst, return GetOrCreateBlock(to_addr); } -// Try to decode an instruction at address `addr` into `*inst_out`. Returns -// the context map of the decoded instruction if successful and std::nullopt otherwise. `is_delayed` tells the decoder -// whether or not the instruction being decoded is being decoded inside of a -// delay slot of another instruction. 
-bool FunctionLifter::DecodeInstructionInto(const uint64_t addr, bool is_delayed, - remill::Instruction *inst_out, - remill::DecodingContext context) { - static const auto max_inst_size = options.arch->MaxInstructionSize(context); - inst_out->Reset(); - - // Read the maximum number of bytes possible for instructions on this - // architecture. For x86(-64), this is 15 bytes, whereas for fixed-width - // architectures like AArch32/AArch64 and SPARC32/SPARC64, this is 4 bytes. - inst_out->bytes.reserve(max_inst_size); - - auto accumulate_inst_byte = [=](auto byte, auto accessible, auto perms) { - switch (accessible) { - case ByteAvailability::kUnknown: - case ByteAvailability::kUnavailable: return false; - default: - switch (perms) { - case BytePermission::kUnknown: - case BytePermission::kReadableExecutable: - case BytePermission::kReadableWritableExecutable: - inst_out->bytes.push_back(static_cast(byte)); - return true; - case BytePermission::kReadable: - case BytePermission::kReadableWritable: return false; - } - } - }; - - for (auto i = 0u; i < max_inst_size; ++i) { - if (!std::apply(accumulate_inst_byte, memory_provider.Query(addr + i))) { - break; - } - } - - if (is_delayed) { - return options.arch->DecodeDelayedInstruction( - addr, inst_out->bytes, *inst_out, std::move(context)); - } else { - DLOG(INFO) << "Ops emplace: " << inst_out->operands.size(); - return options.arch->DecodeInstruction(addr, inst_out->bytes, *inst_out, - std::move(context)); - } -} void FunctionLifter::InsertError(llvm::BasicBlock *block) { llvm::IRBuilder<> ir{block}; @@ -296,15 +236,6 @@ void FunctionLifter::InsertError(llvm::BasicBlock *block) { } -remill::DecodingContext FunctionLifter::ApplyTargetList( - const std::unordered_map &assignments, - remill::DecodingContext prev_context) { - for (const auto &[k, v] : assignments) { - prev_context.UpdateContextReg(k, v); - } - return prev_context; -} - std::optional FunctionLifter::TryGetTargetFunctionType(const remill::Instruction 
&from_inst, std::uint64_t address) { @@ -315,97 +246,6 @@ FunctionLifter::TryGetTargetFunctionType(const remill::Instruction &from_inst, return opt_callable_decl; } -// Helper to figure out the address where execution will resume after a -// function call. In practice this is the instruction following the function -// call, encoded in `inst.branch_not_taken_pc`. However, SPARC has a terrible -// ABI where they inject an invalid instruction following some calls as a way -// of communicating to the callee that they should return an object of a -// particular, hard-coded size. Thus, we want to actually identify then ignore -// that instruction, and present the following address for where execution -// should resume after a `call`. -std::pair -FunctionLifter::LoadFunctionReturnAddress(const remill::Instruction &inst, - llvm::BasicBlock *block, - llvm::Value *state_ptr) { - - const auto pc = inst.branch_not_taken_pc; - - // The semantics for handling a call save the expected return program counter - // into a local variable. 
- auto ret_pc = this->op_lifter->LoadRegValue(block, state_ptr, - remill::kReturnPCVariableName); - if (!is_sparc) { - return {pc, ret_pc}; - } - - uint8_t bytes[4] = {}; - - for (auto i = 0u; i < 4u; ++i) { - auto [byte, accessible, perms] = memory_provider.Query(pc + i); - switch (accessible) { - case ByteAvailability::kUnknown: - case ByteAvailability::kUnavailable: - LOG(ERROR) - << "Byte at address " << std::hex << (pc + i) - << " is not available for inspection to figure out return address " - << " of call instruction at address " << pc << std::dec; - return {pc, ret_pc}; - - default: bytes[i] = byte; break; - } - - switch (perms) { - case BytePermission::kUnknown: - case BytePermission::kReadableExecutable: - case BytePermission::kReadableWritableExecutable: break; - case BytePermission::kReadable: - case BytePermission::kReadableWritable: - LOG(ERROR) - << "Byte at address " << std::hex << (pc + i) << " being inspected " - << "to figure out return address of call instruction at address " - << pc << " is not executable" << std::dec; - return {pc, ret_pc}; - } - } - - union Format0a { - uint32_t flat; - struct { - uint32_t imm22 : 22; - uint32_t op2 : 3; - uint32_t rd : 5; - uint32_t op : 2; - } u __attribute__((packed)); - } __attribute__((packed)) enc = {}; - static_assert(sizeof(Format0a) == 4, " "); - - enc.flat |= bytes[0]; - enc.flat <<= 8; - enc.flat |= bytes[1]; - enc.flat <<= 8; - enc.flat |= bytes[2]; - enc.flat <<= 8; - enc.flat |= bytes[3]; - - // This looks like an `unimp ` instruction, where the `imm22` encodes - // the size of the value to return. See "Specificationming Note" in v8 manual, - // B.31, p 137. - // - // TODO(pag, kumarak): Does a zero value in `enc.u.imm22` imply a no-return - // function? Try this on Compiler Explorer! 
- if (!enc.u.op && !enc.u.op2) { - LOG(INFO) << "Found structure return of size " << enc.u.imm22 << " to " - << std::hex << pc << " at " << inst.pc << std::dec; - - llvm::IRBuilder<> ir(block); - return {pc + 4u, - ir.CreateAdd(ret_pc, llvm::ConstantInt::get(ret_pc->getType(), 4))}; - - } else { - return {pc, ret_pc}; - } -} - // Get the annotation for the program counter `pc`, or `nullptr` if we're // not doing annotations. @@ -713,92 +553,6 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( } -void FunctionLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { - std::vector calls_to_inline; - - // Set of instructions that we should not annotate because we can't tie them - // to a particular instruction address. - std::unordered_set insts_without_provenance; - if (options.pc_metadata_name) { - for (auto &inst : llvm::instructions(*inf)) { - if (!inst.getMetadata(pc_annotation_id)) { - insts_without_provenance.insert(&inst); - } - } - } - - for (auto changed = true; changed; changed = !calls_to_inline.empty()) { - calls_to_inline.clear(); - - for (auto &inst : llvm::instructions(*inf)) { - if (auto call_inst = llvm::dyn_cast(&inst); call_inst) { - if (auto called_func = call_inst->getCalledFunction(); - called_func && !called_func->isDeclaration() && - !called_func->hasFnAttribute(llvm::Attribute::NoInline)) { - calls_to_inline.push_back(call_inst); - } - } - } - - for (llvm::CallInst *call_inst : calls_to_inline) { - llvm::MDNode *call_pc = nullptr; - if (options.pc_metadata_name) { - call_pc = call_inst->getMetadata(pc_annotation_id); - } - - llvm::InlineFunctionInfo info; - auto res = llvm::InlineFunction(*call_inst, info); - - CHECK(res.isSuccess()); - - // Propagate PC metadata from call sites into inlined call bodies. 
- if (options.pc_metadata_name) { - for (auto &inst : llvm::instructions(*inf)) { - if (!inst.getMetadata(pc_annotation_id)) { - if (insts_without_provenance.count(&inst)) { - continue; - - // This call site had no associated PC metadata, and so we want - // to exclude any inlined code from accidentally being associated - // with other PCs on future passes. - } else if (!call_pc) { - insts_without_provenance.insert(&inst); - - // We can propagate the annotation. - } else { - inst.setMetadata(pc_annotation_id, call_pc); - } - } - } - } - } - } - - // Initialize cleanup optimizations - - - if (llvm::verifyFunction(*native_func, &llvm::errs())) { - - LOG(FATAL) << "Function verification failed: " - << native_func->getName().str() << " " - << remill::LLVMThingToString(native_func->getType()); - } - - llvm::legacy::FunctionPassManager fpm(semantics_module.get()); - fpm.add(llvm::createCFGSimplificationPass()); - fpm.add(llvm::createPromoteMemoryToRegisterPass()); - fpm.add(llvm::createReassociatePass()); - fpm.add(llvm::createDeadStoreEliminationPass()); - fpm.add(llvm::createDeadCodeEliminationPass()); - fpm.add(llvm::createSROAPass()); - fpm.add(llvm::createDeadCodeEliminationPass()); - fpm.add(llvm::createInstructionCombiningPass()); - fpm.doInitialization(); - fpm.run(*native_func); - fpm.doFinalization(); - - ClearVariableNames(native_func); -} // In practice, lifted functions are not workable as is; we need to emulate // `__attribute__((flatten))`, i.e. recursively inline as much as possible, so // that all semantics and helpers are completely inlined. 
@@ -820,21 +574,6 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { } -llvm::CallInst *FunctionLifter::AddCallFromBasicBlockFunctionToLifted( - llvm::BasicBlock *source_block, llvm::Function *dest_func, - const remill::IntrinsicTable &intrinsics) { - auto func = source_block->getParent(); - llvm::IRBuilder<> ir(source_block); - std::array args; - args[remill::kMemoryPointerArgNum] = - NthArgument(func, remill::kMemoryPointerArgNum); - args[remill::kStatePointerArgNum] = - NthArgument(func, remill::kStatePointerArgNum); - args[remill::kPCArgNum] = NthArgument(func, remill::kPCArgNum); - return ir.CreateCall(dest_func, args); -} - - llvm::CallInst * FunctionLifter::AddTerminatingTailCallFromBasicBlockFunctionToLifted( llvm::BasicBlock *source_block, llvm::Function *dest_func, @@ -850,137 +589,6 @@ FunctionLifter::AddTerminatingTailCallFromBasicBlockFunctionToLifted( return call; } -bool FunctionLifter::DoInterProceduralControlFlow( - const remill::Instruction &insn, llvm::BasicBlock *block, - const anvill::ControlFlowOverride &override, llvm::Value *state_ptr) { - // only handle inter-proc since intra-proc are handled implicitly by the CFG. 
- llvm::IRBuilder<> builder(block); - if (std::holds_alternative(override)) { - auto cc = std::get(override); - this->AddCallFromBasicBlockFunctionToLifted( - block, this->intrinsics.function_call, this->intrinsics); - if (!cc.stop) { - auto [_, raddr] = this->LoadFunctionReturnAddress(insn, block, state_ptr); - auto npc = remill::LoadNextProgramCounterRef(block); - auto pc = remill::LoadProgramCounterRef(block); - builder.CreateStore(raddr, npc); - builder.CreateStore(raddr, pc); - } else { - remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics, true); - } - return !cc.stop; - } else if (std::holds_alternative(override)) { - remill::AddTerminatingTailCall(block, intrinsics.function_return, - intrinsics, true); - return false; - } - - return true; -} -bool FunctionLifter::ApplyInterProceduralControlFlowOverride( - const remill::Instruction &insn, llvm::BasicBlock *&block, - llvm::Value *state_ptr) { - - - // if this instruction is conditional and interprocedural then we are going to split the block into a case were we do take it and a branch where we dont and then rejoin - - auto override = options.control_flow_provider.GetControlFlowOverride(insn.pc); - - if ((std::holds_alternative(override) || - std::holds_alternative(override))) { - if (std::holds_alternative( - insn.flows)) { - auto btaken = remill::LoadBranchTaken(block); - llvm::IRBuilder<> builder(block); - auto do_control_flow = - llvm::BasicBlock::Create(block->getContext(), "", block->getParent()); - auto continuation = - llvm::BasicBlock::Create(block->getContext(), "", block->getParent()); - builder.CreateCondBr(btaken, do_control_flow, continuation); - - // if the interprocedural control flow block isnt terminal link it back up - if (this->DoInterProceduralControlFlow(insn, do_control_flow, override, - state_ptr)) { - llvm::BranchInst::Create(continuation, do_control_flow); - } - - block = continuation; - return true; - } else { - return this->DoInterProceduralControlFlow(insn, block, 
override, - state_ptr); - } - } - - return true; -} - - -remill::DecodingContext -FunctionLifter::CreateDecodingContext(const CodeBlock &blk) { - auto init_context = this->options.arch->CreateInitialContext(); - return this->ApplyTargetList(blk.context_assignments, - std::move(init_context)); -} - -llvm::BasicBlock *FunctionLifter::LiftBasicBlockIntoFunction( - BasicBlockFunction &basic_block_function, const CodeBlock &blk) { - auto entry_block = &basic_block_function.func->getEntryBlock(); - - auto bb = llvm::BasicBlock::Create(basic_block_function.func->getContext(), - "", basic_block_function.func); - - - llvm::BranchInst::Create(bb, entry_block); - - remill::Instruction inst; - - auto reached_addr = blk.addr; - // TODO(Ian): use a different context - - auto init_context = this->CreateDecodingContext(blk); - - - bool ended_on_terminal = false; - while (reached_addr < blk.addr + blk.size && !ended_on_terminal) { - auto addr = reached_addr; - auto res = this->DecodeInstructionInto(addr, false, &inst, init_context); - if (!res) { - LOG(FATAL) << "Failed to decode insn in block " << std::hex << addr; - } - - reached_addr += inst.bytes.size(); - - // Even when something isn't supported or is invalid, we still lift - // a call to a semantic, e.g.`INVALID_INSTRUCTION`, so we really want - // to treat instruction lifting as an operation that can't fail. 
- - CHECK(llvm::isa_and_nonnull( - basic_block_function.state_ptr) && - llvm::cast(basic_block_function.state_ptr) - ->getParent() - ->getParent() == basic_block_function.func); - std::ignore = inst.GetLifter()->LiftIntoBlock( - inst, bb, basic_block_function.state_ptr, false /* is_delayed */); - - ended_on_terminal = !this->ApplyInterProceduralControlFlowOverride( - inst, bb, basic_block_function.state_ptr); - } - - if (!ended_on_terminal) { - llvm::IRBuilder<> builder(bb); - - builder.CreateStore(remill::LoadNextProgramCounter(bb, this->intrinsics), - basic_block_function.next_pc_out_param); - auto memory = remill::LoadMemoryPointer(bb, this->intrinsics); - llvm::ReturnInst::Create(bb->getContext(), memory, bb); - } - this->RecursivelyInlineFunctionCallees(basic_block_function.func); - - return bb; -} - - llvm::CallInst *FunctionLifter::CallBasicBlockFunction( uint64_t block_addr, llvm::BasicBlock *add_to_llvm, llvm::Function *bb_func, llvm::Value *parent_state, llvm::ArrayRef extra_args, @@ -1055,79 +663,6 @@ llvm::Function *FunctionLifter::GetBasicBlockFunction(uint64_t address) const { return it->second.func; } -BasicBlockFunction -FunctionLifter::CreateBasicBlockFunction(const CodeBlock &block) { - std::string name_ = "basic_block_func" + std::to_string(block.addr); - auto &context = this->semantics_module->getContext(); - llvm::FunctionType *lifted_func_type = - llvm::dyn_cast(remill::RecontextualizeType( - this->options.arch->LiftedFunctionType(), context)); - - std::vector params = std::vector( - lifted_func_type->param_begin(), lifted_func_type->param_end()); - params.push_back(llvm::PointerType::get(context, 0)); - - llvm::FunctionType *func_type = - llvm::FunctionType::get(lifted_func_type->getReturnType(), params, false); - - - llvm::StringRef name(name_.data(), name_.size()); - auto func = - llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, 0u, - name, this->semantics_module.get()); - - 
func->setMetadata(anvill::kBasicBlockMetadata, - GetBasicBlockAnnotation(block.addr)); - - auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); - auto out_state = remill::NthArgument(func, remill::kStatePointerArgNum); - auto pc = remill::NthArgument(func, remill::kPCArgNum); - auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); - memory->setName("memory"); - out_state->setName("state_out"); - pc->setName("program_counter"); - next_pc_out->setName("next_pc_out"); - - options.arch->InitializeEmptyLiftedFunction(func); - - - auto &blk = func->getEntryBlock(); - llvm::IRBuilder<> ir(&blk); - - auto state = this->AllocateAndInitializeStateStructure(&blk, options.arch); - // Put registers that are referencing the stack in terms of their displacement so that we - // Can resolve these stack references later . - - - auto stack_offsets = this->curr_decl->stack_offsets.find(block.addr); - - if (stack_offsets != this->curr_decl->stack_offsets.end()) { - for (auto ®_off : stack_offsets->second.affine_equalities) { - if (reg_off.base_register && reg_off.base_register == this->sp_reg) { - auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( - ir, this->sp_reg, block.addr, reg_off.offset); - StoreNativeValueToRegister(new_value, reg_off.target_register, - type_provider.Dictionary(), intrinsics, &blk, - state); - } - } - } - - blk.dump(); - - - auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); - auto mem_arg = remill::NthArgument(func, remill::kMemoryPointerArgNum); - - - func->addFnAttr(llvm::Attribute::NoInline); - func->setLinkage(llvm::GlobalValue::InternalLinkage); - - BasicBlockFunction bbf{func, state, pc_arg, mem_arg, next_pc_out}; - addr_to_bb_func[block.addr] = bbf; - - return bbf; -} LiftedFunction FunctionLifter::CreateLiftedFunction(const std::string &name) { auto new_func = diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index ba50be00b..541790f8d 100644 --- 
a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -30,6 +30,7 @@ #include #include "BasicBlockTransform.h" +#include "CodeLifter.h" namespace llvm { class Constant; @@ -70,7 +71,7 @@ struct LiftedFunction { // Orchestrates lifting of instructions and control-flow between instructions. -class FunctionLifter { +class FunctionLifter : public CodeLifter { public: ~FunctionLifter(void); @@ -105,9 +106,6 @@ class FunctionLifter { private: const LifterOptions &options; - const MemoryProvider &memory_provider; - const TypeProvider &type_provider; - const TypeTranslator type_specifier; // Semantics module containing all instruction semantics. std::unique_ptr semantics_module; From f6609a8a6a003a0590b0bf8bae9d85c1f84072d7 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 7 Dec 2022 13:54:14 -0500 Subject: [PATCH 038/163] pulled decl info into context --- include/anvill/Declarations.h | 1 + lib/Lifters/BasicBlockLifter.cpp | 30 ++++--- lib/Lifters/BasicBlockLifter.h | 6 +- lib/Lifters/CodeLifter.cpp | 134 ++++++++++++++++++++++++++++++ lib/Lifters/CodeLifter.h | 28 +++++++ lib/Lifters/FunctionLifter.cpp | 138 ------------------------------- lib/Lifters/FunctionLifter.h | 20 ----- 7 files changed, 184 insertions(+), 173 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 038c65604..73faa312f 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -201,6 +201,7 @@ struct LocalVariableDecl { class BasicBlockContext { public: virtual std::vector GetAvailableVariables() const = 0; + virtual const SpecStackOffsets &GetStackOffsets() const = 0; }; struct FunctionDecl; diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index e186ce71e..42ced2d3d 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -1,6 +1,7 @@ #include "BasicBlockLifter.h" #include +#include #include #include #include @@ -308,6 +309,12 @@ void 
BasicBlockLifter::LiftBasicBlockIntoFunction( } +llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const { + auto pc_val = llvm::ConstantInt::get(address_type, addr); + auto pc_md = llvm::ValueAsMetadata::get(pc_val); + return llvm::MDNode::get(this->semantics_module->getContext(), pc_md); +} + BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { std::string name_ = "basic_block_func" + std::to_string(this->block_def.addr); auto &context = this->semantics_module->getContext(); @@ -326,10 +333,10 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { llvm::StringRef name(name_.data(), name_.size()); auto func = llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, 0u, - name, this->semantics_module.get()); + name, this->semantics_module); func->setMetadata(anvill::kBasicBlockMetadata, - GetBasicBlockAnnotation(block.addr)); + GetBasicBlockAnnotation(this->block_def.addr)); auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); auto out_state = remill::NthArgument(func, remill::kStatePointerArgNum); @@ -351,17 +358,15 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { // Can resolve these stack references later . 
- auto stack_offsets = this->curr_decl->stack_offsets.find(block.addr); + auto stack_offsets = this->block_context.GetStackOffsets(); - if (stack_offsets != this->curr_decl->stack_offsets.end()) { - for (auto ®_off : stack_offsets->second.affine_equalities) { - if (reg_off.base_register && reg_off.base_register == this->sp_reg) { - auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( - ir, this->sp_reg, block.addr, reg_off.offset); - StoreNativeValueToRegister(new_value, reg_off.target_register, - type_provider.Dictionary(), intrinsics, &blk, - state); - } + for (auto ®_off : stack_offsets.affine_equalities) { + if (reg_off.base_register && reg_off.base_register == this->sp_reg) { + auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( + ir, this->sp_reg, this->block_def.addr, reg_off.offset); + StoreNativeValueToRegister(new_value, reg_off.target_register, + type_provider.Dictionary(), intrinsics, &blk, + state); } } @@ -374,7 +379,6 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { func->setLinkage(llvm::GlobalValue::InternalLinkage); BasicBlockFunction bbf{func, state, pc_arg, mem_arg, next_pc_out}; - addr_to_bb_func[block.addr] = bbf; return bbf; } diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 827bf27a8..9c0b57b89 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -29,8 +29,6 @@ class BasicBlockLifter : public CodeLifter { private: const BasicBlockContext &block_context; const CodeBlock &block_def; - const LifterOptions &options; - llvm::Module *semantics_module; // The allocated state ptr for the function. 
llvm::Value *state_ptr; @@ -68,6 +66,10 @@ class BasicBlockLifter : public CodeLifter { remill::Instruction *inst_out, remill::DecodingContext context); + + llvm::MDNode *GetBasicBlockAnnotation(uint64_t addr) const; + + public: llvm::Function *LiftBasicBlockFunction(); }; diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index daf85962b..146aa9ed9 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -33,6 +33,140 @@ static void ClearVariableNames(llvm::Function *func) { } // namespace +// Perform architecture-specific initialization of the state structure +// in `block`. +void CodeLifter::ArchSpecificStateStructureInitialization( + llvm::BasicBlock *block, llvm::Value *new_state_ptr) { + + if (is_x86_or_amd64) { + llvm::IRBuilder<> ir(block); + + const auto ssbase_reg = options.arch->RegisterByName("SSBASE"); + const auto fsbase_reg = options.arch->RegisterByName("FSBASE"); + const auto gsbase_reg = options.arch->RegisterByName("GSBASE"); + const auto dsbase_reg = options.arch->RegisterByName("DSBASE"); + const auto esbase_reg = options.arch->RegisterByName("ESBASE"); + const auto csbase_reg = options.arch->RegisterByName("CSBASE"); + + if (gsbase_reg) { + const auto gsbase_val = llvm::ConstantExpr::getPtrToInt( + llvm::ConstantExpr::getAddrSpaceCast( + llvm::ConstantExpr::getNullValue( + llvm::PointerType::get(block->getContext(), 256)), + llvm::PointerType::get(block->getContext(), 0)), + pc_reg_type); + ir.CreateStore(gsbase_val, gsbase_reg->AddressOf(new_state_ptr, ir)); + } + + if (fsbase_reg) { + const auto fsbase_val = llvm::ConstantExpr::getPtrToInt( + llvm::ConstantExpr::getAddrSpaceCast( + llvm::ConstantExpr::getNullValue( + llvm::PointerType::get(block->getContext(), 257)), + llvm::PointerType::get(block->getContext(), 0)), + pc_reg_type); + ir.CreateStore(fsbase_val, fsbase_reg->AddressOf(new_state_ptr, ir)); + } + + if (ssbase_reg) { + ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), + 
ssbase_reg->AddressOf(new_state_ptr, ir)); + } + + if (dsbase_reg) { + ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), + dsbase_reg->AddressOf(new_state_ptr, ir)); + } + + if (esbase_reg) { + ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), + esbase_reg->AddressOf(new_state_ptr, ir)); + } + + if (csbase_reg) { + ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), + csbase_reg->AddressOf(new_state_ptr, ir)); + } + } +} + + +// Initialize the state structure with default values, loaded from global +// variables. The purpose of these global variables is to show that there are +// some unmodelled external dependencies inside of a lifted function. +void CodeLifter::InitializeStateStructureFromGlobalRegisterVariables( + llvm::BasicBlock *block, llvm::Value *state_ptr) { + + // Get or create globals for all top-level registers. The idea here is that + // the spec could feasibly miss some dependencies, and so after optimization, + // we'll be able to observe uses of `__anvill_reg_*` globals, and handle + // them appropriately. + + llvm::IRBuilder<> ir(block); + + options.arch->ForEachRegister([=, &ir](const remill::Register *reg_) { + if (auto reg = reg_->EnclosingRegister(); + reg_ == reg && reg != sp_reg && reg != pc_reg) { + + std::stringstream ss; + ss << kUnmodelledRegisterPrefix << reg->name; + const auto reg_name = ss.str(); + + auto reg_global = semantics_module->getGlobalVariable(reg_name); + if (!reg_global) { + reg_global = new llvm::GlobalVariable( + *semantics_module, reg->type, false, + llvm::GlobalValue::ExternalLinkage, nullptr, reg_name); + } + + const auto reg_ptr = reg->AddressOf(state_ptr, block); + ir.CreateStore(ir.CreateLoad(reg->type, reg_global), reg_ptr); + } + }); +} + +// Allocate and initialize the state structure. 
+llvm::Value * +CodeLifter::AllocateAndInitializeStateStructure(llvm::BasicBlock *block, + const remill::Arch *arch) { + llvm::IRBuilder<> ir(block); + const auto state_type = arch->StateStructType(); + llvm::Value *new_state_ptr = nullptr; + + switch (options.state_struct_init_procedure) { + case StateStructureInitializationProcedure::kNone: + new_state_ptr = ir.CreateAlloca(state_type); + break; + case StateStructureInitializationProcedure::kZeroes: + new_state_ptr = ir.CreateAlloca(state_type); + ir.CreateStore(llvm::Constant::getNullValue(state_type), new_state_ptr); + break; + case StateStructureInitializationProcedure::kUndef: + new_state_ptr = ir.CreateAlloca(state_type); + ir.CreateStore(llvm::UndefValue::get(state_type), new_state_ptr); + break; + case StateStructureInitializationProcedure::kGlobalRegisterVariables: + new_state_ptr = ir.CreateAlloca(state_type); + InitializeStateStructureFromGlobalRegisterVariables(block, new_state_ptr); + break; + case StateStructureInitializationProcedure:: + kGlobalRegisterVariablesAndZeroes: + new_state_ptr = ir.CreateAlloca(state_type); + ir.CreateStore(llvm::Constant::getNullValue(state_type), new_state_ptr); + InitializeStateStructureFromGlobalRegisterVariables(block, new_state_ptr); + break; + case StateStructureInitializationProcedure:: + kGlobalRegisterVariablesAndUndef: + new_state_ptr = ir.CreateAlloca(state_type); + ir.CreateStore(llvm::UndefValue::get(state_type), new_state_ptr); + InitializeStateStructureFromGlobalRegisterVariables(block, new_state_ptr); + break; + } + + ArchSpecificStateStructureInitialization(block, new_state_ptr); + return new_state_ptr; +} + void CodeLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { std::vector calls_to_inline; diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index 4587bd80e..a3be6b502 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -1,3 +1,4 @@ +#include #include #include #include @@ -18,21 +19,48 @@ class CodeLifter 
{ remill::OperandLifter::OpLifterPtr op_lifter; + // Are we lifting SPARC code? This affects whether or not we need to do // double checking on function return addresses; const bool is_sparc; // Are we lifting x86(-64) code? const bool is_x86_or_amd64; + // Specification counter and stack pointer registers. + const remill::Register *const pc_reg; + const remill::Register *const sp_reg; const MemoryProvider &memory_provider; const TypeProvider &type_provider; const TypeTranslator type_specifier; + llvm::IntegerType *const address_type; + + llvm::Module *semantics_module; + + // Convenient to keep around. + llvm::Type *const i8_type; + llvm::Constant *const i8_zero; + llvm::Type *const i32_type; + llvm::PointerType *const mem_ptr_type; + llvm::PointerType *const state_ptr_type; + + llvm::Type *const pc_reg_type{nullptr}; void RecursivelyInlineFunctionCallees(llvm::Function *inf); + // Allocate and initialize the state structure. + llvm::Value *AllocateAndInitializeStateStructure(llvm::BasicBlock *block, + const remill::Arch *arch); + + + void + InitializeStateStructureFromGlobalRegisterVariables(llvm::BasicBlock *block, + llvm::Value *state_ptr); + + void ArchSpecificStateStructureInitialization(llvm::BasicBlock *block, + llvm::Value *new_state_ptr); unsigned pc_annotation_id; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 7cc830a10..fb3e7511a 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -259,11 +259,6 @@ llvm::MDNode *FunctionLifter::GetPCAnnotation(uint64_t pc) const { } } -llvm::MDNode *FunctionLifter::GetBasicBlockAnnotation(uint64_t addr) const { - auto pc_val = llvm::ConstantInt::get(address_type, addr); - auto pc_md = llvm::ValueAsMetadata::get(pc_val); - return llvm::MDNode::get(llvm_context, pc_md); -} // Declare the function decl `decl` and return an `llvm::Function *`. 
llvm::Function *FunctionLifter::GetOrDeclareFunction(const FunctionDecl &decl) { @@ -310,139 +305,6 @@ llvm::Function *FunctionLifter::GetOrDeclareFunction(const FunctionDecl &decl) { return native_func; } -// Allocate and initialize the state structure. -llvm::Value * -FunctionLifter::AllocateAndInitializeStateStructure(llvm::BasicBlock *block, - const remill::Arch *arch) { - llvm::IRBuilder<> ir(block); - const auto state_type = arch->StateStructType(); - llvm::Value *new_state_ptr = nullptr; - - switch (options.state_struct_init_procedure) { - case StateStructureInitializationProcedure::kNone: - new_state_ptr = ir.CreateAlloca(state_type); - break; - case StateStructureInitializationProcedure::kZeroes: - new_state_ptr = ir.CreateAlloca(state_type); - ir.CreateStore(llvm::Constant::getNullValue(state_type), new_state_ptr); - break; - case StateStructureInitializationProcedure::kUndef: - new_state_ptr = ir.CreateAlloca(state_type); - ir.CreateStore(llvm::UndefValue::get(state_type), new_state_ptr); - break; - case StateStructureInitializationProcedure::kGlobalRegisterVariables: - new_state_ptr = ir.CreateAlloca(state_type); - InitializeStateStructureFromGlobalRegisterVariables(block, new_state_ptr); - break; - case StateStructureInitializationProcedure:: - kGlobalRegisterVariablesAndZeroes: - new_state_ptr = ir.CreateAlloca(state_type); - ir.CreateStore(llvm::Constant::getNullValue(state_type), new_state_ptr); - InitializeStateStructureFromGlobalRegisterVariables(block, new_state_ptr); - break; - case StateStructureInitializationProcedure:: - kGlobalRegisterVariablesAndUndef: - new_state_ptr = ir.CreateAlloca(state_type); - ir.CreateStore(llvm::UndefValue::get(state_type), new_state_ptr); - InitializeStateStructureFromGlobalRegisterVariables(block, new_state_ptr); - break; - } - - ArchSpecificStateStructureInitialization(block, new_state_ptr); - return new_state_ptr; -} - -// Perform architecture-specific initialization of the state structure -// in `block`. 
-void FunctionLifter::ArchSpecificStateStructureInitialization( - llvm::BasicBlock *block, llvm::Value *new_state_ptr) { - - if (is_x86_or_amd64) { - llvm::IRBuilder<> ir(block); - - const auto ssbase_reg = options.arch->RegisterByName("SSBASE"); - const auto fsbase_reg = options.arch->RegisterByName("FSBASE"); - const auto gsbase_reg = options.arch->RegisterByName("GSBASE"); - const auto dsbase_reg = options.arch->RegisterByName("DSBASE"); - const auto esbase_reg = options.arch->RegisterByName("ESBASE"); - const auto csbase_reg = options.arch->RegisterByName("CSBASE"); - - if (gsbase_reg) { - const auto gsbase_val = llvm::ConstantExpr::getPtrToInt( - llvm::ConstantExpr::getAddrSpaceCast( - llvm::ConstantExpr::getNullValue( - llvm::PointerType::get(block->getContext(), 256)), - llvm::PointerType::get(block->getContext(), 0)), - pc_reg_type); - ir.CreateStore(gsbase_val, gsbase_reg->AddressOf(new_state_ptr, ir)); - } - - if (fsbase_reg) { - const auto fsbase_val = llvm::ConstantExpr::getPtrToInt( - llvm::ConstantExpr::getAddrSpaceCast( - llvm::ConstantExpr::getNullValue( - llvm::PointerType::get(block->getContext(), 257)), - llvm::PointerType::get(block->getContext(), 0)), - pc_reg_type); - ir.CreateStore(fsbase_val, fsbase_reg->AddressOf(new_state_ptr, ir)); - } - - if (ssbase_reg) { - ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), - ssbase_reg->AddressOf(new_state_ptr, ir)); - } - - if (dsbase_reg) { - ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), - dsbase_reg->AddressOf(new_state_ptr, ir)); - } - - if (esbase_reg) { - ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), - esbase_reg->AddressOf(new_state_ptr, ir)); - } - - if (csbase_reg) { - ir.CreateStore(llvm::Constant::getNullValue(pc_reg_type), - csbase_reg->AddressOf(new_state_ptr, ir)); - } - } -} - -// Initialize the state structure with default values, loaded from global -// variables. 
The purpose of these global variables is to show that there are -// some unmodelled external dependencies inside of a lifted function. -void FunctionLifter::InitializeStateStructureFromGlobalRegisterVariables( - llvm::BasicBlock *block, llvm::Value *state_ptr) { - - // Get or create globals for all top-level registers. The idea here is that - // the spec could feasibly miss some dependencies, and so after optimization, - // we'll be able to observe uses of `__anvill_reg_*` globals, and handle - // them appropriately. - - llvm::IRBuilder<> ir(block); - - options.arch->ForEachRegister([=, &ir](const remill::Register *reg_) { - if (auto reg = reg_->EnclosingRegister(); - reg_ == reg && reg != sp_reg && reg != pc_reg) { - - std::stringstream ss; - ss << kUnmodelledRegisterPrefix << reg->name; - const auto reg_name = ss.str(); - - auto reg_global = semantics_module->getGlobalVariable(reg_name); - if (!reg_global) { - reg_global = new llvm::GlobalVariable( - *semantics_module, reg->type, false, - llvm::GlobalValue::ExternalLinkage, nullptr, reg_name); - } - - const auto reg_ptr = reg->AddressOf(state_ptr, block); - ir.CreateStore(ir.CreateLoad(reg->type, reg_global), reg_ptr); - } - }); -} - // Set up `native_func` to be able to call `lifted_func`. This means // marshalling high-level argument types into lower-level values to pass into // a stack-allocated `State` structure. This also involves providing initial diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 541790f8d..37e761058 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -118,26 +118,6 @@ class FunctionLifter : public CodeLifter { remill::OperandLifter::OpLifterPtr op_lifter; - // Specification counter and stack pointer registers. - const remill::Register *const pc_reg; - const remill::Register *const sp_reg; - - // Are we lifting SPARC code? 
This affects whether or not we need to do - // double checking on function return addresses; - const bool is_sparc; - - // Are we lifting x86(-64) code? - const bool is_x86_or_amd64; - - // Convenient to keep around. - llvm::Type *const i8_type; - llvm::Constant *const i8_zero; - llvm::Type *const i32_type; - llvm::PointerType *const mem_ptr_type; - llvm::PointerType *const state_ptr_type; - llvm::IntegerType *const address_type; - llvm::Type *const pc_reg_type{nullptr}; - // Metadata node to attach to lifted instructions to related them to // original instructions. unsigned pc_annotation_id{0}; From d0d50c3f6de2b01ff6e942ebd8023e7db73a86fc Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 7 Dec 2022 17:14:37 -0500 Subject: [PATCH 039/163] move construction to code lifter --- include/anvill/Declarations.h | 1 + lib/Lifters/FunctionLifter.cpp | 25 +------------------------ 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 73faa312f..493e86591 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -212,6 +212,7 @@ class SpecBlockContext : public BasicBlockContext { public: SpecBlockContext(const FunctionDecl &decl) : decl(decl) {} virtual std::vector GetAvailableVariables() const override; + virtual const SpecStackOffsets &GetStackOffsets() const override; }; // A function decl, as represented at a "near ABI" level. 
To be specific, diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index fb3e7511a..bd79aa067 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -162,33 +162,10 @@ FunctionLifter::~FunctionLifter(void) {} FunctionLifter::FunctionLifter(const LifterOptions &options_) : options(options_), - memory_provider(options.memory_provider), - type_provider(options.type_provider), - type_specifier(options.TypeDictionary(), options.arch), semantics_module(remill::LoadArchSemantics(options.arch)), llvm_context(semantics_module->getContext()), intrinsics(semantics_module.get()), - op_lifter(options.arch->DefaultLifter(intrinsics)), - pc_reg(options.arch - ->RegisterByName(options.arch->ProgramCounterRegisterName()) - ->EnclosingRegister()), - sp_reg( - options.arch->RegisterByName(options.arch->StackPointerRegisterName()) - ->EnclosingRegister()), - is_sparc(options.arch->IsSPARC32() || options.arch->IsSPARC64()), - is_x86_or_amd64(options.arch->IsX86() || options.arch->IsAMD64()), - i8_type(llvm::Type::getInt8Ty(llvm_context)), - i8_zero(llvm::Constant::getNullValue(i8_type)), - i32_type(llvm::Type::getInt32Ty(llvm_context)), - mem_ptr_type( - llvm::dyn_cast(remill::RecontextualizeType( - options.arch->MemoryPointerType(), llvm_context))), - state_ptr_type( - llvm::dyn_cast(remill::RecontextualizeType( - options.arch->StatePointerType(), llvm_context))), - address_type( - llvm::Type::getIntNTy(llvm_context, options.arch->address_size)), - pc_reg_type(pc_reg->type) { + op_lifter(options.arch->DefaultLifter(intrinsics)) { if (options.pc_metadata_name) { pc_annotation_id = llvm_context.getMDKindID(options.pc_metadata_name); From 67fae08bdaa2ec6521785f4a18ebc368494cc259 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 07:47:56 -0500 Subject: [PATCH 040/163] back to building --- lib/Lifters/CodeLifter.cpp | 35 ++++++++++++++++++++++++++++++++++ lib/Lifters/CodeLifter.h | 11 +++++++++-- lib/Lifters/EntityLifter.cpp 
| 8 ++++---- lib/Lifters/FunctionLifter.cpp | 18 ++++++++--------- lib/Lifters/FunctionLifter.h | 13 ++++--------- 5 files changed, 61 insertions(+), 24 deletions(-) diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index 146aa9ed9..29664844d 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -33,6 +33,41 @@ static void ClearVariableNames(llvm::Function *func) { } // namespace +CodeLifter::CodeLifter(const LifterOptions &options, + llvm::Module *semantics_module) + : options(options), + semantics_module(semantics_module), + intrinsics(semantics_module), + llvm_context(semantics_module->getContext()), + op_lifter(options.arch->DefaultLifter(intrinsics)), + is_sparc(options.arch->IsSPARC32() || options.arch->IsSPARC64()), + is_x86_or_amd64(options.arch->IsX86() || options.arch->IsAMD64()), + pc_reg(options.arch + ->RegisterByName(options.arch->ProgramCounterRegisterName()) + ->EnclosingRegister()), + sp_reg( + options.arch->RegisterByName(options.arch->StackPointerRegisterName()) + ->EnclosingRegister()), + memory_provider(options.memory_provider), + type_provider(options.type_provider), + type_specifier(options.TypeDictionary(), options.arch), + address_type( + llvm::Type::getIntNTy(llvm_context, options.arch->address_size)), + i8_type(llvm::Type::getInt8Ty(llvm_context)), + i8_zero(llvm::Constant::getNullValue(i8_type)), + i32_type(llvm::Type::getInt32Ty(llvm_context)), + mem_ptr_type( + llvm::dyn_cast(remill::RecontextualizeType( + options.arch->MemoryPointerType(), llvm_context))), + state_ptr_type( + llvm::dyn_cast(remill::RecontextualizeType( + options.arch->StatePointerType(), llvm_context))), + pc_reg_type(pc_reg->type) { + if (options.pc_metadata_name) { + pc_annotation_id = llvm_context.getMDKindID(options.pc_metadata_name); + } +} + // Perform architecture-specific initialization of the state structure // in `block`. 
void CodeLifter::ArchSpecificStateStructureInitialization( diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index a3be6b502..6aa2c7db6 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include @@ -15,8 +17,14 @@ class CodeLifter { const LifterOptions &options; // Remill intrinsics inside of `module`. + + + llvm::Module *semantics_module; + remill::IntrinsicTable intrinsics; + llvm::LLVMContext &llvm_context; + remill::OperandLifter::OpLifterPtr op_lifter; @@ -36,7 +44,6 @@ class CodeLifter { const TypeTranslator type_specifier; llvm::IntegerType *const address_type; - llvm::Module *semantics_module; // Convenient to keep around. llvm::Type *const i8_type; @@ -65,7 +72,7 @@ class CodeLifter { unsigned pc_annotation_id; public: - CodeLifter(const LifterOptions &options); + CodeLifter(const LifterOptions &options, llvm::Module *semantics_module); }; } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/EntityLifter.cpp b/lib/Lifters/EntityLifter.cpp index b88fea994..29f8d700d 100644 --- a/lib/Lifters/EntityLifter.cpp +++ b/lib/Lifters/EntityLifter.cpp @@ -8,7 +8,6 @@ #include "EntityLifter.h" -#include #include #include #include @@ -21,6 +20,8 @@ #include +#include "Lifters/FunctionLifter.h" + namespace anvill { EntityLifterImpl::~EntityLifterImpl(void) {} @@ -30,7 +31,7 @@ EntityLifterImpl::EntityLifterImpl(const LifterOptions &options_) memory_provider(&(options.memory_provider)), type_provider(&(options.type_provider)), value_lifter(options), - function_lifter(options), + function_lifter(FunctionLifter::CreateFunctionLifter(options_)), data_lifter(options) { CHECK_EQ(options.arch->context, &(options.module->getContext())); options.arch->PrepareModule(options.module); @@ -79,8 +80,7 @@ void EntityLifterImpl::ForEachEntityAtAddress( EntityLifter::~EntityLifter(void) {} -EntityLifter::EntityLifter( - const LifterOptions &options_) 
+EntityLifter::EntityLifter(const LifterOptions &options_) : impl(std::make_shared(options_)) {} // Assuming that `entity` is an entity that was lifted by this `EntityLifter`, diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index bd79aa067..ef6b6a233 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -160,19 +160,19 @@ static void AnnotateInstructions(llvm::BasicBlock *block, unsigned id, FunctionLifter::~FunctionLifter(void) {} -FunctionLifter::FunctionLifter(const LifterOptions &options_) - : options(options_), - semantics_module(remill::LoadArchSemantics(options.arch)), - llvm_context(semantics_module->getContext()), - intrinsics(semantics_module.get()), - op_lifter(options.arch->DefaultLifter(intrinsics)) { - if (options.pc_metadata_name) { - pc_annotation_id = llvm_context.getMDKindID(options.pc_metadata_name); - } +FunctionLifter +FunctionLifter::CreateFunctionLifter(const LifterOptions &options_) { + return FunctionLifter(options_, remill::LoadArchSemantics(options_.arch)); } +FunctionLifter::FunctionLifter(const LifterOptions &options_, + std::unique_ptr semantics_module) + : CodeLifter(options_, semantics_module.get()), + semantics_module(std::move(semantics_module)) {} + + llvm::BranchInst * FunctionLifter::BranchToInst(uint64_t from_addr, uint64_t to_addr, const remill::DecodingContext &mapper, diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 37e761058..c68bd06ee 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -75,7 +75,8 @@ class FunctionLifter : public CodeLifter { public: ~FunctionLifter(void); - FunctionLifter(const LifterOptions &options_); + + static FunctionLifter CreateFunctionLifter(const LifterOptions &options_); // Declare a lifted a function. Will return `nullptr` if the memory is // not accessible or executable. 
@@ -105,18 +106,12 @@ class FunctionLifter : public CodeLifter { llvm::Function *GetBasicBlockFunction(uint64_t address) const; private: - const LifterOptions &options; + FunctionLifter(const LifterOptions &options_, + std::unique_ptr semantics_module); // Semantics module containing all instruction semantics. std::unique_ptr semantics_module; - // Context associated with `module`. - llvm::LLVMContext &llvm_context; - - // Remill intrinsics inside of `module`. - remill::IntrinsicTable intrinsics; - - remill::OperandLifter::OpLifterPtr op_lifter; // Metadata node to attach to lifted instructions to related them to // original instructions. From 9568946e2e00b2a8acc26b959e2b89e5f38e2779 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 09:31:43 -0500 Subject: [PATCH 041/163] add caller to bblifter --- lib/Lifters/BasicBlockLifter.cpp | 39 ++++++++++++ lib/Lifters/BasicBlockLifter.h | 42 +++++++++++++ lib/Lifters/CodeLifter.h | 2 + lib/Lifters/FunctionLifter.cpp | 102 ++----------------------------- lib/Lifters/FunctionLifter.h | 26 +------- 5 files changed, 92 insertions(+), 119 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 42ced2d3d..fa434b484 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -383,4 +383,43 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { return bbf; } + +void BasicBlockLifter::CallBasicBlockFunction( + llvm::IRBuilder<> &builder, llvm::Value *parent_state, + const CallableBasicBlockFunction &cbfunc) const { + + + std::vector args(remill::kNumBlockArgs + 1); + args[remill::kStatePointerArgNum] = parent_state; + + args[remill::kPCArgNum] = options.program_counter_init_procedure( + builder, pc_reg, cbfunc.GetBlock().addr); + args[remill::kMemoryPointerArgNum] = + remill::LoadMemoryPointer(builder, this->intrinsics); + + args[remill::kNumBlockArgs] = + remill::LoadNextProgramCounterRef(builder.GetInsertBlock()); + + + auto 
packed_locals = + this->PackLocals(builder, parent_state, cbfunc.GetInScopeVaraibles()); + + + args.push_back(packed_locals); + + auto new_mem_ptr = builder.CreateCall(cbfunc.GetFunction(), args); + + auto mem_ptr_ref = remill::LoadMemoryPointerRef(builder.GetInsertBlock()); + + builder.CreateStore(new_mem_ptr, mem_ptr_ref); + + this->PackLocals(builder, state_ptr, cbfunc.GetInScopeVaraibles()); +} + + +void CallableBasicBlockFunction::CallBasicBlockFunction( + llvm::IRBuilder<> &add_to_llvm, llvm::Value *parent_state) const { + this->bb_lifter.CallBasicBlockFunction(add_to_llvm, parent_state, *this); +} + } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 9c0b57b89..0475cf3ac 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -1,11 +1,17 @@ +#pragma once #include #include #include #include #include +#include +#include +#include #include +#include + #include "CodeLifter.h" #include "anvill/Declarations.h" @@ -20,6 +26,29 @@ struct BasicBlockFunction { llvm::Argument *next_pc_out_param; }; +class BasicBlockLifter; +class CallableBasicBlockFunction { + + private: + llvm::Function *func; + std::vector in_scope_locals; + CodeBlock block; + const BasicBlockLifter &bb_lifter; + + + public: + const std::vector &GetInScopeVaraibles() const; + llvm::Function *GetFunction() const; + + llvm::StructType *GetRetType() const; + + const CodeBlock &GetBlock() const; + + // Calls a basic block function and unpacks the result into the state + void CallBasicBlockFunction(llvm::IRBuilder<> &, + llvm::Value *state_ptr) const; +}; + /** * @brief A BasicBlockLifter lifts a basic block as a native function that takes * in scope variables and returns in scope variables (essentially an SSAed form of the entire block) @@ -72,6 +101,19 @@ class BasicBlockLifter : public CodeLifter { public: llvm::Function *LiftBasicBlockFunction(); + + // Packs in scope variables into a struct + 
llvm::Value *PackLocals(llvm::IRBuilder<> &, llvm::Value *from_state_ptr, + const std::vector &) const; + + void UnpackLocals(llvm::IRBuilder<> &, llvm::Value *returned_value, + llvm::Value *into_state_ptr, + const std::vector &) const; + + + // Calls a basic block function and unpacks the result into the state + void CallBasicBlockFunction(llvm::IRBuilder<> &, llvm::Value *state_ptr, + const CallableBasicBlockFunction &) const; }; } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index 6aa2c7db6..aaf2999dd 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index ef6b6a233..23d759129 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -61,6 +61,7 @@ #include #include +#include "BasicBlockLifter.h" #include "EntityLifter.h" #include "anvill/Declarations.h" #include "anvill/Specification.h" @@ -428,49 +429,20 @@ FunctionLifter::AddTerminatingTailCallFromBasicBlockFunctionToLifted( return call; } -llvm::CallInst *FunctionLifter::CallBasicBlockFunction( - uint64_t block_addr, llvm::BasicBlock *add_to_llvm, llvm::Function *bb_func, - llvm::Value *parent_state, llvm::ArrayRef extra_args, - llvm::Instruction *IP) const { - llvm::IRBuilder<> builder(add_to_llvm); - if (IP) { - builder.SetInsertPoint(IP); - } - std::vector args(remill::kNumBlockArgs + 1); - args[remill::kStatePointerArgNum] = parent_state; - - args[remill::kPCArgNum] = - options.program_counter_init_procedure(builder, pc_reg, block_addr); - args[remill::kMemoryPointerArgNum] = - remill::LoadMemoryPointer(add_to_llvm, this->intrinsics); - - args[remill::kNumBlockArgs] = remill::LoadNextProgramCounterRef(add_to_llvm); - - for (auto earg : extra_args) { - args.push_back(earg); - } - - return builder.CreateCall(bb_func, args); -} - void 
FunctionLifter::VisitBlock(CodeBlock blk, llvm::Value *lifted_function_state) { auto llvm_blk = this->GetOrCreateBlock(blk.addr); llvm::IRBuilder<> builder(llvm_blk); - auto bb_lifted_func = this->CreateBasicBlockFunction(blk); - bb_lifted_func.func->removeFnAttr(llvm::Attribute::AlwaysInline); - bb_lifted_func.func->addFnAttr(llvm::Attribute::NoInline); - this->LiftBasicBlockIntoFunction(bb_lifted_func, blk); - CHECK(!llvm::verifyFunction(*bb_lifted_func.func, &llvm::errs())); - auto new_mem_ptr = this->CallBasicBlockFunction( - blk.addr, llvm_blk, bb_lifted_func.func, lifted_function_state); + auto bbfunc = this->LiftBasicBlockFunction(blk); - auto mem_ptr_ref = remill::LoadMemoryPointerRef(llvm_blk); - builder.CreateStore(new_mem_ptr, mem_ptr_ref); + CHECK(!llvm::verifyFunction(*bbfunc.GetFunction(), &llvm::errs())); + + bbfunc.CallBasicBlockFunction(builder, lifted_function_state); + auto pc = remill::LoadNextProgramCounter(llvm_blk, this->intrinsics); @@ -494,14 +466,6 @@ void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state) { CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); } -llvm::Function *FunctionLifter::GetBasicBlockFunction(uint64_t address) const { - auto it = addr_to_bb_func.find(address); - if (it == addr_to_bb_func.end()) { - return nullptr; - } - return it->second.func; -} - LiftedFunction FunctionLifter::CreateLiftedFunction(const std::string &name) { auto new_func = @@ -624,10 +588,6 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // Go lift all instructions! VisitBlocks(lifted_func_st.state_ptr); - - CallAndInitializeParameters param_pass(options.TypeDictionary(), intrinsics); - this->ApplyBasicBlockTransform(param_pass, lifted_func_st.state_ptr); - // Fill up `native_func` with a basic block and make it call `lifted_func`. // This creates things like the stack-allocated `State` structure. 
CallLiftedFunctionFromNativeFunction(decl); @@ -639,56 +599,6 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { return native_func; } - -void FunctionLifter::ApplyBasicBlockTransform( - BasicBlockTransform &transform, llvm::Value *lifted_function_state) { - llvm::SmallVector, 10> calls; - for (auto &insn : llvm::instructions(this->lifted_func)) { - if (llvm::CallInst *call = llvm::dyn_cast(&insn)) { - auto addr = GetBasicBlockAddr(call->getCalledFunction()); - LOG(INFO) << "getting basic block addr " - << remill::LLVMThingToString(call); - if (addr) { - calls.emplace_back(call, *addr); - } - } - } - - for (auto [call, addr] : calls) { - // avoid iterator invalidation - auto cont = this->curr_decl->GetBlockContext(addr); - AnvillBasicBlock block = {call->getCalledFunction(), cont}; - LOG(INFO) << "transforming"; - auto res = transform.Transform(block); - std::vector lifted_values; - auto old_block = call->getParent(); - auto new_block = llvm::SplitBlock(call->getParent(), call); - - old_block->getTerminator()->eraseFromParent(); - for (auto arg : res.appended_args) { - lifted_values.push_back(LoadLiftedValue( - arg, this->options.TypeDictionary(), this->intrinsics, old_block, - call->getArgOperand(remill::kStatePointerArgNum), - call->getArgOperand(remill::kMemoryPointerArgNum))); - } - auto new_call = this->CallBasicBlockFunction(addr, old_block, res.new_func, - lifted_function_state, - lifted_values, call); - - llvm::BranchInst::Create(new_block, old_block); - call->replaceAllUsesWith(new_call); - std::string fname = std::string(call->getCalledFunction()->getName()); - call->eraseFromParent(); - call->getCalledFunction()->eraseFromParent(); - llvm::MergeBlockIntoPredecessor(new_block); - res.new_func->setName(fname); - - - // TODO(Ian): need to setup metadata in transform - } -} - - // Returns the address of a named function. 
std::optional FunctionLifter::AddressOfNamedFunction(const std::string &func_name) const { diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index c68bd06ee..56b7f8e97 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -31,6 +31,7 @@ #include "BasicBlockTransform.h" #include "CodeLifter.h" +#include "Lifters/BasicBlockLifter.h" namespace llvm { class Constant; @@ -54,14 +55,6 @@ class TypeProvider; struct ControlFlowTargetList; -struct BasicBlockFunction { - llvm::Function *func; - llvm::Value *state_ptr; - llvm::Argument *pc_arg; - llvm::Argument *mem_ptr; - llvm::Argument *next_pc_out_param; -}; - struct LiftedFunction { llvm::Function *func; llvm::Argument *state_ptr; @@ -97,11 +90,8 @@ class FunctionLifter : public CodeLifter { const FunctionDecl &decl, EntityLifterImpl &lifter_context) const; - llvm::CallInst * - CallBasicBlockFunction(uint64_t block_addr, llvm::BasicBlock *add_to_llvm, - llvm::Function *bb_func, llvm::Value *state_ptr, - llvm::ArrayRef extra_args = {}, - llvm::Instruction *IP = {}) const; + + CallableBasicBlockFunction LiftBasicBlockFunction(const CodeBlock &) const; llvm::Function *GetBasicBlockFunction(uint64_t address) const; @@ -180,9 +170,6 @@ class FunctionLifter : public CodeLifter { llvm::BasicBlock *invalid_successor_block{nullptr}; - // Maps basic block addresses to lifted functions - std::unordered_map addr_to_bb_func; - // Get the annotation for the program counter `pc`, or `nullptr` if we're // not doing annotations. 
llvm::MDNode *GetPCAnnotation(uint64_t pc) const; @@ -296,13 +283,6 @@ class FunctionLifter : public CodeLifter { LiftedFunction CreateLiftedFunction(const std::string &name); - BasicBlockFunction CreateBasicBlockFunction(const CodeBlock &block); - - - llvm::BasicBlock * - LiftBasicBlockIntoFunction(BasicBlockFunction &basic_block_function, - const CodeBlock &blk); - remill::DecodingContext CreateDecodingContext(const CodeBlock &blk); From ebeff624e977c2a031ff44e97394cdec04e060e7 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 13:52:28 -0500 Subject: [PATCH 042/163] added pack and unpack --- lib/Lifters/BasicBlockLifter.cpp | 76 ++++++++++++++++++++++++++++++-- lib/Lifters/BasicBlockLifter.h | 8 ++++ lib/Lifters/FunctionLifter.h | 3 -- 3 files changed, 80 insertions(+), 7 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index fa434b484..9fbf2695a 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -2,10 +2,16 @@ #include #include +#include +#include #include #include #include +#include +#include +#include + namespace anvill { llvm::Function *BasicBlockLifter::LiftBasicBlockFunction() { @@ -384,6 +390,59 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { } +llvm::StructType *BasicBlockLifter::StructTypeFromVars( + const std::vector &in_scope_locals) const { + std::vector field_types; + std::transform(in_scope_locals.begin(), in_scope_locals.end(), + std::back_inserter(field_types), + [](const ParameterDecl ¶m) { return param.type; }); + + return llvm::StructType::create(llvm_context, field_types); +} + +// Packs in scope variables into a struct +llvm::Value * +BasicBlockLifter::PackLocals(llvm::IRBuilder<> &bldr, + llvm::Value *from_state_ptr, + const std::vector &decls) const { + auto ty = this->StructTypeFromVars(decls); + auto sptr = bldr.CreateAlloca(ty); + auto i32 = llvm::IntegerType::get(llvm_context, 32); + uint64_t field_offset = 0; + for (auto 
decl : decls) { + auto ptr = bldr.CreateGEP(ty, sptr, + {llvm::ConstantInt::get(i32, 0), + llvm::ConstantInt::get(i32, field_offset)}); + field_offset += 1; + + auto state_loaded_value = + LoadLiftedValue(decl, this->type_provider.Dictionary(), + this->intrinsics, bldr.GetInsertBlock(), from_state_ptr, + remill::LoadMemoryPointer(bldr, this->intrinsics)); + bldr.CreateStore(state_loaded_value, ptr); + } + + return bldr.CreateLoad(ty, sptr); +} + +void BasicBlockLifter::UnpackLocals( + llvm::IRBuilder<> &bldr, llvm::Value *returned_value, + llvm::Value *into_state_ptr, + const std::vector &decls) const { + uint64_t field_offset = 0; + for (auto decl : decls) { + auto extracted_field = + bldr.CreateExtractElement(returned_value, field_offset); + auto new_mem_ptr = StoreNativeValue( + extracted_field, decl, this->type_provider.Dictionary(), + this->intrinsics, bldr.GetInsertBlock(), into_state_ptr, + remill::LoadMemoryPointer(bldr, this->intrinsics)); + bldr.CreateStore(new_mem_ptr, + remill::LoadMemoryPointerRef(bldr.GetInsertBlock())); + } +} + + void BasicBlockLifter::CallBasicBlockFunction( llvm::IRBuilder<> &builder, llvm::Value *parent_state, const CallableBasicBlockFunction &cbfunc) const { @@ -407,13 +466,14 @@ void BasicBlockLifter::CallBasicBlockFunction( args.push_back(packed_locals); - auto new_mem_ptr = builder.CreateCall(cbfunc.GetFunction(), args); + auto new_locals = builder.CreateCall(cbfunc.GetFunction(), args); - auto mem_ptr_ref = remill::LoadMemoryPointerRef(builder.GetInsertBlock()); + //auto mem_ptr_ref = remill::LoadMemoryPointerRef(builder.GetInsertBlock()); - builder.CreateStore(new_mem_ptr, mem_ptr_ref); + // TODO(Ian) move this to an out param: builder.CreateStore(new_mem_ptr, mem_ptr_ref); - this->PackLocals(builder, state_ptr, cbfunc.GetInScopeVaraibles()); + this->UnpackLocals(builder, new_locals, state_ptr, + cbfunc.GetInScopeVaraibles()); } @@ -422,4 +482,12 @@ void CallableBasicBlockFunction::CallBasicBlockFunction( 
this->bb_lifter.CallBasicBlockFunction(add_to_llvm, parent_state, *this); } +CallableBasicBlockFunction::CallableBasicBlockFunction( + llvm::Function *func, std::vector in_scope_locals, + CodeBlock block, const BasicBlockLifter &bb_lifter) + : func(func), + in_scope_locals(in_scope_locals), + block(block), + bb_lifter(bb_lifter) {} + } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 0475cf3ac..484f2dee5 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -37,6 +37,11 @@ class CallableBasicBlockFunction { public: + CallableBasicBlockFunction(llvm::Function *func, + std::vector in_scope_locals, + CodeBlock block, + const BasicBlockLifter &bb_lifter); + const std::vector &GetInScopeVaraibles() const; llvm::Function *GetFunction() const; @@ -114,6 +119,9 @@ class BasicBlockLifter : public CodeLifter { // Calls a basic block function and unpacks the result into the state void CallBasicBlockFunction(llvm::IRBuilder<> &, llvm::Value *state_ptr, const CallableBasicBlockFunction &) const; + + llvm::StructType * + StructTypeFromVars(const std::vector &in_scope_locals) const; }; } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 56b7f8e97..3aed298cd 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -307,9 +307,6 @@ class FunctionLifter : public CodeLifter { // that all semantics and helpers are completely inlined. void RecursivelyInlineLiftedFunctionIntoNativeFunction(void); - // inline on arbitrary function. - void RecursivelyInlineFunctionCallees(llvm::Function *); - // Manipulates the control flow to restore intra-procedural state when reaching an // inter-procedural effect. 
// Returns a boolean represnting wether decoding should continue (true = non-terminal, false=terminal) From 73b03eb57734b70a27cc050a75561d4c4707149a Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 14:39:13 -0500 Subject: [PATCH 043/163] fix memory issue --- lib/Lifters/BasicBlockLifter.cpp | 20 +++++++-- lib/Lifters/BasicBlockLifter.h | 69 ++++++++++++++++++-------------- lib/Lifters/CodeLifter.cpp | 7 +++- lib/Lifters/CodeLifter.h | 7 +++- lib/Lifters/FunctionLifter.cpp | 24 ++++------- lib/Lifters/FunctionLifter.h | 16 +------- 6 files changed, 73 insertions(+), 70 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 9fbf2695a..afb6fe0bd 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -10,14 +10,17 @@ #include #include +#include #include namespace anvill { -llvm::Function *BasicBlockLifter::LiftBasicBlockFunction() { +CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { auto bbfunc = this->CreateBasicBlockFunction(); this->LiftBasicBlockIntoFunction(bbfunc); - return bbfunc.func; + return CallableBasicBlockFunction(bbfunc.func, + this->block_context.GetAvailableVariables(), + block_def, std::move(*this)); } @@ -482,12 +485,21 @@ void CallableBasicBlockFunction::CallBasicBlockFunction( this->bb_lifter.CallBasicBlockFunction(add_to_llvm, parent_state, *this); } +CallableBasicBlockFunction +BasicBlockLifter::LiftBasicBlock(const BasicBlockContext &block_context, + const CodeBlock &block_def, + const LifterOptions &options_) { + + return BasicBlockLifter(block_context, block_def, options_) + .LiftBasicBlockFunction(); +} + CallableBasicBlockFunction::CallableBasicBlockFunction( llvm::Function *func, std::vector in_scope_locals, - CodeBlock block, const BasicBlockLifter &bb_lifter) + CodeBlock block, BasicBlockLifter bb_lifter) : func(func), in_scope_locals(in_scope_locals), block(block), - bb_lifter(bb_lifter) {} + bb_lifter(std::move(bb_lifter)) 
{} } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 484f2dee5..b62ff1a1a 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -26,33 +26,7 @@ struct BasicBlockFunction { llvm::Argument *next_pc_out_param; }; -class BasicBlockLifter; -class CallableBasicBlockFunction { - - private: - llvm::Function *func; - std::vector in_scope_locals; - CodeBlock block; - const BasicBlockLifter &bb_lifter; - - - public: - CallableBasicBlockFunction(llvm::Function *func, - std::vector in_scope_locals, - CodeBlock block, - const BasicBlockLifter &bb_lifter); - - const std::vector &GetInScopeVaraibles() const; - llvm::Function *GetFunction() const; - - llvm::StructType *GetRetType() const; - - const CodeBlock &GetBlock() const; - - // Calls a basic block function and unpacks the result into the state - void CallBasicBlockFunction(llvm::IRBuilder<> &, - llvm::Value *state_ptr) const; -}; +class CallableBasicBlockFunction; /** * @brief A BasicBlockLifter lifts a basic block as a native function that takes @@ -73,9 +47,6 @@ class BasicBlockLifter : public CodeLifter { remill::DecodingContext CreateDecodingContext(const CodeBlock &blk); - BasicBlockLifter(const BasicBlockContext &block_context, - const CodeBlock &block_def, const LifterOptions &options_); - void LiftBasicBlockIntoFunction(BasicBlockFunction &basic_block_function); BasicBlockFunction CreateBasicBlockFunction(); @@ -105,7 +76,14 @@ class BasicBlockLifter : public CodeLifter { public: - llvm::Function *LiftBasicBlockFunction(); + BasicBlockLifter(const BasicBlockContext &block_context, + const CodeBlock &block_def, const LifterOptions &options_); + static CallableBasicBlockFunction + LiftBasicBlock(const BasicBlockContext &block_context, + const CodeBlock &block_def, const LifterOptions &options_); + + + CallableBasicBlockFunction LiftBasicBlockFunction() &&; // Packs in scope variables into a struct llvm::Value 
*PackLocals(llvm::IRBuilder<> &, llvm::Value *from_state_ptr, @@ -122,6 +100,35 @@ class BasicBlockLifter : public CodeLifter { llvm::StructType * StructTypeFromVars(const std::vector &in_scope_locals) const; + + BasicBlockLifter(BasicBlockLifter &&) = default; +}; + +class CallableBasicBlockFunction { + + private: + llvm::Function *func; + std::vector in_scope_locals; + CodeBlock block; + BasicBlockLifter bb_lifter; + + + public: + CallableBasicBlockFunction(llvm::Function *func, + std::vector in_scope_locals, + CodeBlock block, BasicBlockLifter bb_lifter); + + const std::vector &GetInScopeVaraibles() const; + llvm::Function *GetFunction() const; + + llvm::StructType *GetRetType() const; + + const CodeBlock &GetBlock() const; + + // Calls a basic block function and unpacks the result into the state + void CallBasicBlockFunction(llvm::IRBuilder<> &, + llvm::Value *state_ptr) const; }; + } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index 29664844d..2e90ff4ba 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -17,6 +17,8 @@ #include +#include "anvill/Type.h" + namespace anvill { namespace { // Clear out LLVM variable names. They're usually not helpful. 
@@ -34,7 +36,8 @@ static void ClearVariableNames(llvm::Function *func) { CodeLifter::CodeLifter(const LifterOptions &options, - llvm::Module *semantics_module) + llvm::Module *semantics_module, + const TypeTranslator &type_specifier) : options(options), semantics_module(semantics_module), intrinsics(semantics_module), @@ -50,7 +53,7 @@ CodeLifter::CodeLifter(const LifterOptions &options, ->EnclosingRegister()), memory_provider(options.memory_provider), type_provider(options.type_provider), - type_specifier(options.TypeDictionary(), options.arch), + type_specifier(type_specifier), address_type( llvm::Type::getIntNTy(llvm_context, options.arch->address_size)), i8_type(llvm::Type::getInt8Ty(llvm_context)), diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index aaf2999dd..0ea73d977 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -43,7 +43,7 @@ class CodeLifter { const MemoryProvider &memory_provider; const TypeProvider &type_provider; - const TypeTranslator type_specifier; + const TypeTranslator &type_specifier; llvm::IntegerType *const address_type; @@ -74,7 +74,10 @@ class CodeLifter { unsigned pc_annotation_id; public: - CodeLifter(const LifterOptions &options, llvm::Module *semantics_module); + CodeLifter(const LifterOptions &options, llvm::Module *semantics_module, + const TypeTranslator &type_specifier); + + CodeLifter(CodeLifter &&) = default; }; } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 23d759129..154753773 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -170,8 +170,9 @@ FunctionLifter::CreateFunctionLifter(const LifterOptions &options_) { FunctionLifter::FunctionLifter(const LifterOptions &options_, std::unique_ptr semantics_module) - : CodeLifter(options_, semantics_module.get()), - semantics_module(std::move(semantics_module)) {} + : CodeLifter(options_, semantics_module.get(), 
this->type_specifier), + semantics_module(std::move(semantics_module)), + type_specifier(options_.TypeDictionary(), options_.arch) {} llvm::BranchInst * @@ -413,23 +414,12 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { return GetOrDeclareFunction(decl); } - -llvm::CallInst * -FunctionLifter::AddTerminatingTailCallFromBasicBlockFunctionToLifted( - llvm::BasicBlock *source_block, llvm::Function *dest_func, - const remill::IntrinsicTable &intrinsics) { - llvm::IRBuilder<> ir(source_block); - auto npc = remill::LoadNextProgramCounter(source_block, intrinsics); - auto pc_ref = remill::LoadProgramCounterRef(source_block); - ir.CreateStore(npc, pc_ref); - auto call = this->AddCallFromBasicBlockFunctionToLifted( - source_block, dest_func, intrinsics); - call->setTailCall(true); - ir.CreateRet(call); - return call; +CallableBasicBlockFunction +FunctionLifter::LiftBasicBlockFunction(const CodeBlock &blk) const { + return BasicBlockLifter::LiftBasicBlock( + this->curr_decl->GetBlockContext(blk.addr), blk, this->options); } - void FunctionLifter::VisitBlock(CodeBlock blk, llvm::Value *lifted_function_state) { auto llvm_blk = this->GetOrCreateBlock(blk.addr); diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 3aed298cd..b1ed1365c 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -102,6 +102,8 @@ class FunctionLifter : public CodeLifter { // Semantics module containing all instruction semantics. std::unique_ptr semantics_module; + TypeTranslator type_specifier; + // Metadata node to attach to lifted instructions to related them to // original instructions. @@ -319,20 +321,6 @@ class FunctionLifter : public CodeLifter { const anvill::ControlFlowOverride &override, llvm::Value *state_ptr); - // Same addcall machinery from remill except allows for the 4 argument basic block functio (state, program_counter, memory, next_pc_ref). 
- llvm::CallInst *AddCallFromBasicBlockFunctionToLifted( - llvm::BasicBlock *source_block, llvm::Function *dest_func, - const remill::IntrinsicTable &intrinsics); - - llvm::CallInst *AddTerminatingTailCallFromBasicBlockFunctionToLifted( - llvm::BasicBlock *source_block, llvm::Function *dest_func, - const remill::IntrinsicTable &intrinsics); - - - // Allocate and initialize the state structure. - llvm::Value *AllocateAndInitializeStateStructure(llvm::BasicBlock *block, - const remill::Arch *arch); - // Perform architecture-specific initialization of the state structure // in `block`. void ArchSpecificStateStructureInitialization(llvm::BasicBlock *block, From d90fb044aa83a369784d5089e102a076205061b0 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 14:51:51 -0500 Subject: [PATCH 044/163] fix constructor --- lib/Lifters/BasicBlockLifter.cpp | 22 +++++++++++++++++----- lib/Lifters/BasicBlockLifter.h | 8 ++++++-- lib/Lifters/FunctionLifter.cpp | 3 ++- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index afb6fe0bd..1f0953a83 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -13,6 +13,8 @@ #include #include +#include "Lifters/CodeLifter.h" + namespace anvill { CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { @@ -485,15 +487,25 @@ void CallableBasicBlockFunction::CallBasicBlockFunction( this->bb_lifter.CallBasicBlockFunction(add_to_llvm, parent_state, *this); } -CallableBasicBlockFunction -BasicBlockLifter::LiftBasicBlock(const BasicBlockContext &block_context, - const CodeBlock &block_def, - const LifterOptions &options_) { +CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlock( + const BasicBlockContext &block_context, const CodeBlock &block_def, + const LifterOptions &options_, llvm::Module *semantics_module, + const TypeTranslator &type_specifier) { - return BasicBlockLifter(block_context, block_def, 
options_) + return BasicBlockLifter(block_context, block_def, options_, semantics_module, + type_specifier) .LiftBasicBlockFunction(); } +BasicBlockLifter::BasicBlockLifter(const BasicBlockContext &block_context, + const CodeBlock &block_def, + const LifterOptions &options_, + llvm::Module *semantics_module, + const TypeTranslator &type_specifier) + : CodeLifter(options_, semantics_module, type_specifier), + block_context(block_context), + block_def(block_def) {} + CallableBasicBlockFunction::CallableBasicBlockFunction( llvm::Function *func, std::vector in_scope_locals, CodeBlock block, BasicBlockLifter bb_lifter) diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index b62ff1a1a..7245dfd6e 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -77,10 +77,14 @@ class BasicBlockLifter : public CodeLifter { public: BasicBlockLifter(const BasicBlockContext &block_context, - const CodeBlock &block_def, const LifterOptions &options_); + const CodeBlock &block_def, const LifterOptions &options_, + llvm::Module *semantics_module, + const TypeTranslator &type_specifier); static CallableBasicBlockFunction LiftBasicBlock(const BasicBlockContext &block_context, - const CodeBlock &block_def, const LifterOptions &options_); + const CodeBlock &block_def, const LifterOptions &options_, + llvm::Module *semantics_module, + const TypeTranslator &type_specifier); CallableBasicBlockFunction LiftBasicBlockFunction() &&; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 154753773..a23b9d2ed 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -417,7 +417,8 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { CallableBasicBlockFunction FunctionLifter::LiftBasicBlockFunction(const CodeBlock &blk) const { return BasicBlockLifter::LiftBasicBlock( - this->curr_decl->GetBlockContext(blk.addr), blk, this->options); + 
this->curr_decl->GetBlockContext(blk.addr), blk, this->options, + this->semantics_module.get(), this->type_specifier); } void FunctionLifter::VisitBlock(CodeBlock blk, From 6891e445a146d7a23631de2429da4c4f8f56e526 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 15:01:18 -0500 Subject: [PATCH 045/163] add stack offsets to context --- include/anvill/Declarations.h | 5 ++++- lib/Declarations.cpp | 12 +++++++++++- lib/Lifters/BasicBlockLifter.cpp | 16 ++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 493e86591..d0e8d866b 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -208,9 +208,12 @@ struct FunctionDecl; class SpecBlockContext : public BasicBlockContext { private: const FunctionDecl &decl; + SpecStackOffsets offsets; public: - SpecBlockContext(const FunctionDecl &decl) : decl(decl) {} + SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets) + : decl(decl), + offsets(std::move(offsets)) {} virtual std::vector GetAvailableVariables() const override; virtual const SpecStackOffsets &GetStackOffsets() const override; }; diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 9986dd7c9..c6f876106 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -114,6 +114,10 @@ std::vector SpecBlockContext::GetAvailableVariables() const { return decls; } +const SpecStackOffsets &SpecBlockContext::GetStackOffsets() const { + return this->offsets; +} + // Interpret `target` as being the function to call, and call it from within // a basic block in a lifted bitcode function. Returns the new value of the // memory pointer. 
@@ -305,7 +309,13 @@ void CallableDecl::OverrideFunctionTypeWithABIReturnLayout() { } SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { - return SpecBlockContext(*this); + auto offs = this->stack_offsets.find(addr); + if (offs != this->stack_offsets.end()) { + + return SpecBlockContext(*this, offs->second); + } else { + return SpecBlockContext(*this, SpecStackOffsets()); + } } diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 1f0953a83..a170f87ed 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -14,6 +15,7 @@ #include #include "Lifters/CodeLifter.h" +#include "anvill/Declarations.h" namespace anvill { @@ -514,4 +516,18 @@ CallableBasicBlockFunction::CallableBasicBlockFunction( block(block), bb_lifter(std::move(bb_lifter)) {} + +const CodeBlock &CallableBasicBlockFunction::GetBlock() const { + return this->block; +} + +llvm::Function *CallableBasicBlockFunction::GetFunction() const { + return this->func; +} + +const std::vector & +CallableBasicBlockFunction::GetInScopeVaraibles() const { + return this->in_scope_locals; +} + } // namespace anvill \ No newline at end of file From 3c851f83264ba0174af14a72b5d1a4680349b824 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 15:37:24 -0500 Subject: [PATCH 046/163] use same struct type --- lib/Lifters/BasicBlockLifter.cpp | 34 +++++++++++++++++++++++++------- lib/Lifters/BasicBlockLifter.h | 7 +++++-- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index a170f87ed..942824d4c 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -315,8 +315,18 @@ void BasicBlockLifter::LiftBasicBlockIntoFunction( builder.CreateStore(remill::LoadNextProgramCounter(bb, this->intrinsics), basic_block_function.next_pc_out_param); - auto memory 
= remill::LoadMemoryPointer(bb, this->intrinsics); - llvm::ReturnInst::Create(bb->getContext(), memory, bb); + + + // TODO(Ian): output this memory somehow auto memory = remill::LoadMemoryPointer(bb, this->intrinsics); + CHECK_EQ(this->PackLocals(builder, basic_block_function.state_ptr, + this->block_context.GetAvailableVariables()) + ->getType(), + basic_block_function.func->getReturnType()); + llvm::ReturnInst::Create( + bb->getContext(), + this->PackLocals(builder, basic_block_function.state_ptr, + this->block_context.GetAvailableVariables()), + bb); } this->RecursivelyInlineFunctionCallees(basic_block_function.func); } @@ -337,10 +347,11 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { std::vector params = std::vector( lifted_func_type->param_begin(), lifted_func_type->param_end()); + //next_pc_out params.push_back(llvm::PointerType::get(context, 0)); - + params.push_back(var_struct_ty); llvm::FunctionType *func_type = - llvm::FunctionType::get(lifted_func_type->getReturnType(), params, false); + llvm::FunctionType::get(this->var_struct_ty, params, false); llvm::StringRef name(name_.data(), name_.size()); @@ -355,10 +366,12 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto out_state = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); + auto in_vars = remill::NthArgument(func, remill::kNumBlockArgs + 1); memory->setName("memory"); out_state->setName("state_out"); pc->setName("program_counter"); next_pc_out->setName("next_pc_out"); + in_vars->setName("in_vars"); options.arch->InitializeEmptyLiftedFunction(func); @@ -383,6 +396,9 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { } } + this->UnpackLocals(ir, in_vars, state, + this->block_context.GetAvailableVariables()); + auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); auto mem_arg = remill::NthArgument(func, 
remill::kMemoryPointerArgNum); @@ -412,7 +428,7 @@ llvm::Value * BasicBlockLifter::PackLocals(llvm::IRBuilder<> &bldr, llvm::Value *from_state_ptr, const std::vector &decls) const { - auto ty = this->StructTypeFromVars(decls); + auto ty = this->var_struct_ty; auto sptr = bldr.CreateAlloca(ty); auto i32 = llvm::IntegerType::get(llvm_context, 32); uint64_t field_offset = 0; @@ -439,7 +455,8 @@ void BasicBlockLifter::UnpackLocals( uint64_t field_offset = 0; for (auto decl : decls) { auto extracted_field = - bldr.CreateExtractElement(returned_value, field_offset); + bldr.CreateExtractValue(returned_value, field_offset); + field_offset += 1; auto new_mem_ptr = StoreNativeValue( extracted_field, decl, this->type_provider.Dictionary(), this->intrinsics, bldr.GetInsertBlock(), into_state_ptr, @@ -506,7 +523,10 @@ BasicBlockLifter::BasicBlockLifter(const BasicBlockContext &block_context, const TypeTranslator &type_specifier) : CodeLifter(options_, semantics_module, type_specifier), block_context(block_context), - block_def(block_def) {} + block_def(block_def) { + this->var_struct_ty = + this->StructTypeFromVars(this->block_context.GetAvailableVariables()); +} CallableBasicBlockFunction::CallableBasicBlockFunction( llvm::Function *func, std::vector in_scope_locals, diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 7245dfd6e..12eee5f78 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -38,9 +38,14 @@ class BasicBlockLifter : public CodeLifter { const BasicBlockContext &block_context; const CodeBlock &block_def; + llvm::StructType *var_struct_ty{nullptr}; + // The allocated state ptr for the function. 
llvm::Value *state_ptr; + llvm::StructType * + StructTypeFromVars(const std::vector &in_scope_locals) const; + remill::DecodingContext ApplyContextAssignments( const std::unordered_map &assignments, remill::DecodingContext prev_context); @@ -102,8 +107,6 @@ class BasicBlockLifter : public CodeLifter { void CallBasicBlockFunction(llvm::IRBuilder<> &, llvm::Value *state_ptr, const CallableBasicBlockFunction &) const; - llvm::StructType * - StructTypeFromVars(const std::vector &in_scope_locals) const; BasicBlockLifter(BasicBlockLifter &&) = default; }; From 6ca87116743ea11514d204e4d6265f5696f29619 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 15:59:39 -0500 Subject: [PATCH 047/163] fix state ref --- lib/Lifters/BasicBlockLifter.cpp | 11 ++++++----- lib/Lifters/BasicBlockLifter.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 942824d4c..395a49e08 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -379,7 +379,8 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto &blk = func->getEntryBlock(); llvm::IRBuilder<> ir(&blk); - auto state = this->AllocateAndInitializeStateStructure(&blk, options.arch); + this->state_ptr = + this->AllocateAndInitializeStateStructure(&blk, options.arch); // Put registers that are referencing the stack in terms of their displacement so that we // Can resolve these stack references later . 
@@ -392,11 +393,11 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { ir, this->sp_reg, this->block_def.addr, reg_off.offset); StoreNativeValueToRegister(new_value, reg_off.target_register, type_provider.Dictionary(), intrinsics, &blk, - state); + this->state_ptr); } } - this->UnpackLocals(ir, in_vars, state, + this->UnpackLocals(ir, in_vars, this->state_ptr, this->block_context.GetAvailableVariables()); @@ -407,7 +408,7 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { func->addFnAttr(llvm::Attribute::NoInline); func->setLinkage(llvm::GlobalValue::InternalLinkage); - BasicBlockFunction bbf{func, state, pc_arg, mem_arg, next_pc_out}; + BasicBlockFunction bbf{func, this->state_ptr, pc_arg, mem_arg, next_pc_out}; return bbf; } @@ -496,7 +497,7 @@ void BasicBlockLifter::CallBasicBlockFunction( // TODO(Ian) move this to an out param: builder.CreateStore(new_mem_ptr, mem_ptr_ref); - this->UnpackLocals(builder, new_locals, state_ptr, + this->UnpackLocals(builder, new_locals, parent_state, cbfunc.GetInScopeVaraibles()); } diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 12eee5f78..dee01366b 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -41,7 +41,7 @@ class BasicBlockLifter : public CodeLifter { llvm::StructType *var_struct_ty{nullptr}; // The allocated state ptr for the function. - llvm::Value *state_ptr; + llvm::Value *state_ptr{nullptr}; llvm::StructType * StructTypeFromVars(const std::vector &in_scope_locals) const; From ca78a0a87fb9b4a42d1e121b8d06f9600ad35750 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 21:18:14 -0500 Subject: [PATCH 048/163] mostly lifting... 
weird debug info --- lib/Lifters/BasicBlockLifter.cpp | 134 +++++++++++++++++++------------ lib/Lifters/BasicBlockLifter.h | 11 ++- lib/Lifters/FunctionLifter.cpp | 11 ++- 3 files changed, 99 insertions(+), 57 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 395a49e08..dba431f38 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -5,7 +5,10 @@ #include #include #include +#include +#include #include +#include #include #include @@ -21,7 +24,14 @@ namespace anvill { CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { auto bbfunc = this->CreateBasicBlockFunction(); - this->LiftBasicBlockIntoFunction(bbfunc); + this->LiftInstructionsIntoLiftedFunction(); + CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); + CHECK(!llvm::verifyFunction(*bbfunc.func, &llvm::errs())); + this->RecursivelyInlineFunctionCallees(bbfunc.func); + + + bbfunc.func->dump(); + //CHECK(false); return CallableBasicBlockFunction(bbfunc.func, this->block_context.GetAvailableVariables(), block_def, std::move(*this)); @@ -265,12 +275,11 @@ bool BasicBlockLifter::DecodeInstructionInto(const uint64_t addr, } -void BasicBlockLifter::LiftBasicBlockIntoFunction( - BasicBlockFunction &basic_block_function) { - auto entry_block = &basic_block_function.func->getEntryBlock(); +void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { + auto entry_block = &this->lifted_func->getEntryBlock(); - auto bb = llvm::BasicBlock::Create(basic_block_function.func->getContext(), - "", basic_block_function.func); + auto bb = llvm::BasicBlock::Create(this->lifted_func->getContext(), "", + this->lifted_func); llvm::BranchInst::Create(bb, entry_block); @@ -298,13 +307,10 @@ void BasicBlockLifter::LiftBasicBlockIntoFunction( // a call to a semantic, e.g.`INVALID_INSTRUCTION`, so we really want // to treat instruction lifting as an operation that can't fail. 
- CHECK(llvm::isa_and_nonnull( - basic_block_function.state_ptr) && - llvm::cast(basic_block_function.state_ptr) - ->getParent() - ->getParent() == basic_block_function.func); + std::ignore = inst.GetLifter()->LiftIntoBlock( - inst, bb, basic_block_function.state_ptr, false /* is_delayed */); + inst, bb, this->lifted_func->getArg(remill::kStatePointerArgNum), + false /* is_delayed */); ended_on_terminal = !this->ApplyInterProceduralControlFlowOverride(inst, bb); @@ -314,21 +320,12 @@ void BasicBlockLifter::LiftBasicBlockIntoFunction( llvm::IRBuilder<> builder(bb); builder.CreateStore(remill::LoadNextProgramCounter(bb, this->intrinsics), - basic_block_function.next_pc_out_param); + this->lifted_func->getArg(remill::kNumBlockArgs)); - // TODO(Ian): output this memory somehow auto memory = remill::LoadMemoryPointer(bb, this->intrinsics); - CHECK_EQ(this->PackLocals(builder, basic_block_function.state_ptr, - this->block_context.GetAvailableVariables()) - ->getType(), - basic_block_function.func->getReturnType()); llvm::ReturnInst::Create( - bb->getContext(), - this->PackLocals(builder, basic_block_function.state_ptr, - this->block_context.GetAvailableVariables()), - bb); + bb->getContext(), remill::LoadMemoryPointer(bb, this->intrinsics), bb); } - this->RecursivelyInlineFunctionCallees(basic_block_function.func); } @@ -347,11 +344,13 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { std::vector params = std::vector( lifted_func_type->param_begin(), lifted_func_type->param_end()); + + // pointer to varstruct + params[remill::kStatePointerArgNum] = llvm::PointerType::get(context, 0); //next_pc_out params.push_back(llvm::PointerType::get(context, 0)); - params.push_back(var_struct_ty); llvm::FunctionType *func_type = - llvm::FunctionType::get(this->var_struct_ty, params, false); + llvm::FunctionType::get(lifted_func_type->getReturnType(), params, false); llvm::StringRef name(name_.data(), name_.size()); @@ -363,21 +362,42 @@ BasicBlockFunction 
BasicBlockLifter::CreateBasicBlockFunction() { GetBasicBlockAnnotation(this->block_def.addr)); auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); - auto out_state = remill::NthArgument(func, remill::kStatePointerArgNum); + auto in_vars = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); - auto in_vars = remill::NthArgument(func, remill::kNumBlockArgs + 1); + memory->setName("memory"); - out_state->setName("state_out"); pc->setName("program_counter"); next_pc_out->setName("next_pc_out"); in_vars->setName("in_vars"); - options.arch->InitializeEmptyLiftedFunction(func); + + auto liftedty = this->options.arch->LiftedFunctionType(); + + std::vector new_params; + new_params.reserve(liftedty->getNumParams() + 1); + + for (auto param : liftedty->params()) { + new_params.push_back(param); + } + new_params.push_back(llvm::PointerType::get(context, 0)); + + + llvm::FunctionType *new_func_type = llvm::FunctionType::get( + lifted_func_type->getReturnType(), new_params, false); + + + this->lifted_func = llvm::Function::Create( + new_func_type, llvm::GlobalValue::ExternalLinkage, 0u, + std::string(name) + "lowlift", this->semantics_module); + + options.arch->InitializeEmptyLiftedFunction(this->lifted_func); + llvm::BasicBlock::Create(context, "", func); auto &blk = func->getEntryBlock(); llvm::IRBuilder<> ir(&blk); + ir.CreateStore(memory, ir.CreateAlloca(memory->getType(), nullptr, "MEMORY")); this->state_ptr = this->AllocateAndInitializeStateStructure(&blk, options.arch); @@ -408,7 +428,17 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { func->addFnAttr(llvm::Attribute::NoInline); func->setLinkage(llvm::GlobalValue::InternalLinkage); - BasicBlockFunction bbf{func, this->state_ptr, pc_arg, mem_arg, next_pc_out}; + // TODO(Ian): memory pointer isnt quite right + std::array args = { + this->state_ptr, pc, 
mem_arg, next_pc_out}; + auto ret_mem = ir.CreateCall(this->lifted_func, args); + + + this->PackLocals(ir, this->state_ptr, in_vars, + this->block_context.GetAvailableVariables()); + + ir.CreateRet(ret_mem); + BasicBlockFunction bbf{func, pc_arg, in_vars, mem_arg, next_pc_out}; return bbf; } @@ -425,16 +455,14 @@ llvm::StructType *BasicBlockLifter::StructTypeFromVars( } // Packs in scope variables into a struct -llvm::Value * -BasicBlockLifter::PackLocals(llvm::IRBuilder<> &bldr, - llvm::Value *from_state_ptr, - const std::vector &decls) const { - auto ty = this->var_struct_ty; - auto sptr = bldr.CreateAlloca(ty); +void BasicBlockLifter::PackLocals( + llvm::IRBuilder<> &bldr, llvm::Value *from_state_ptr, + llvm::Value *into_vars, const std::vector &decls) const { + auto i32 = llvm::IntegerType::get(llvm_context, 32); uint64_t field_offset = 0; for (auto decl : decls) { - auto ptr = bldr.CreateGEP(ty, sptr, + auto ptr = bldr.CreateGEP(this->var_struct_ty, into_vars, {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, field_offset)}); field_offset += 1; @@ -445,8 +473,6 @@ BasicBlockLifter::PackLocals(llvm::IRBuilder<> &bldr, remill::LoadMemoryPointer(bldr, this->intrinsics)); bldr.CreateStore(state_loaded_value, ptr); } - - return bldr.CreateLoad(ty, sptr); } void BasicBlockLifter::UnpackLocals( @@ -454,12 +480,16 @@ void BasicBlockLifter::UnpackLocals( llvm::Value *into_state_ptr, const std::vector &decls) const { uint64_t field_offset = 0; + auto i32 = llvm::IntegerType::get(llvm_context, 32); for (auto decl : decls) { - auto extracted_field = - bldr.CreateExtractValue(returned_value, field_offset); + auto ptr = bldr.CreateGEP(this->var_struct_ty, returned_value, + {llvm::ConstantInt::get(i32, 0), + llvm::ConstantInt::get(i32, field_offset)}); + + auto loaded_var_val = bldr.CreateLoad(decl.type, ptr); field_offset += 1; auto new_mem_ptr = StoreNativeValue( - extracted_field, decl, this->type_provider.Dictionary(), + loaded_var_val, decl, 
this->type_provider.Dictionary(), this->intrinsics, bldr.GetInsertBlock(), into_state_ptr, remill::LoadMemoryPointer(bldr, this->intrinsics)); bldr.CreateStore(new_mem_ptr, @@ -474,7 +504,10 @@ void BasicBlockLifter::CallBasicBlockFunction( std::vector args(remill::kNumBlockArgs + 1); - args[remill::kStatePointerArgNum] = parent_state; + + + auto out_param_locals = builder.CreateAlloca(this->var_struct_ty); + args[remill::kStatePointerArgNum] = out_param_locals; args[remill::kPCArgNum] = options.program_counter_init_procedure( builder, pc_reg, cbfunc.GetBlock().addr); @@ -485,19 +518,16 @@ void BasicBlockLifter::CallBasicBlockFunction( remill::LoadNextProgramCounterRef(builder.GetInsertBlock()); - auto packed_locals = - this->PackLocals(builder, parent_state, cbfunc.GetInScopeVaraibles()); - - - args.push_back(packed_locals); + this->PackLocals(builder, parent_state, out_param_locals, + cbfunc.GetInScopeVaraibles()); - auto new_locals = builder.CreateCall(cbfunc.GetFunction(), args); + auto new_mem_ptr = builder.CreateCall(cbfunc.GetFunction(), args); - //auto mem_ptr_ref = remill::LoadMemoryPointerRef(builder.GetInsertBlock()); + auto mem_ptr_ref = remill::LoadMemoryPointerRef(builder.GetInsertBlock()); - // TODO(Ian) move this to an out param: builder.CreateStore(new_mem_ptr, mem_ptr_ref); + builder.CreateStore(new_mem_ptr, mem_ptr_ref); - this->UnpackLocals(builder, new_locals, parent_state, + this->UnpackLocals(builder, out_param_locals, parent_state, cbfunc.GetInScopeVaraibles()); } diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index dee01366b..8c63a20e7 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -20,8 +20,8 @@ namespace anvill { struct BasicBlockFunction { llvm::Function *func; - llvm::Value *state_ptr; llvm::Argument *pc_arg; + llvm::Argument *variable_ptr; llvm::Argument *mem_ptr; llvm::Argument *next_pc_out_param; }; @@ -43,6 +43,8 @@ class BasicBlockLifter : public CodeLifter { // The 
allocated state ptr for the function. llvm::Value *state_ptr{nullptr}; + llvm::Function *lifted_func{nullptr}; + llvm::StructType * StructTypeFromVars(const std::vector &in_scope_locals) const; @@ -52,7 +54,7 @@ class BasicBlockLifter : public CodeLifter { remill::DecodingContext CreateDecodingContext(const CodeBlock &blk); - void LiftBasicBlockIntoFunction(BasicBlockFunction &basic_block_function); + void LiftInstructionsIntoLiftedFunction(); BasicBlockFunction CreateBasicBlockFunction(); @@ -95,8 +97,9 @@ class BasicBlockLifter : public CodeLifter { CallableBasicBlockFunction LiftBasicBlockFunction() &&; // Packs in scope variables into a struct - llvm::Value *PackLocals(llvm::IRBuilder<> &, llvm::Value *from_state_ptr, - const std::vector &) const; + void PackLocals(llvm::IRBuilder<> &bldr, llvm::Value *from_state_ptr, + llvm::Value *into_vars, + const std::vector &decls) const; void UnpackLocals(llvm::IRBuilder<> &, llvm::Value *returned_value, llvm::Value *into_state_ptr, diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index a23b9d2ed..3b7247cce 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -398,6 +399,8 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( // `__attribute__((flatten))`, i.e. recursively inline as much as possible, so // that all semantics and helpers are completely inlined. 
void FunctionLifter::RecursivelyInlineLiftedFunctionIntoNativeFunction(void) { + // TODO(Ian): yeah i need to debug why i need to strip debug info + //llvm::UpgradeDebugInfo(*this->semantics_module); CHECK(!llvm::verifyModule(*this->native_func->getParent(), &llvm::errs())); this->RecursivelyInlineFunctionCallees(this->native_func); } @@ -421,6 +424,7 @@ FunctionLifter::LiftBasicBlockFunction(const CodeBlock &blk) const { this->semantics_module.get(), this->type_specifier); } + void FunctionLifter::VisitBlock(CodeBlock blk, llvm::Value *lifted_function_state) { auto llvm_blk = this->GetOrCreateBlock(blk.addr); @@ -429,7 +433,8 @@ void FunctionLifter::VisitBlock(CodeBlock blk, auto bbfunc = this->LiftBasicBlockFunction(blk); - + // TODO(Ian): yeah i need to debug why i need to strip debug info + //llvm::UpgradeDebugInfo(*this->semantics_module); CHECK(!llvm::verifyFunction(*bbfunc.GetFunction(), &llvm::errs())); bbfunc.CallBasicBlockFunction(builder, lifted_function_state); @@ -454,6 +459,10 @@ void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state) { this->VisitBlock(blk, lifted_function_state); } + // TODO(Ian): yeah i need to debug why i need to strip debug info + //llvm::UpgradeDebugInfo(*this->semantics_module); + this->lifted_func->dump(); + CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); } From 86a15b614dfda8eb6d41afa6a97224dd36e9536c Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 21:28:46 -0500 Subject: [PATCH 049/163] lifts again --- lib/Lifters/FunctionLifter.cpp | 11 ++--------- lib/Lifters/FunctionLifter.h | 4 ---- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 3b7247cce..52ce62858 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -399,8 +399,6 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( // `__attribute__((flatten))`, i.e. 
recursively inline as much as possible, so // that all semantics and helpers are completely inlined. void FunctionLifter::RecursivelyInlineLiftedFunctionIntoNativeFunction(void) { - // TODO(Ian): yeah i need to debug why i need to strip debug info - //llvm::UpgradeDebugInfo(*this->semantics_module); CHECK(!llvm::verifyModule(*this->native_func->getParent(), &llvm::errs())); this->RecursivelyInlineFunctionCallees(this->native_func); } @@ -433,8 +431,6 @@ void FunctionLifter::VisitBlock(CodeBlock blk, auto bbfunc = this->LiftBasicBlockFunction(blk); - // TODO(Ian): yeah i need to debug why i need to strip debug info - //llvm::UpgradeDebugInfo(*this->semantics_module); CHECK(!llvm::verifyFunction(*bbfunc.GetFunction(), &llvm::errs())); bbfunc.CallBasicBlockFunction(builder, lifted_function_state); @@ -454,15 +450,13 @@ void FunctionLifter::VisitBlock(CodeBlock blk, void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state) { DLOG(INFO) << "Num blocks for func " << std::hex << this->curr_decl->address << ": " << this->curr_decl->cfg.size(); + + for (const auto &[addr, blk] : this->curr_decl->cfg) { DLOG(INFO) << "Visiting: " << std::hex << addr; this->VisitBlock(blk, lifted_function_state); } - // TODO(Ian): yeah i need to debug why i need to strip debug info - //llvm::UpgradeDebugInfo(*this->semantics_module); - this->lifted_func->dump(); - CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); } @@ -470,7 +464,6 @@ void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state) { LiftedFunction FunctionLifter::CreateLiftedFunction(const std::string &name) { auto new_func = options.arch->DefineLiftedFunction(name, semantics_module.get()); - auto state_ptr = remill::NthArgument(new_func, remill::kStatePointerArgNum); auto pc_arg = remill::NthArgument(new_func, remill::kPCArgNum); auto mem_arg = remill::NthArgument(new_func, remill::kMemoryPointerArgNum); diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 
b1ed1365c..22f7dd4ae 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -105,10 +105,6 @@ class FunctionLifter : public CodeLifter { TypeTranslator type_specifier; - // Metadata node to attach to lifted instructions to related them to - // original instructions. - unsigned pc_annotation_id{0}; - llvm::MDNode *pc_annotation{nullptr}; // Address of the function currently being lifted. From 476c98d314ddac36dd97da3bda0a7205e557ac67 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 8 Dec 2022 21:34:37 -0500 Subject: [PATCH 050/163] disable optimization until figure out what's going on --- bin/Decompile/Main.cpp | 2 +- lib/Lifters/BasicBlockLifter.cpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index f089da416..8f897fa4b 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -241,7 +241,7 @@ int main(int argc, char *argv[]) { llvm::EnableStatistics(); } - anvill::OptimizeModule(lifter, module); + //anvill::OptimizeModule(lifter, module); int ret = EXIT_SUCCESS; diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index dba431f38..2157ea5cd 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -28,10 +28,6 @@ CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); CHECK(!llvm::verifyFunction(*bbfunc.func, &llvm::errs())); this->RecursivelyInlineFunctionCallees(bbfunc.func); - - - bbfunc.func->dump(); - //CHECK(false); return CallableBasicBlockFunction(bbfunc.func, this->block_context.GetAvailableVariables(), block_def, std::move(*this)); From b5e3da8225382087f12f9cc242118228f26be428 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 9 Dec 2022 16:30:00 +0100 Subject: [PATCH 051/163] Fix spec loading bug --- lib/Protobuf.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 4d69f8766..4e1742797 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -534,8 +534,9 @@ Result ProtobufTranslator::DecodeGlobalVar( << decl.address << ": " << spec_type.Error(); return ss.str(); } + decl.spec_type = spec_type.TakeValue(); - auto llvm_type = type_translator.DecodeFromSpec(spec_type.Value()); + auto llvm_type = type_translator.DecodeFromSpec(decl.spec_type); if (!llvm_type.Succeeded()) { std::stringstream ss; ss << "Cannot translate type for variable at address " << std::hex From a0ee5a8ffbaedebf2c3762b7aa18495757ab0b17 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 9 Dec 2022 16:30:21 +0100 Subject: [PATCH 052/163] Convert type specs to LLVM metadata --- include/anvill/Type.h | 4 ++ lib/Type.cpp | 113 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/include/anvill/Type.h b/include/anvill/Type.h index 6622596eb..634c016fc 100644 --- a/include/anvill/Type.h +++ b/include/anvill/Type.h @@ -8,6 +8,8 @@ #pragma once +#include + #include #include #include @@ -246,6 +248,8 @@ class TypeTranslator { EncodeToString(llvm::Type *type, EncodingFormat alphanum = EncodingFormat::kDefault) const; + llvm::MDNode *EncodeToMetadata(TypeSpec spec) const; + Result DecodeFromSpec(TypeSpec spec) const; }; diff --git a/lib/Type.cpp b/lib/Type.cpp index d4609a529..729ec509a 100644 --- a/lib/Type.cpp +++ b/lib/Type.cpp @@ -7,6 +7,8 @@ */ #include +#include +#include #define ANVILL_USE_WRAPPED_TYPES 0 @@ -41,6 +43,7 @@ class TypeSpecifierImpl { const TypeDictionary type_dict; std::unordered_map type_to_id; std::vector id_to_type; + std::unordered_map type_to_md; inline TypeSpecifierImpl(const TypeDictionary &type_dict_, const llvm::DataLayout &dl_) @@ -52,6 +55,14 @@ class TypeSpecifierImpl { // TypeSpecification.cpp void EncodeType(llvm::Type &type, std::stringstream &ss, EncodingFormat format); + + llvm::MDNode *TypeToMetadata(BaseType type); + llvm::MDNode 
*TypeToMetadata(std::shared_ptr type); + llvm::MDNode *TypeToMetadata(std::shared_ptr type); + llvm::MDNode *TypeToMetadata(std::shared_ptr type); + llvm::MDNode *TypeToMetadata(std::shared_ptr type); + llvm::MDNode *TypeToMetadata(std::shared_ptr type); + llvm::MDNode *TypeToMetadata(UnknownType type); }; // Translates an llvm::Type to a type that conforms to the spec in @@ -294,6 +305,104 @@ void TypeSpecifierImpl::EncodeType( } } +llvm::MDNode *TypeSpecifierImpl::TypeToMetadata(BaseType type) { + auto str = llvm::MDString::get(context, "BaseType"); + auto value = llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(context), + static_cast(type)); + return llvm::MDNode::get(context, + {str, llvm::ConstantAsMetadata::get(value)}); +} + +llvm::MDNode * +TypeSpecifierImpl::TypeToMetadata(std::shared_ptr type) { + auto &node = type_to_md[type.get()]; + if (node) { + return node; + } + + auto str = llvm::MDString::get(context, "PointerType"); + auto pointee = + std::visit([this](auto &&t) { return TypeToMetadata(t); }, type->pointee); + return llvm::MDNode::get(context, {str, pointee}); +} + +llvm::MDNode * +TypeSpecifierImpl::TypeToMetadata(std::shared_ptr type) { + auto &node = type_to_md[type.get()]; + if (node) { + return node; + } + + auto str = llvm::MDString::get(context, "VectorType"); + auto base = + std::visit([this](auto &&t) { return TypeToMetadata(t); }, type->base); + auto size = llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(context), + static_cast(type->size)); + return llvm::MDNode::get(context, + {str, base, llvm::ConstantAsMetadata::get(size)}); +} + +llvm::MDNode * +TypeSpecifierImpl::TypeToMetadata(std::shared_ptr type) { + auto &node = type_to_md[type.get()]; + if (node) { + return node; + } + + auto str = llvm::MDString::get(context, "ArrayType"); + auto base = + std::visit([this](auto &&t) { return TypeToMetadata(t); }, type->base); + auto size = llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(context), + static_cast(type->size)); + 
return llvm::MDNode::get(context, + {str, base, llvm::ConstantAsMetadata::get(size)}); +} + +llvm::MDNode * +TypeSpecifierImpl::TypeToMetadata(std::shared_ptr type) { + auto &node = type_to_md[type.get()]; + if (node) { + return node; + } + + auto str = llvm::MDString::get(context, "StructType"); + std::vector md; + md.push_back(str); + for (auto &member : type->members) { + md.push_back( + std::visit([this](auto &&t) { return TypeToMetadata(t); }, member)); + } + return llvm::MDNode::get(context, md); +} + +llvm::MDNode * +TypeSpecifierImpl::TypeToMetadata(std::shared_ptr type) { + auto &node = type_to_md[type.get()]; + if (node) { + return node; + } + + auto str = llvm::MDString::get(context, "FunctionType"); + std::vector md; + md.push_back(str); + md.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::getBool(context, type->is_variadic))); + md.push_back(std::visit([this](auto &&t) { return TypeToMetadata(t); }, + type->return_type)); + for (auto &arg : type->arguments) { + md.push_back( + std::visit([this](auto &&t) { return TypeToMetadata(t); }, arg)); + } + return llvm::MDNode::get(context, md); +} + +llvm::MDNode *TypeSpecifierImpl::TypeToMetadata(UnknownType type) { + auto str = llvm::MDString::get(context, "UnknownType"); + auto size = + llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(context), type.size); + return llvm::MDNode::get(context, {str, llvm::ConstantAsMetadata::get(size)}); +} + namespace { #if ANVILL_USE_WRAPPED_TYPES @@ -473,6 +582,10 @@ std::string TypeTranslator::EncodeToString( return ss.str(); } +llvm::MDNode *TypeTranslator::EncodeToMetadata(TypeSpec spec) const { + return std::visit([this](auto &&t) { return impl->TypeToMetadata(t); }, spec); +} + // Parse an encoded type string into its represented type. 
Result TypeTranslator::DecodeFromSpec(TypeSpec spec) const { From 5cd4ef3c7635c32b20d9caf2bd2092a3d0aa2acb Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 9 Dec 2022 16:30:35 +0100 Subject: [PATCH 053/163] Annotate values with type metadata --- lib/Lifters/DataLifter.cpp | 9 +++++--- lib/Lifters/FunctionLifter.cpp | 42 ++++++++++++++++++++++++---------- lib/Lifters/FunctionLifter.h | 3 ++- lib/Lifters/ValueLifter.cpp | 2 +- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/lib/Lifters/DataLifter.cpp b/lib/Lifters/DataLifter.cpp index dd665aa64..a45962d7a 100644 --- a/lib/Lifters/DataLifter.cpp +++ b/lib/Lifters/DataLifter.cpp @@ -195,9 +195,12 @@ llvm::Constant *DataLifter::LiftData(const VariableDecl &decl, auto is_constant = first_byte_perms == BytePermission::kReadable || first_byte_perms == BytePermission::kReadableExecutable; - return new llvm::GlobalVariable(*options.module, type, is_constant, - llvm::GlobalValue::ExternalLinkage, value, - var_name); + auto md = type_specifier.EncodeToMetadata(decl.spec_type); + auto gvar = new llvm::GlobalVariable(*options.module, type, is_constant, + llvm::GlobalValue::ExternalLinkage, + value, var_name); + gvar->setMetadata("anvill.type", md); + return gvar; } // Declare a lifted a variable. 
Will return `nullptr` if the memory is diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 26e5b4ae9..57c58bbc6 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -1146,6 +1147,14 @@ llvm::Function *FunctionLifter::GetOrDeclareFunction(const FunctionDecl &decl) { if (decl.is_noreturn) { native_func->addFnAttr(llvm::Attribute::NoReturn); } + + std::vector args; + for (auto &arg : decl.params) { + args.push_back(type_specifier.EncodeToMetadata(arg.spec_type)); + } + native_func->setMetadata("anvill.args", + llvm::MDNode::get(llvm_context, args)); + return native_func; } @@ -1680,7 +1689,7 @@ llvm::Function *EntityLifter::LiftEntity(const FunctionDecl &decl) const { // Add the function to the entity lifter's target module. const auto func_in_target_module = - func_lifter.AddFunctionToContext(func, decl.address, *impl); + func_lifter.AddFunctionToContext(func, decl, *impl); // If we had a previous declaration/definition, then we want to make sure // that we replaced its body, and we also want to make sure that if our @@ -1741,7 +1750,7 @@ llvm::Function *EntityLifter::DeclareEntity(const FunctionDecl &decl) const { if (const auto func = func_lifter.DeclareFunction(decl)) { DCHECK(!module->getFunction(func->getName())); - return func_lifter.AddFunctionToContext(func, decl.address, *impl); + return func_lifter.AddFunctionToContext(func, decl, *impl); } else { return nullptr; } @@ -1770,7 +1779,8 @@ static void EraseFunctionBody(llvm::Function *func) { // function, and copy the function into the context's module. Returns the // version of `func` inside the module of the lifter context. 
llvm::Function * -FunctionLifter::AddFunctionToContext(llvm::Function *func, uint64_t address, +FunctionLifter::AddFunctionToContext(llvm::Function *func, + const FunctionDecl &decl, EntityLifterImpl &lifter_context) const { const auto target_module = options.module; @@ -1794,13 +1804,14 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, uint64_t address, // It's possible that we've lifted this function before, but that it was // renamed by user code, and so the above check failed. Go check for that. } else { - lifter_context.ForEachEntityAtAddress(address, [&](llvm::Constant *gv) { - if (auto gv_func = llvm::dyn_cast(gv); - gv_func && gv_func->getFunctionType() == module_func_type) { - CHECK(!new_version); - new_version = gv_func; - } - }); + lifter_context.ForEachEntityAtAddress( + decl.address, [&](llvm::Constant *gv) { + if (auto gv_func = llvm::dyn_cast(gv); + gv_func && gv_func->getFunctionType() == module_func_type) { + CHECK(!new_version); + new_version = gv_func; + } + }); } // This is the first time we're lifting this function, or even the first time @@ -1818,13 +1829,20 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, uint64_t address, // just in case it will be needed in future lifts. EraseFunctionBody(func); - if (auto func_annotation = GetPCAnnotation(address)) { + if (auto func_annotation = GetPCAnnotation(decl.address)) { new_version->setMetadata(pc_annotation_id, func_annotation); } + std::vector args; + for (auto &arg : decl.params) { + args.push_back(type_specifier.EncodeToMetadata(arg.spec_type)); + } + new_version->setMetadata("anvill.args", + llvm::MDNode::get(llvm_context, args)); + // Update the context to keep its internal concepts of what LLVM objects // correspond with which native binary addresses. 
- lifter_context.AddEntity(new_version, address); + lifter_context.AddEntity(new_version, decl.address); // The function we just lifted may call other functions, so we need to go // find those and also use them to update the context. diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index bf0043f74..697278990 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -70,7 +70,8 @@ class FunctionLifter { // Update the associated entity lifter with information about this // function, and copy the function into the context's module. Returns the // version of `func` inside the module of the lifter context. - llvm::Function *AddFunctionToContext(llvm::Function *func, uint64_t address, + llvm::Function *AddFunctionToContext(llvm::Function *func, + const FunctionDecl &decl, EntityLifterImpl &lifter_context) const; private: diff --git a/lib/Lifters/ValueLifter.cpp b/lib/Lifters/ValueLifter.cpp index 023b7ad84..a1c96dd6a 100644 --- a/lib/Lifters/ValueLifter.cpp +++ b/lib/Lifters/ValueLifter.cpp @@ -49,7 +49,7 @@ ValueLifterImpl::GetFunctionPointer(const FunctionDecl &decl, auto &func_lifter = ent_lifter.function_lifter; auto func = func_lifter.DeclareFunction(decl); auto func_in_context = - func_lifter.AddFunctionToContext(func, decl.address, ent_lifter); + func_lifter.AddFunctionToContext(func, decl, ent_lifter); return func_in_context; } From f4c96ddd4c9174647f89f7fc0cf8fb3f1aac9a88 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 9 Dec 2022 10:36:22 -0500 Subject: [PATCH 054/163] symbolic init stack --- bin/Decompile/Main.cpp | 2 +- lib/Lifters/BasicBlockLifter.cpp | 15 +++++++++++++-- lib/Optimize.cpp | 3 ++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index 8f897fa4b..f089da416 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -241,7 +241,7 @@ int main(int argc, char *argv[]) { llvm::EnableStatistics(); } - //anvill::OptimizeModule(lifter, 
module); + anvill::OptimizeModule(lifter, module); int ret = EXIT_SUCCESS; diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 2157ea5cd..bc6cf7663 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -19,6 +19,7 @@ #include "Lifters/CodeLifter.h" #include "anvill/Declarations.h" +#include "anvill/Optimize.h" namespace anvill { @@ -27,7 +28,10 @@ CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { this->LiftInstructionsIntoLiftedFunction(); CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); CHECK(!llvm::verifyFunction(*bbfunc.func, &llvm::errs())); + this->RecursivelyInlineFunctionCallees(bbfunc.func); + anvill::EntityLifter lifter(options); + return CallableBasicBlockFunction(bbfunc.func, this->block_context.GetAvailableVariables(), block_def, std::move(*this)); @@ -401,6 +405,12 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { // Can resolve these stack references later . + auto sp_ptr = sp_reg->AddressOf(this->state_ptr, ir); + // Initialize the stack pointer. 
+ ir.CreateStore( + options.stack_pointer_init_procedure(ir, sp_reg, this->block_def.addr), + sp_ptr); + auto stack_offsets = this->block_context.GetStackOffsets(); for (auto &reg_off : stack_offsets.affine_equalities) { @@ -422,7 +432,7 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { func->addFnAttr(llvm::Attribute::NoInline); - func->setLinkage(llvm::GlobalValue::InternalLinkage); + //func->setLinkage(llvm::GlobalValue::InternalLinkage); // TODO(Ian): memory pointer isnt quite right std::array args = { @@ -447,7 +457,8 @@ llvm::StructType *BasicBlockLifter::StructTypeFromVars( std::back_inserter(field_types), [](const ParameterDecl &param) { return param.type; }); - return llvm::StructType::create(llvm_context, field_types); + return llvm::StructType::create(llvm_context, field_types, + "sty_for_basic_block_function"); } // Packs in scope variables into a struct diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 9506c9707..ede428afa 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -160,7 +160,9 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { // mpm.addPass(std::move(inliner)); mpm.addPass(llvm::GlobalOptPass()); + mpm.addPass(llvm::GlobalDCEPass()); + mpm.addPass(llvm::StripDeadDebugInfoPass()); llvm::FunctionPassManager fpm; @@ -216,7 +218,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { //AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options); fpm.addPass(llvm::SROAPass()); - AddCombineAdjacentShifts(fpm); // Sometimes we have a values in the form of (expr ^ 1) used as branch From 42ec613cd3deac068d4e77edf60408a55c7f2182 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 9 Dec 2022 17:49:26 +0100 Subject: [PATCH 055/163] Begin add pointer lifting stuff --- .../Passes/ConvertPointerArithmeticToGEP.h | 38 +++++ .../anvill/Passes/PropagateTypeAnnotations.h | 40 ++++++ lib/CMakeLists.txt | 2 + lib/Passes/ConvertPointerArithmeticToGEP.cpp | 33 +++++ 
lib/Passes/PropagateTypeAnnotations.cpp | 132 ++++++++++++++++++ 5 files changed, 245 insertions(+) create mode 100644 include/anvill/Passes/ConvertPointerArithmeticToGEP.h create mode 100644 include/anvill/Passes/PropagateTypeAnnotations.h create mode 100644 lib/Passes/ConvertPointerArithmeticToGEP.cpp create mode 100644 lib/Passes/PropagateTypeAnnotations.cpp diff --git a/include/anvill/Passes/ConvertPointerArithmeticToGEP.h b/include/anvill/Passes/ConvertPointerArithmeticToGEP.h new file mode 100644 index 000000000..31e41a9b7 --- /dev/null +++ b/include/anvill/Passes/ConvertPointerArithmeticToGEP.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022-present, Trail of Bits, Inc. + * All rights reserved. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace anvill { + +class ConvertPointerArithmeticToGEP final + : public llvm::PassInfoMixin { + private: + struct Impl; + std::unique_ptr impl; + + public: + // Function pass entry point + llvm::PreservedAnalyses run(llvm::Function &function, + llvm::FunctionAnalysisManager &fam); + + // Returns the pass name + static llvm::StringRef name(void); + + ConvertPointerArithmeticToGEP(); +}; + +} // namespace anvill diff --git a/include/anvill/Passes/PropagateTypeAnnotations.h b/include/anvill/Passes/PropagateTypeAnnotations.h new file mode 100644 index 000000000..bfd1b925a --- /dev/null +++ b/include/anvill/Passes/PropagateTypeAnnotations.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2022-present, Trail of Bits, Inc. + * All rights reserved. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace anvill { + +class PropagateTypeAnnotations final + : public llvm::PassInfoMixin { + private: + struct Impl; + std::unique_ptr impl; + + public: + using StructMap = std::unordered_map; + + // Function pass entry point + llvm::PreservedAnalyses run(llvm::Function &function, + llvm::FunctionAnalysisManager &fam); + + // Returns the pass name + static llvm::StringRef name(void); + + PropagateTypeAnnotations(StructMap &structs); +}; + +} // namespace anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 96fabdb8c..47ac63f2a 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -32,6 +32,7 @@ protobuf_generate_cpp( set(anvill_passes ConvertAddressesToEntityUses ConvertMasksToCasts + ConvertPointerArithmeticToGEP ConvertSymbolicReturnAddressToConcreteReturnAddress ConvertXorsToCmps HoistUsersOfSelectsAndPhis @@ -42,6 +43,7 @@ set(anvill_passes LowerRemillUndefinedIntrinsics LowerSwitchIntrinsics LowerTypeHintIntrinsics + PropagateTypeAnnotations RecoverBasicStackFrame RemoveCompilerBarriers RemoveDelaySlotIntrinsics diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp new file mode 100644 index 000000000..5ea20935b --- /dev/null +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022-present, Trail of Bits, Inc. + * All rights reserved. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace anvill { +struct ConvertPointerArithmeticToGEP::Impl {}; + +ConvertPointerArithmeticToGEP::ConvertPointerArithmeticToGEP() + : impl(std::make_unique()) {} + +llvm::StringRef ConvertPointerArithmeticToGEP::name() { + return "ConvertPointerArithmeticToGEP"; +} + +llvm::PreservedAnalyses +ConvertPointerArithmeticToGEP::run(llvm::Function &function, + llvm::FunctionAnalysisManager &fam) { + + return llvm::PreservedAnalyses::none(); +} +} // namespace anvill \ No newline at end of file diff --git a/lib/Passes/PropagateTypeAnnotations.cpp b/lib/Passes/PropagateTypeAnnotations.cpp new file mode 100644 index 000000000..78daa8ba0 --- /dev/null +++ b/lib/Passes/PropagateTypeAnnotations.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2022-present, Trail of Bits, Inc. + * All rights reserved. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace anvill { +struct PropagateTypeAnnotations::Impl { + StructMap &structs; + + llvm::Type *MDToType(llvm::LLVMContext &context, llvm::MDNode *md); + llvm::StructType *MDToStruct(llvm::LLVMContext &context, llvm::MDNode *md); + + Impl(StructMap &structs) : structs(structs) {} +}; + +llvm::Type *PropagateTypeAnnotations::Impl::MDToType(llvm::LLVMContext &context, + llvm::MDNode *md) { + auto tag = llvm::cast(md->getOperand(0).get()); + auto tag_string = tag->getString(); + if (tag_string == "BaseType") { + auto kind_const = + llvm::cast(md->getOperand(1).get()); + auto kind_int = llvm::cast(kind_const->getValue()); + auto kind = static_cast(kind_int->getZExtValue()); + + switch (kind) { + case BaseType::Bool: + case BaseType::Char: + case BaseType::SignedChar: + case BaseType::UnsignedChar: + case BaseType::Int8: + case BaseType::UInt8: + case BaseType::Padding: return llvm::Type::getInt8Ty(context); + + case BaseType::Int16: + case BaseType::UInt16: return llvm::Type::getInt16Ty(context); + + case BaseType::Int24: + case BaseType::UInt24: + case BaseType::Int32: + case BaseType::UInt32: return llvm::Type::getInt32Ty(context); + + case BaseType::Int64: + case BaseType::UInt64: return llvm::Type::getInt64Ty(context); + + case BaseType::Int128: + case BaseType::UInt128: return llvm::Type::getInt128Ty(context); + + case BaseType::Float16: return llvm::Type::getHalfTy(context); + case BaseType::Float32: return llvm::Type::getFloatTy(context); + case BaseType::Float64: return llvm::Type::getDoubleTy(context); + case BaseType::Float80: return llvm::Type::getX86_FP80Ty(context); + case BaseType::Float128: return llvm::Type::getFP128Ty(context); + case BaseType::MMX64: return llvm::Type::getX86_MMXTy(context); + + case BaseType::Void: return llvm::Type::getVoidTy(context); + + default: return nullptr; + } + } else if (tag_string == "PointerType") { + return llvm::PointerType::get(context, 
0); + } else if (tag_string == "VectorType") { + auto elem = + MDToType(context, llvm::cast(md->getOperand(1).get())); + auto size_const = + llvm::cast(md->getOperand(2).get()); + auto size_int = llvm::cast(size_const->getValue()); + return llvm::VectorType::get(elem, size_int->getZExtValue(), false); + } else if (tag_string == "ArrayType") { + auto elem = + MDToType(context, llvm::cast(md->getOperand(1).get())); + auto size_const = + llvm::cast(md->getOperand(2).get()); + auto size_int = llvm::cast(size_const->getValue()); + return llvm::ArrayType::get(elem, size_int->getZExtValue()); + } else if (tag_string == "StructType") { + return MDToStruct(context, md); + } else if (tag_string == "FunctionType") { + // TODO(frabert) + return nullptr; + } else if (tag_string == "UnknownType") { + auto size_const = + llvm::cast(md->getOperand(1).get()); + auto size_int = llvm::cast(size_const->getValue()); + return llvm::Type::getIntNTy(context, size_int->getZExtValue() * 8); + } + return nullptr; +} + +llvm::StructType * +PropagateTypeAnnotations::Impl::MDToStruct(llvm::LLVMContext &context, + llvm::MDNode *md) { + auto &struct_ = structs[md]; + if (struct_) { + return struct_; + } + + std::vector elems; + for (unsigned i = 1; i < md->getNumOperands(); ++i) { + elems.push_back( + MDToType(context, llvm::cast(md->getOperand(i).get()))); + } + struct_ = llvm::StructType::get(context, elems, true); + + return struct_; +} + +PropagateTypeAnnotations::PropagateTypeAnnotations(StructMap &structs) + : impl(std::make_unique(structs)) {} + +llvm::StringRef PropagateTypeAnnotations::name() { + return "PropagateTypeAnnotations"; +} + +llvm::PreservedAnalyses +PropagateTypeAnnotations::run(llvm::Function &function, + llvm::FunctionAnalysisManager &fam) { + + return llvm::PreservedAnalyses::all(); +} +} // namespace anvill \ No newline at end of file From bdf898faf0dc1a9436183488415db9efa4c89e97 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Sun, 11 Dec 2022 16:10:50 +0100 
Subject: [PATCH 056/163] First conversion heuristics --- .../Passes/ConvertPointerArithmeticToGEP.h | 9 +- .../anvill/Passes/PropagateTypeAnnotations.h | 40 -- lib/CMakeLists.txt | 1 - lib/Optimize.cpp | 7 + lib/Passes/ConvertPointerArithmeticToGEP.cpp | 406 +++++++++++++++++- lib/Passes/PropagateTypeAnnotations.cpp | 132 ------ 6 files changed, 417 insertions(+), 178 deletions(-) delete mode 100644 include/anvill/Passes/PropagateTypeAnnotations.h delete mode 100644 lib/Passes/PropagateTypeAnnotations.cpp diff --git a/include/anvill/Passes/ConvertPointerArithmeticToGEP.h b/include/anvill/Passes/ConvertPointerArithmeticToGEP.h index 31e41a9b7..658287ee0 100644 --- a/include/anvill/Passes/ConvertPointerArithmeticToGEP.h +++ b/include/anvill/Passes/ConvertPointerArithmeticToGEP.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -25,6 +26,10 @@ class ConvertPointerArithmeticToGEP final std::unique_ptr impl; public: + using StructMap = std::unordered_map; + using TypeMap = std::unordered_map; + using MDMap = std::unordered_map; + // Function pass entry point llvm::PreservedAnalyses run(llvm::Function &function, llvm::FunctionAnalysisManager &fam); @@ -32,7 +37,9 @@ class ConvertPointerArithmeticToGEP final // Returns the pass name static llvm::StringRef name(void); - ConvertPointerArithmeticToGEP(); + ConvertPointerArithmeticToGEP(TypeMap &types, StructMap &structs, MDMap &md); + ConvertPointerArithmeticToGEP(const ConvertPointerArithmeticToGEP &); + ~ConvertPointerArithmeticToGEP(); }; } // namespace anvill diff --git a/include/anvill/Passes/PropagateTypeAnnotations.h b/include/anvill/Passes/PropagateTypeAnnotations.h deleted file mode 100644 index bfd1b925a..000000000 --- a/include/anvill/Passes/PropagateTypeAnnotations.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2022-present, Trail of Bits, Inc. - * All rights reserved. 
- * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - -#pragma once - -#include -#include -#include - -#include -#include -#include - -namespace anvill { - -class PropagateTypeAnnotations final - : public llvm::PassInfoMixin { - private: - struct Impl; - std::unique_ptr impl; - - public: - using StructMap = std::unordered_map; - - // Function pass entry point - llvm::PreservedAnalyses run(llvm::Function &function, - llvm::FunctionAnalysisManager &fam); - - // Returns the pass name - static llvm::StringRef name(void); - - PropagateTypeAnnotations(StructMap &structs); -}; - -} // namespace anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 47ac63f2a..c44090c9d 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -43,7 +43,6 @@ set(anvill_passes LowerRemillUndefinedIntrinsics LowerSwitchIntrinsics LowerTypeHintIntrinsics - PropagateTypeAnnotations RecoverBasicStackFrame RemoveCompilerBarriers RemoveDelaySlotIntrinsics diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index df3586c95..407329425 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -56,9 +56,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -140,6 +142,10 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { pc_metadata_id = context.getMDKindID(options.pc_metadata_name); } + ConvertPointerArithmeticToGEP::StructMap structs; + ConvertPointerArithmeticToGEP::TypeMap types; + ConvertPointerArithmeticToGEP::MDMap md; + llvm::PassBuilder pb; llvm::ModulePassManager mpm; llvm::ModuleAnalysisManager mam; @@ -230,6 +236,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { AddBranchRecovery(fpm); AddLowerSwitchIntrinsics(fpm, mp); + fpm.addPass(ConvertPointerArithmeticToGEP(types, structs, md)); pb.crossRegisterProxies(lam, fam, cam, mam); diff --git 
a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index 5ea20935b..701281a15 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -8,26 +8,424 @@ #include #include +#include +#include #include +#include #include +#include +#include +#include +#include +#include #include #include +#include +#include #include +#include +#include + +#include +#include +#include +#include namespace anvill { -struct ConvertPointerArithmeticToGEP::Impl {}; +struct ConvertPointerArithmeticToGEP::Impl { + TypeMap &types; + StructMap &structs; + MDMap &md; + + TypeSpec MDToTypeSpec(llvm::MDNode *md); + std::optional GetTypeInfo(llvm::Value *val); + + llvm::Type *TypeSpecToType(llvm::LLVMContext &context, BaseType t); + llvm::Type *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::Type *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::Type *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::Type *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::Type *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::Type *TypeSpecToType(llvm::LLVMContext &context, UnknownType t); + llvm::Type *TypeSpecToType(llvm::LLVMContext &context, TypeSpec type); + + llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, BaseType t); + llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, UnknownType t); + llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, TypeSpec type); + + void FoldPtrAdd(llvm::Function &f); + 
+ Impl(TypeMap &types, StructMap &structs, MDMap &md) + : types(types), + structs(structs), + md(md) {} +}; + + +llvm::Type * +ConvertPointerArithmeticToGEP::Impl::TypeSpecToType(llvm::LLVMContext &context, + BaseType t) { + switch (t) { + case BaseType::Bool: + case BaseType::Char: + case BaseType::SignedChar: + case BaseType::UnsignedChar: + case BaseType::Int8: + case BaseType::UInt8: + case BaseType::Padding: return llvm::Type::getInt8Ty(context); + + case BaseType::Int16: + case BaseType::UInt16: return llvm::Type::getInt16Ty(context); + + case BaseType::Int24: + case BaseType::UInt24: return llvm::Type::getIntNTy(context, 24); + + case BaseType::Int32: + case BaseType::UInt32: return llvm::Type::getInt32Ty(context); + + case BaseType::Int64: + case BaseType::UInt64: return llvm::Type::getInt64Ty(context); + + case BaseType::Int128: + case BaseType::UInt128: return llvm::Type::getInt128Ty(context); + + case BaseType::Float16: return llvm::Type::getHalfTy(context); + case BaseType::Float32: return llvm::Type::getFloatTy(context); + case BaseType::Float64: return llvm::Type::getDoubleTy(context); + case BaseType::Float80: return llvm::Type::getX86_FP80Ty(context); + case BaseType::Float128: return llvm::Type::getFP128Ty(context); + case BaseType::MMX64: return llvm::Type::getX86_MMXTy(context); + + case BaseType::Void: return llvm::Type::getVoidTy(context); + + default: return nullptr; + } +} + +llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( + llvm::LLVMContext &context, std::shared_ptr t) { + return llvm::PointerType::get(context, 0); +} + +llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( + llvm::LLVMContext &context, std::shared_ptr t) { + return llvm::ArrayType::get(TypeSpecToType(context, t->base), t->size); +} + +llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( + llvm::LLVMContext &context, std::shared_ptr t) { + return llvm::FixedVectorType::get(TypeSpecToType(context, t->base), t->size); +} + +llvm::Type 
*ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( + llvm::LLVMContext &context, std::shared_ptr t) { + auto &type = structs[t.get()]; + if (type) { + return type; + } + + std::vector members; + for (auto member : t->members) { + members.push_back(TypeSpecToType(context, member)); + } + type = llvm::StructType::get(context, members, /*isPacked=*/true); + return type; +} + +llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( + llvm::LLVMContext &context, std::shared_ptr t) { + std::vector args; + for (auto arg : t->arguments) { + args.push_back(TypeSpecToType(context, arg)); + } + return llvm::FunctionType::get(TypeSpecToType(context, t->return_type), args, + t->is_variadic); +} + +llvm::Type * +ConvertPointerArithmeticToGEP::Impl::TypeSpecToType(llvm::LLVMContext &context, + UnknownType t) { + return llvm::Type::getIntNTy(context, t.size * 8); +} + +llvm::Type * +ConvertPointerArithmeticToGEP::Impl::TypeSpecToType(llvm::LLVMContext &context, + TypeSpec type) { + return std::visit( + [this, &context](auto &&t) { return TypeSpecToType(context, t); }, type); +} + +TypeSpec ConvertPointerArithmeticToGEP::Impl::MDToTypeSpec(llvm::MDNode *md) { + if (types.count(md)) { + return types[md]; + } + + auto &type = types[md]; + auto tag = llvm::cast(md->getOperand(0).get()); + auto tag_string = tag->getString(); + if (tag_string == "BaseType") { + auto kind_const = + llvm::cast(md->getOperand(1).get()); + auto kind_int = llvm::cast(kind_const->getValue()); + auto kind = static_cast(kind_int->getZExtValue()); + + type = kind; + } else if (tag_string == "PointerType") { + auto pointee = + MDToTypeSpec(llvm::cast(md->getOperand(1).get())); + type = std::make_shared(pointee, false); + } else if (tag_string == "VectorType") { + auto elem = MDToTypeSpec(llvm::cast(md->getOperand(1).get())); + auto size_const = + llvm::cast(md->getOperand(2).get()); + auto size_int = llvm::cast(size_const->getValue()); + type = std::make_shared(elem, size_int->getZExtValue()); + } 
else if (tag_string == "ArrayType") { + auto elem = MDToTypeSpec(llvm::cast(md->getOperand(1).get())); + auto size_const = + llvm::cast(md->getOperand(2).get()); + auto size_int = llvm::cast(size_const->getValue()); + type = std::make_shared(elem, size_int->getZExtValue()); + } else if (tag_string == "StructType") { + auto struct_ = std::make_shared(); + for (unsigned i = 1; i < md->getNumOperands(); ++i) { + struct_->members.push_back( + MDToTypeSpec(llvm::cast(md->getOperand(i).get()))); + } + type = struct_; + } else if (tag_string == "FunctionType") { + // TODO(frabert) + } else if (tag_string == "UnknownType") { + auto size_const = + llvm::cast(md->getOperand(1).get()); + auto size_int = llvm::cast(size_const->getValue()); + type = UnknownType{static_cast(size_int->getZExtValue())}; + } + return type; +} + +std::optional +ConvertPointerArithmeticToGEP::Impl::GetTypeInfo(llvm::Value *val) { + llvm::MDNode *md = nullptr; + if (auto arg = llvm::dyn_cast(val)) { + auto args_md = arg->getParent()->getMetadata("anvill.args"); + if (!args_md) { + return {}; + } + + md = llvm::cast(args_md->getOperand(arg->getArgNo()).get()); + } else if (auto gvar = llvm::dyn_cast(val)) { + md = gvar->getMetadata("anvill.type"); + } else if (auto ptr_insn = llvm::dyn_cast(val)) { + md = ptr_insn->getMetadata("anvill.type"); + } -ConvertPointerArithmeticToGEP::ConvertPointerArithmeticToGEP() - : impl(std::make_unique()) {} + if (!md) { + return {}; + } + + return MDToTypeSpec(md); +} + +llvm::MDNode * +ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD(llvm::LLVMContext &context, + BaseType t) { + auto str = llvm::MDString::get(context, "BaseType"); + auto value = llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(context), + static_cast(t)); + return llvm::MDNode::get(context, + {str, llvm::ConstantAsMetadata::get(value)}); +} + +llvm::MDNode *ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD( + llvm::LLVMContext &context, std::shared_ptr t) { + auto str = 
llvm::MDString::get(context, "PointerType"); + return llvm::MDNode::get(context, {str, TypeSpecToMD(context, t->pointee)}); +} + +llvm::MDNode *ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD( + llvm::LLVMContext &context, std::shared_ptr t) { + auto str = llvm::MDString::get(context, "ArrayType"); + auto size = + llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(context), t->size); + return llvm::MDNode::get(context, {str, TypeSpecToMD(context, t->base), + llvm::ConstantAsMetadata::get(size)}); +} + +llvm::MDNode *ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD( + llvm::LLVMContext &context, std::shared_ptr t) { + auto str = llvm::MDString::get(context, "VectorType"); + auto size = + llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(context), t->size); + return llvm::MDNode::get(context, {str, TypeSpecToMD(context, t->base), + llvm::ConstantAsMetadata::get(size)}); +} + +llvm::MDNode *ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD( + llvm::LLVMContext &context, std::shared_ptr t) { + auto str = llvm::MDString::get(context, "ArrayType"); + std::vector members; + members.push_back(str); + for (auto member : t->members) { + members.push_back(TypeSpecToMD(context, member)); + } + return llvm::MDNode::get(context, members); +} + +llvm::MDNode *ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD( + llvm::LLVMContext &context, std::shared_ptr t) { + return nullptr; +} + +llvm::MDNode * +ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD(llvm::LLVMContext &context, + UnknownType t) { + auto str = llvm::MDString::get(context, "UnknownType"); + auto size = llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(context), + static_cast(t.size)); + return llvm::MDNode::get(context, {str, llvm::ConstantAsMetadata::get(size)}); +} + +llvm::MDNode * +ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD(llvm::LLVMContext &context, + TypeSpec type) { + return std::visit( + [this, &context](auto &&t) { return TypeSpecToMD(context, t); }, type); +} + 
+ConvertPointerArithmeticToGEP::ConvertPointerArithmeticToGEP(TypeMap &types, + StructMap &structs, + MDMap &md) + : impl(std::make_unique(types, structs, md)) {} + +ConvertPointerArithmeticToGEP::ConvertPointerArithmeticToGEP( + const ConvertPointerArithmeticToGEP &pass) + : impl(std::make_unique(pass.impl->types, pass.impl->structs, + pass.impl->md)) {} + + +ConvertPointerArithmeticToGEP::~ConvertPointerArithmeticToGEP() = default; llvm::StringRef ConvertPointerArithmeticToGEP::name() { return "ConvertPointerArithmeticToGEP"; } +// Finds `(inttoptr (add (ptrtoint P), A))` and tries to convert to GEP +void ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { + using namespace llvm::PatternMatch; + llvm::Value *ptr; + llvm::ConstantInt *offset_const; + auto &context = f.getContext(); + auto &dl = f.getParent()->getDataLayout(); + auto pat = + m_IntToPtr(m_Add(m_PtrToInt(m_Value(ptr)), m_ConstantInt(offset_const))); + for (auto &insn : llvm::instructions(f)) { + if (!match(&insn, pat)) { + continue; + } + + auto maybe_ptr_type = GetTypeInfo(ptr); + if (!maybe_ptr_type.has_value()) { + continue; + } + + if (!std::holds_alternative>( + *maybe_ptr_type)) { + continue; + } + + auto pointee_spec = + std::get>(*maybe_ptr_type)->pointee; + auto pointee_type = TypeSpecToType(context, pointee_spec); + + auto offset = offset_const->getZExtValue(); + std::vector indices; + + auto cur_spec = pointee_spec; + auto cur_type = pointee_type; + { + auto cur_size = dl.getTypeSizeInBits(cur_type) / 8; + auto index = offset / cur_size; + indices.push_back(index); + offset = offset % cur_size; + } + while (offset != 0) { + if (std::holds_alternative>(cur_spec)) { + auto struct_spec = std::get>(cur_spec); + auto struct_type = llvm::cast(cur_type); + + auto layout = dl.getStructLayout(struct_type); + auto index = layout->getElementContainingOffset(offset); + indices.push_back(index); + + cur_spec = struct_spec->members[index]; + cur_type = 
struct_type->getElementType(index); + offset -= layout->getElementOffset(index); + } else if (std::holds_alternative>(cur_spec)) { + auto arr_spec = std::get>(cur_spec); + auto arr_type = llvm::cast(cur_type); + + auto elem_size = + dl.getTypeSizeInBits(arr_type->getArrayElementType()) / 8; + auto index = offset / elem_size; + indices.push_back(index); + + cur_spec = arr_spec->base; + cur_type = arr_type->getArrayElementType(); + offset -= index * elem_size; + } else if (std::holds_alternative>( + cur_spec)) { + auto vec_spec = std::get>(cur_spec); + auto vec_type = llvm::cast(cur_type); + + auto elem_size = dl.getTypeSizeInBits(vec_type->getElementType()) / 8; + auto index = offset / elem_size; + indices.push_back(index); + + cur_spec = vec_spec->base; + cur_type = vec_type->getElementType(); + offset -= index * elem_size; + } else { + break; + } + } + + if (offset != 0) { + continue; + } + + std::vector indices_values; + auto i32 = llvm::Type::getInt32Ty(context); + for (auto i : indices) { + indices_values.push_back(llvm::ConstantInt::get(i32, i)); + } + auto gep = + llvm::GetElementPtrInst::Create(pointee_type, ptr, indices_values, "", + insn.getNextNonDebugInstruction()); + gep->setMetadata("anvill.type", TypeSpecToMD(context, cur_spec)); + insn.replaceAllUsesWith(gep); + } +} + llvm::PreservedAnalyses ConvertPointerArithmeticToGEP::run(llvm::Function &function, llvm::FunctionAnalysisManager &fam) { - + impl->FoldPtrAdd(function); return llvm::PreservedAnalyses::none(); } } // namespace anvill \ No newline at end of file diff --git a/lib/Passes/PropagateTypeAnnotations.cpp b/lib/Passes/PropagateTypeAnnotations.cpp deleted file mode 100644 index 78daa8ba0..000000000 --- a/lib/Passes/PropagateTypeAnnotations.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2022-present, Trail of Bits, Inc. - * All rights reserved. 
- * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include -#include - -namespace anvill { -struct PropagateTypeAnnotations::Impl { - StructMap &structs; - - llvm::Type *MDToType(llvm::LLVMContext &context, llvm::MDNode *md); - llvm::StructType *MDToStruct(llvm::LLVMContext &context, llvm::MDNode *md); - - Impl(StructMap &structs) : structs(structs) {} -}; - -llvm::Type *PropagateTypeAnnotations::Impl::MDToType(llvm::LLVMContext &context, - llvm::MDNode *md) { - auto tag = llvm::cast(md->getOperand(0).get()); - auto tag_string = tag->getString(); - if (tag_string == "BaseType") { - auto kind_const = - llvm::cast(md->getOperand(1).get()); - auto kind_int = llvm::cast(kind_const->getValue()); - auto kind = static_cast(kind_int->getZExtValue()); - - switch (kind) { - case BaseType::Bool: - case BaseType::Char: - case BaseType::SignedChar: - case BaseType::UnsignedChar: - case BaseType::Int8: - case BaseType::UInt8: - case BaseType::Padding: return llvm::Type::getInt8Ty(context); - - case BaseType::Int16: - case BaseType::UInt16: return llvm::Type::getInt16Ty(context); - - case BaseType::Int24: - case BaseType::UInt24: - case BaseType::Int32: - case BaseType::UInt32: return llvm::Type::getInt32Ty(context); - - case BaseType::Int64: - case BaseType::UInt64: return llvm::Type::getInt64Ty(context); - - case BaseType::Int128: - case BaseType::UInt128: return llvm::Type::getInt128Ty(context); - - case BaseType::Float16: return llvm::Type::getHalfTy(context); - case BaseType::Float32: return llvm::Type::getFloatTy(context); - case BaseType::Float64: return llvm::Type::getDoubleTy(context); - case BaseType::Float80: return llvm::Type::getX86_FP80Ty(context); - case BaseType::Float128: return llvm::Type::getFP128Ty(context); - case BaseType::MMX64: return llvm::Type::getX86_MMXTy(context); - - case BaseType::Void: return 
llvm::Type::getVoidTy(context); - - default: return nullptr; - } - } else if (tag_string == "PointerType") { - return llvm::PointerType::get(context, 0); - } else if (tag_string == "VectorType") { - auto elem = - MDToType(context, llvm::cast(md->getOperand(1).get())); - auto size_const = - llvm::cast(md->getOperand(2).get()); - auto size_int = llvm::cast(size_const->getValue()); - return llvm::VectorType::get(elem, size_int->getZExtValue(), false); - } else if (tag_string == "ArrayType") { - auto elem = - MDToType(context, llvm::cast(md->getOperand(1).get())); - auto size_const = - llvm::cast(md->getOperand(2).get()); - auto size_int = llvm::cast(size_const->getValue()); - return llvm::ArrayType::get(elem, size_int->getZExtValue()); - } else if (tag_string == "StructType") { - return MDToStruct(context, md); - } else if (tag_string == "FunctionType") { - // TODO(frabert) - return nullptr; - } else if (tag_string == "UnknownType") { - auto size_const = - llvm::cast(md->getOperand(1).get()); - auto size_int = llvm::cast(size_const->getValue()); - return llvm::Type::getIntNTy(context, size_int->getZExtValue() * 8); - } - return nullptr; -} - -llvm::StructType * -PropagateTypeAnnotations::Impl::MDToStruct(llvm::LLVMContext &context, - llvm::MDNode *md) { - auto &struct_ = structs[md]; - if (struct_) { - return struct_; - } - - std::vector elems; - for (unsigned i = 1; i < md->getNumOperands(); ++i) { - elems.push_back( - MDToType(context, llvm::cast(md->getOperand(i).get()))); - } - struct_ = llvm::StructType::get(context, elems, true); - - return struct_; -} - -PropagateTypeAnnotations::PropagateTypeAnnotations(StructMap &structs) - : impl(std::make_unique(structs)) {} - -llvm::StringRef PropagateTypeAnnotations::name() { - return "PropagateTypeAnnotations"; -} - -llvm::PreservedAnalyses -PropagateTypeAnnotations::run(llvm::Function &function, - llvm::FunctionAnalysisManager &fam) { - - return llvm::PreservedAnalyses::all(); -} -} // namespace anvill \ No newline at 
end of file From d8802a62790fd167003c385ab1162231c8423b6c Mon Sep 17 00:00:00 2001 From: 2over12 Date: Sun, 11 Dec 2022 11:03:41 -0500 Subject: [PATCH 057/163] remove old transform --- lib/CMakeLists.txt | 2 - lib/Lifters/BasicBlockTransform.cpp | 92 ----------------------------- lib/Lifters/BasicBlockTransform.h | 50 ---------------- lib/Lifters/FunctionLifter.h | 4 -- 4 files changed, 148 deletions(-) delete mode 100644 lib/Lifters/BasicBlockTransform.cpp delete mode 100644 lib/Lifters/BasicBlockTransform.h diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 1f45682e1..a058f376d 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -86,7 +86,6 @@ set(anvill_lifters_HEADERS "Lifters/BasicBlockLifter.h" "Lifters/CodeLifter.h" "Lifters/ValueLifter.h" - "Lifters/BasicBlockTransform.h" ) set(anvill_lifters_SOURCES @@ -97,7 +96,6 @@ set(anvill_lifters_SOURCES "Lifters/BasicBlockLifter.cpp" "Lifters/Options.cpp" "Lifters/ValueLifter.cpp" - "Lifters/BasicBlockTransform.cpp" ) set(anvill_providers_SOURCES diff --git a/lib/Lifters/BasicBlockTransform.cpp b/lib/Lifters/BasicBlockTransform.cpp deleted file mode 100644 index a3b4adaa5..000000000 --- a/lib/Lifters/BasicBlockTransform.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "BasicBlockTransform.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "anvill/ABI.h" -#include "anvill/Lifters.h" - -namespace anvill { -Transformed -CallAndInitializeParameters::TransformInternal(const AnvillBasicBlock &bb) { - std::vector args( - bb.basic_block_repr_func->getFunctionType()->param_begin(), - bb.basic_block_repr_func->getFunctionType()->param_end()); - - - auto num_bb_func_pars = args.size(); - auto vars = bb.context.GetAvailableVariables(); - for (const auto &v : vars) { - args.push_back(v.type); - } - - auto ntype = llvm::FunctionType::get( - bb.basic_block_repr_func->getReturnType(), args, false); - - 
auto nfunc = llvm::Function::Create(ntype, llvm::GlobalValue::ExternalLinkage, - bb.basic_block_repr_func->getName(), - bb.basic_block_repr_func->getParent()); - - - llvm::ValueToValueMapTy mp; - - for (size_t i = 0; - i < bb.basic_block_repr_func->getFunctionType()->getNumParams(); i++) { - mp.insert({bb.basic_block_repr_func->getArg(i), nfunc->getArg(i)}); - } - - llvm::SmallVector rets; - llvm::CloneFunctionInto(nfunc, bb.basic_block_repr_func, mp, - llvm::CloneFunctionChangeType::LocalChangesOnly, - rets); - - llvm::IRBuilder<> ir(&nfunc->getEntryBlock()); - // TODO(Ian): instead of doing this allow StoreNative to take an insertion point that isnt a block end. - auto cont_block = - llvm::cast(nfunc->getEntryBlock().getTerminator()) - ->getSuccessor(0); - nfunc->getEntryBlock().getTerminator()->eraseFromParent(); - - auto state_ptr = nfunc->getArg(remill::kStatePointerArgNum); - - auto mem_ptr_ref = remill::LoadMemoryPointerRef(&nfunc->getEntryBlock()); - auto mem_ptr_ty = nfunc->getArg(remill::kMemoryPointerArgNum)->getType(); - - for (size_t i = 0; i < vars.size(); i++) { - llvm::Value *native_val = nfunc->getArg(i + num_bb_func_pars); - auto decl = vars[i]; - auto mem_ptr = ir.CreateLoad(mem_ptr_ty, mem_ptr_ref); - auto new_mem = - StoreNativeValue(native_val, decl, this->types, this->intrinsics, - ir.GetInsertBlock(), state_ptr, mem_ptr); - ir.CreateStore(new_mem, mem_ptr_ref); - } - - llvm::BranchInst::Create(cont_block, &nfunc->getEntryBlock()); - - return {nfunc, vars}; -} - -Transformed BasicBlockTransform::Transform(const AnvillBasicBlock &bb) { - auto res = this->TransformInternal(bb); - res.new_func->setMetadata( - kBasicBlockMetadata, - bb.basic_block_repr_func->getMetadata(kBasicBlockMetadata)); - return res; -} - - -} // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/BasicBlockTransform.h b/lib/Lifters/BasicBlockTransform.h deleted file mode 100644 index 5a4117e2a..000000000 --- a/lib/Lifters/BasicBlockTransform.h +++ 
/dev/null @@ -1,50 +0,0 @@ - -#include -#include -#include -#include - -#include -namespace anvill { - -struct AnvillBasicBlock { - llvm::Function *basic_block_repr_func; - const BasicBlockContext &context; -}; - - -struct Transformed { - llvm::Function *new_func; - std::vector appended_args; -}; - -class BasicBlockTransform { - public: - BasicBlockTransform(const TypeDictionary &types, - const remill::IntrinsicTable &intrinsics) - : types(types), - intrinsics(intrinsics) {} - - public: - virtual Transformed Transform(const AnvillBasicBlock &bb); - - protected: - virtual Transformed TransformInternal(const AnvillBasicBlock &bb) { - return {bb.basic_block_repr_func, {}}; - }; - - const TypeDictionary &types; - const remill::IntrinsicTable &intrinsics; -}; - - -class CallAndInitializeParameters : public BasicBlockTransform { - protected: - virtual Transformed TransformInternal(const AnvillBasicBlock &bb); - - public: - CallAndInitializeParameters(const TypeDictionary &types, - const remill::IntrinsicTable &intrinsics) - : BasicBlockTransform(types, intrinsics) {} -}; -} // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 22f7dd4ae..6aaeaf26a 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -29,7 +29,6 @@ #include #include -#include "BasicBlockTransform.h" #include "CodeLifter.h" #include "Lifters/BasicBlockLifter.h" @@ -190,9 +189,6 @@ class FunctionLifter : public CodeLifter { // instruction. 
llvm::BasicBlock *GetOrCreateBlock(uint64_t addr); - void ApplyBasicBlockTransform(BasicBlockTransform &transform, - llvm::Value *lifted_function_state); - // Attempts to lookup any redirection of the given address, and then // calls GetOrCreateBlock llvm::BasicBlock * From 0da8b48d3d602578bab02ef4c2d243451fc7ae4f Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 12 Dec 2022 12:15:49 +0100 Subject: [PATCH 058/163] Convert global loads to pointers --- lib/Passes/ConvertPointerArithmeticToGEP.cpp | 67 +++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index 701281a15..3729b32b9 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -68,6 +69,7 @@ struct ConvertPointerArithmeticToGEP::Impl { llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, UnknownType t); llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, TypeSpec type); + void ConvertLoadInt(llvm::Function &f); void FoldPtrAdd(llvm::Function &f); Impl(TypeMap &types, StructMap &structs, MDMap &md) @@ -278,7 +280,7 @@ llvm::MDNode *ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD( llvm::MDNode *ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD( llvm::LLVMContext &context, std::shared_ptr t) { - auto str = llvm::MDString::get(context, "ArrayType"); + auto str = llvm::MDString::get(context, "StructType"); std::vector members; members.push_back(str); for (auto member : t->members) { @@ -325,6 +327,68 @@ llvm::StringRef ConvertPointerArithmeticToGEP::name() { return "ConvertPointerArithmeticToGEP"; } +// Finds `(load i64, P)` and converts it to `(ptrtoint (load ptr, P))` +void ConvertPointerArithmeticToGEP::Impl::ConvertLoadInt(llvm::Function &f) { + using namespace llvm::PatternMatch; + llvm::Value *ptr; + auto &context = f.getContext(); 
+ auto &dl = f.getParent()->getDataLayout(); + auto pat = m_Load(m_Value(ptr)); + for (auto &insn : llvm::instructions(f)) { + if (!match(&insn, pat)) { + continue; + } + + auto old_load = llvm::cast(&insn); + auto load_ty = old_load->getType(); + if (load_ty != llvm::Type::getIntNTy(context, dl.getPointerSizeInBits())) { + continue; + } + + auto maybe_type_info = GetTypeInfo(ptr); + if (!maybe_type_info) { + continue; + } + auto type_info = *maybe_type_info; + + if (auto gvar = llvm::dyn_cast(ptr)) { + if (!std::holds_alternative>(type_info)) { + continue; + } + + auto ptr_type = std::get>(type_info); + auto new_load = new llvm::LoadInst(llvm::PointerType::get(context, 0), + ptr, "", &insn); + new_load->setMetadata("anvill.type", TypeSpecToMD(context, type_info)); + auto ptrtoint = new llvm::PtrToIntInst(new_load, load_ty, "", &insn); + insn.replaceAllUsesWith(ptrtoint); + + continue; + } + + if (auto ptr_insn = llvm::dyn_cast(ptr)) { + if (!std::holds_alternative>(type_info)) { + continue; + } + + auto ptr_type = std::get>(type_info); + if (!std::holds_alternative>( + ptr_type->pointee)) { + continue; + } + + auto new_load = new llvm::LoadInst(llvm::PointerType::get(context, 0), + ptr, "", &insn); + new_load->setMetadata("anvill.type", + TypeSpecToMD(context, ptr_type->pointee)); + auto ptrtoint = new llvm::PtrToIntInst(new_load, load_ty, "", &insn); + insn.replaceAllUsesWith(ptrtoint); + + continue; + } + } +} + // Finds `(inttoptr (add (ptrtoint P), A))` and tries to convert to GEP void ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { using namespace llvm::PatternMatch; @@ -425,6 +489,7 @@ void ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { llvm::PreservedAnalyses ConvertPointerArithmeticToGEP::run(llvm::Function &function, llvm::FunctionAnalysisManager &fam) { + impl->ConvertLoadInt(function); impl->FoldPtrAdd(function); return llvm::PreservedAnalyses::none(); } From 754d06f3d448d96e332b1cd078d91eeb60bd3e16 Mon Sep 17 
00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 12 Dec 2022 12:25:21 +0100 Subject: [PATCH 059/163] Report if there were any changes --- lib/Passes/ConvertPointerArithmeticToGEP.cpp | 25 +++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index 3729b32b9..e1b9de34f 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -69,8 +69,8 @@ struct ConvertPointerArithmeticToGEP::Impl { llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, UnknownType t); llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, TypeSpec type); - void ConvertLoadInt(llvm::Function &f); - void FoldPtrAdd(llvm::Function &f); + bool ConvertLoadInt(llvm::Function &f); + bool FoldPtrAdd(llvm::Function &f); Impl(TypeMap &types, StructMap &structs, MDMap &md) : types(types), @@ -328,7 +328,7 @@ llvm::StringRef ConvertPointerArithmeticToGEP::name() { } // Finds `(load i64, P)` and converts it to `(ptrtoint (load ptr, P))` -void ConvertPointerArithmeticToGEP::Impl::ConvertLoadInt(llvm::Function &f) { +bool ConvertPointerArithmeticToGEP::Impl::ConvertLoadInt(llvm::Function &f) { using namespace llvm::PatternMatch; llvm::Value *ptr; auto &context = f.getContext(); @@ -363,7 +363,7 @@ void ConvertPointerArithmeticToGEP::Impl::ConvertLoadInt(llvm::Function &f) { auto ptrtoint = new llvm::PtrToIntInst(new_load, load_ty, "", &insn); insn.replaceAllUsesWith(ptrtoint); - continue; + return true; } if (auto ptr_insn = llvm::dyn_cast(ptr)) { @@ -384,13 +384,15 @@ void ConvertPointerArithmeticToGEP::Impl::ConvertLoadInt(llvm::Function &f) { auto ptrtoint = new llvm::PtrToIntInst(new_load, load_ty, "", &insn); insn.replaceAllUsesWith(ptrtoint); - continue; + return true; } } + + return false; } // Finds `(inttoptr (add (ptrtoint P), A))` and tries to convert to GEP -void 
ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { +bool ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { using namespace llvm::PatternMatch; llvm::Value *ptr; llvm::ConstantInt *offset_const; @@ -483,14 +485,19 @@ void ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { insn.getNextNonDebugInstruction()); gep->setMetadata("anvill.type", TypeSpecToMD(context, cur_spec)); insn.replaceAllUsesWith(gep); + + return true; } + + return false; } llvm::PreservedAnalyses ConvertPointerArithmeticToGEP::run(llvm::Function &function, llvm::FunctionAnalysisManager &fam) { - impl->ConvertLoadInt(function); - impl->FoldPtrAdd(function); - return llvm::PreservedAnalyses::none(); + bool changed = impl->ConvertLoadInt(function); + changed |= impl->FoldPtrAdd(function); + return changed ? llvm::PreservedAnalyses::none() + : llvm::PreservedAnalyses::all(); } } // namespace anvill \ No newline at end of file From 5ae55c8757fd502fe57e689d922f4e6ab4c2cbb9 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 12 Dec 2022 12:53:10 +0100 Subject: [PATCH 060/163] Improve recognition of indices --- lib/Passes/ConvertPointerArithmeticToGEP.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index e1b9de34f..668da3be3 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -391,15 +391,14 @@ bool ConvertPointerArithmeticToGEP::Impl::ConvertLoadInt(llvm::Function &f) { return false; } -// Finds `(inttoptr (add (ptrtoint P), A))` and tries to convert to GEP +// Finds `(add (ptrtoint P), A)` and tries to convert to `(ptrtoint (gep ...))` bool ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { using namespace llvm::PatternMatch; llvm::Value *ptr; llvm::ConstantInt *offset_const; auto &context = f.getContext(); auto &dl = 
f.getParent()->getDataLayout(); - auto pat = - m_IntToPtr(m_Add(m_PtrToInt(m_Value(ptr)), m_ConstantInt(offset_const))); + auto pat = m_Add(m_PtrToInt(m_Value(ptr)), m_ConstantInt(offset_const)); for (auto &insn : llvm::instructions(f)) { if (!match(&insn, pat)) { continue; @@ -480,11 +479,14 @@ bool ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { for (auto i : indices) { indices_values.push_back(llvm::ConstantInt::get(i32, i)); } - auto gep = - llvm::GetElementPtrInst::Create(pointee_type, ptr, indices_values, "", - insn.getNextNonDebugInstruction()); + auto next_insn = insn.getNextNonDebugInstruction(); + auto gep = llvm::GetElementPtrInst::Create(pointee_type, ptr, + indices_values, "", next_insn); gep->setMetadata("anvill.type", TypeSpecToMD(context, cur_spec)); - insn.replaceAllUsesWith(gep); + auto ptrtoint = new llvm::PtrToIntInst( + gep, llvm::Type::getIntNTy(context, dl.getPointerSizeInBits()), "", + next_insn); + insn.replaceAllUsesWith(ptrtoint); return true; } From b4851cd89219a1b6753068e382ca88147afea2e6 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 12 Dec 2022 14:38:22 +0100 Subject: [PATCH 061/163] Recognize scaled index access --- lib/Passes/ConvertPointerArithmeticToGEP.cpp | 107 +++++++++++++++---- 1 file changed, 89 insertions(+), 18 deletions(-) diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index 668da3be3..9b03e642f 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -42,17 +42,17 @@ struct ConvertPointerArithmeticToGEP::Impl { std::optional GetTypeInfo(llvm::Value *val); llvm::Type *TypeSpecToType(llvm::LLVMContext &context, BaseType t); - llvm::Type *TypeSpecToType(llvm::LLVMContext &context, - std::shared_ptr t); - llvm::Type *TypeSpecToType(llvm::LLVMContext &context, - std::shared_ptr t); - llvm::Type *TypeSpecToType(llvm::LLVMContext &context, - std::shared_ptr t); - llvm::Type 
*TypeSpecToType(llvm::LLVMContext &context, - std::shared_ptr t); - llvm::Type *TypeSpecToType(llvm::LLVMContext &context, - std::shared_ptr t); - llvm::Type *TypeSpecToType(llvm::LLVMContext &context, UnknownType t); + llvm::PointerType *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::ArrayType *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::FixedVectorType *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::StructType *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::FunctionType *TypeSpecToType(llvm::LLVMContext &context, + std::shared_ptr t); + llvm::IntegerType *TypeSpecToType(llvm::LLVMContext &context, UnknownType t); llvm::Type *TypeSpecToType(llvm::LLVMContext &context, TypeSpec type); llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, BaseType t); @@ -71,6 +71,7 @@ struct ConvertPointerArithmeticToGEP::Impl { bool ConvertLoadInt(llvm::Function &f); bool FoldPtrAdd(llvm::Function &f); + bool FoldScaledIndex(llvm::Function &f); Impl(TypeMap &types, StructMap &structs, MDMap &md) : types(types), @@ -119,22 +120,22 @@ ConvertPointerArithmeticToGEP::Impl::TypeSpecToType(llvm::LLVMContext &context, } } -llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( +llvm::PointerType *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( llvm::LLVMContext &context, std::shared_ptr t) { return llvm::PointerType::get(context, 0); } -llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( +llvm::ArrayType *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( llvm::LLVMContext &context, std::shared_ptr t) { return llvm::ArrayType::get(TypeSpecToType(context, t->base), t->size); } -llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( +llvm::FixedVectorType *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( llvm::LLVMContext &context, std::shared_ptr t) { return llvm::FixedVectorType::get(TypeSpecToType(context, t->base), t->size); } 
-llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( +llvm::StructType *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( llvm::LLVMContext &context, std::shared_ptr t) { auto &type = structs[t.get()]; if (type) { @@ -149,7 +150,7 @@ llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( return type; } -llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( +llvm::FunctionType *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( llvm::LLVMContext &context, std::shared_ptr t) { std::vector args; for (auto arg : t->arguments) { @@ -159,7 +160,7 @@ llvm::Type *ConvertPointerArithmeticToGEP::Impl::TypeSpecToType( t->is_variadic); } -llvm::Type * +llvm::IntegerType * ConvertPointerArithmeticToGEP::Impl::TypeSpecToType(llvm::LLVMContext &context, UnknownType t) { return llvm::Type::getIntNTy(context, t.size * 8); @@ -169,7 +170,10 @@ llvm::Type * ConvertPointerArithmeticToGEP::Impl::TypeSpecToType(llvm::LLVMContext &context, TypeSpec type) { return std::visit( - [this, &context](auto &&t) { return TypeSpecToType(context, t); }, type); + [this, &context](auto &&t) { + return static_cast(TypeSpecToType(context, t)); + }, + type); } TypeSpec ConvertPointerArithmeticToGEP::Impl::MDToTypeSpec(llvm::MDNode *md) { @@ -494,11 +498,78 @@ bool ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { return false; } +// Convert `(add (ptrtoint P), (shl I, S))` to `(ptrtoint (gep P, I))` +bool ConvertPointerArithmeticToGEP::Impl::FoldScaledIndex(llvm::Function &f) { + using namespace llvm::PatternMatch; + llvm::Value *ptr; + llvm::Value *base; + llvm::ConstantInt *shift_const; + auto &context = f.getContext(); + auto &dl = f.getParent()->getDataLayout(); + auto patL = m_Add(m_PtrToInt(m_Value(ptr)), + m_Shl(m_Value(base), m_ConstantInt(shift_const))); + auto patR = m_Add(m_Shl(m_Value(base), m_ConstantInt(shift_const)), + m_PtrToInt(m_Value(ptr))); + auto ptrint_ty = llvm::Type::getIntNTy(context, dl.getPointerSizeInBits()); + for 
(auto &insn : llvm::instructions(f)) { + if (!match(&insn, patL) && !match(&insn, patR)) { + continue; + } + + auto maybe_type_info = GetTypeInfo(ptr); + if (!maybe_type_info.has_value()) { + continue; + } + + auto scale = 1ull << shift_const->getZExtValue(); + auto type_info = *maybe_type_info; + + auto next_insn = insn.getNextNonDebugInstruction(); + + if (std::holds_alternative>(type_info)) { + auto array_spec = std::get>(type_info); + auto array_type = TypeSpecToType(context, array_spec); + auto elem_size = + dl.getTypeSizeInBits(array_type->getArrayElementType()) / 8; + if (scale != elem_size) { + continue; + } + + auto gep = llvm::GetElementPtrInst::Create( + array_type, ptr, {llvm::ConstantInt::get(ptrint_ty, 0), base}, "", + next_insn); + gep->setMetadata("anvill.type", TypeSpecToMD(context, array_spec->base)); + auto ptrtoint = new llvm::PtrToIntInst(gep, ptrint_ty, "", next_insn); + insn.replaceAllUsesWith(ptrtoint); + return true; + } + + if (std::holds_alternative>(type_info)) { + auto vector_spec = std::get>(type_info); + auto vector_type = TypeSpecToType(context, vector_spec); + auto elem_size = dl.getTypeSizeInBits(vector_type->getElementType()) / 8; + if (scale != elem_size) { + continue; + } + + auto gep = llvm::GetElementPtrInst::Create( + vector_type, ptr, {llvm::ConstantInt::get(ptrint_ty, 0), base}, "", + next_insn); + gep->setMetadata("anvill.type", TypeSpecToMD(context, vector_spec->base)); + auto ptrtoint = new llvm::PtrToIntInst(gep, ptrint_ty, "", next_insn); + insn.replaceAllUsesWith(ptrtoint); + return true; + } + } + return false; +} + llvm::PreservedAnalyses ConvertPointerArithmeticToGEP::run(llvm::Function &function, llvm::FunctionAnalysisManager &fam) { bool changed = impl->ConvertLoadInt(function); changed |= impl->FoldPtrAdd(function); + changed |= impl->FoldScaledIndex(function); return changed ? 
llvm::PreservedAnalyses::none() : llvm::PreservedAnalyses::all(); } From a130f0619225d00ce22e6c0539c7d972cdf31b97 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 12 Dec 2022 13:43:01 -0500 Subject: [PATCH 062/163] basic stack vars --- bin/Decompile/Main.cpp | 3 +- include/anvill/Declarations.h | 8 + include/anvill/Optimize.h | 5 +- include/anvill/Passes/BasicBlockPass.h | 49 +++++ .../anvill/Passes/ReplaceStackReferences.h | 33 ++++ include/anvill/Specification.h | 19 ++ lib/CMakeLists.txt | 1 + lib/Declarations.cpp | 22 +++ lib/Lifters/BasicBlockLifter.cpp | 20 +- lib/Lifters/BasicBlockLifter.h | 4 +- lib/Lifters/CodeLifter.cpp | 8 + lib/Lifters/CodeLifter.h | 4 + lib/Lifters/FunctionLifter.cpp | 6 +- lib/Lifters/FunctionLifter.h | 3 - lib/Optimize.cpp | 5 +- lib/Passes/ReplaceStackReferences.cpp | 182 ++++++++++++++++++ lib/Specification.cpp | 17 ++ lib/Specification.h | 3 + 18 files changed, 367 insertions(+), 25 deletions(-) create mode 100644 include/anvill/Passes/BasicBlockPass.h create mode 100644 include/anvill/Passes/ReplaceStackReferences.h create mode 100644 lib/Passes/ReplaceStackReferences.cpp diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index f089da416..9fee39bd0 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -241,7 +241,8 @@ int main(int argc, char *argv[]) { llvm::EnableStatistics(); } - anvill::OptimizeModule(lifter, module); + + anvill::OptimizeModule(lifter, module, spec.GetBlockContexts()); int ret = EXIT_SUCCESS; diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index d0e8d866b..974141f3b 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -9,6 +9,8 @@ #pragma once #include <_types/_uint64_t.h> +#include +#include #include #include @@ -202,6 +204,9 @@ class BasicBlockContext { public: virtual std::vector GetAvailableVariables() const = 0; virtual const SpecStackOffsets &GetStackOffsets() const = 0; + + + llvm::StructType 
*StructTypeFromVars(llvm::LLVMContext &llvm_context) const; }; struct FunctionDecl; @@ -271,6 +276,9 @@ struct FunctionDecl : public CallableDecl { const remill::Arch *arch); SpecBlockContext GetBlockContext(std::uint64_t addr) const; + + void + AddBBContexts(std::unordered_map &contexts) const; }; // A call site decl, as represented at a "near ABI" level. This is like a diff --git a/include/anvill/Optimize.h b/include/anvill/Optimize.h index 77327888d..c97a2a953 100644 --- a/include/anvill/Optimize.h +++ b/include/anvill/Optimize.h @@ -8,6 +8,7 @@ #pragma once +#include "anvill/Passes/BasicBlockPass.h" namespace llvm { class Module; } // namespace llvm @@ -21,7 +22,7 @@ class EntityLifter; // Optimize a module. This can be a module with semantics code, lifted // code, etc. -void OptimizeModule(const EntityLifter &lifter_context, - llvm::Module &module); +void OptimizeModule(const EntityLifter &lifter_context, llvm::Module &module, + const BasicBlockContexts &contexts); } // namespace anvill diff --git a/include/anvill/Passes/BasicBlockPass.h b/include/anvill/Passes/BasicBlockPass.h new file mode 100644 index 000000000..6edaa8d9d --- /dev/null +++ b/include/anvill/Passes/BasicBlockPass.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include + +#include + +#include "anvill/Declarations.h" + +namespace anvill { + + +class BasicBlockContexts { + public: + virtual std::optional> + GetBasicBlockContextForAddr(uint64_t addr) const = 0; +}; + +template +class BasicBlockPass : public llvm::PassInfoMixin> { + private: + const BasicBlockContexts &contexts; + + + public: + static llvm::StringRef name(void) { + return T::name(); + } + + llvm::PreservedAnalyses run(llvm::Function &F, + llvm::FunctionAnalysisManager &AM) { + auto &bb_pass = *static_cast(this); + auto bbaddr = anvill::GetBasicBlockAddr(&F); + if (bbaddr.has_value()) { + auto bb_cont = this->contexts.GetBasicBlockContextForAddr(*bbaddr); + if (bb_cont) { + return 
bb_pass.runOnBasicBlockFunction(F, AM, *bb_cont); + } + } + + return llvm::PreservedAnalyses::all(); + } + + protected: + BasicBlockPass(const BasicBlockContexts &contexts) : contexts(contexts) {} +}; +} // namespace anvill \ No newline at end of file diff --git a/include/anvill/Passes/ReplaceStackReferences.h b/include/anvill/Passes/ReplaceStackReferences.h new file mode 100644 index 000000000..57312b16c --- /dev/null +++ b/include/anvill/Passes/ReplaceStackReferences.h @@ -0,0 +1,33 @@ + +#pragma once + +#include +#include + +#include "anvill/Lifters.h" + + +namespace anvill { +/** + * @brief Replaces references to anvill_pc +- disp with a pointer to the represented local variable. + * If variable information seperatates variables that are actually overlapping this pass may separate variables in an unsound way. + */ +class ReplaceStackReferences final + : public BasicBlockPass { + private: + const EntityLifter &lifter; + + public: + ReplaceStackReferences(const BasicBlockContexts &contexts, + const EntityLifter &lifter) + : BasicBlockPass(contexts), + lifter(lifter) {} + + static llvm::StringRef name(void); + + + llvm::PreservedAnalyses + runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, + const anvill::BasicBlockContext &); +}; +} // namespace anvill \ No newline at end of file diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index f7e4fda46..dd2781976 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -8,6 +8,8 @@ #pragma once +#include + #include #include #include @@ -86,6 +88,19 @@ struct VariableDecl; struct ParameterDecl; struct ValueDecl; + +class Specification; +class SpecBlockContexts : public BasicBlockContexts { + std::unordered_map contexts; + + public: + SpecBlockContexts(const Specification &spec); + + virtual std::optional> + GetBasicBlockContextForAddr(uint64_t addr) const override; +}; + + // Represents the data pulled out of a JSON (sub-)program 
specification. class Specification { private: @@ -170,6 +185,10 @@ class Specification { inline bool operator!=(const Specification &that) const noexcept { return impl.get() == that.impl.get(); } + + SpecBlockContexts GetBlockContexts() const { + return SpecBlockContexts(*this); + } }; } // namespace anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index a058f376d..500504c9e 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -58,6 +58,7 @@ set(anvill_passes SpreadPCMetadata TransformRemillJumpIntrinsics CombineAdjacentShifts + ReplaceStackReferences ) set(anvill_arch_HEADERS diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index c6f876106..45e26f50a 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -29,10 +29,12 @@ #include #include +#include #include #include "Arch/Arch.h" #include "Protobuf.h" +#include "anvill/Specification.h" namespace anvill { @@ -53,6 +55,26 @@ VariableDecl::DeclareInModule(const std::string &name, name); } +void FunctionDecl::AddBBContexts( + std::unordered_map &contexts) const { + for (const auto &[addr, _] : this->cfg) { + contexts.insert({addr, this->GetBlockContext(addr)}); + } +} + + +llvm::StructType * +BasicBlockContext::StructTypeFromVars(llvm::LLVMContext &llvm_context) const { + auto in_scope_locals = this->GetAvailableVariables(); + std::vector field_types; + std::transform(in_scope_locals.begin(), in_scope_locals.end(), + std::back_inserter(field_types), + [](const ParameterDecl ¶m) { return param.type; }); + + return llvm::StructType::get(llvm_context, field_types, + "sty_for_basic_block_function"); +} + // Declare this function in an LLVM module. 
llvm::Function * FunctionDecl::DeclareInModule(std::string_view name, diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index bc6cf7663..d7cca18be 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -330,9 +330,7 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const { - auto pc_val = llvm::ConstantInt::get(address_type, addr); - auto pc_md = llvm::ValueAsMetadata::get(pc_val); - return llvm::MDNode::get(this->semantics_module->getContext(), pc_md); + return this->GetAddrAnnotation(addr, this->semantics_module->getContext()); } BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { @@ -450,15 +448,8 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { } -llvm::StructType *BasicBlockLifter::StructTypeFromVars( - const std::vector &in_scope_locals) const { - std::vector field_types; - std::transform(in_scope_locals.begin(), in_scope_locals.end(), - std::back_inserter(field_types), - [](const ParameterDecl ¶m) { return param.type; }); - - return llvm::StructType::create(llvm_context, field_types, - "sty_for_basic_block_function"); +llvm::StructType *BasicBlockLifter::StructTypeFromVars() const { + return this->block_context.StructTypeFromVars(this->llvm_context); } // Packs in scope variables into a struct @@ -493,7 +484,7 @@ void BasicBlockLifter::UnpackLocals( {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, field_offset)}); - auto loaded_var_val = bldr.CreateLoad(decl.type, ptr); + auto loaded_var_val = bldr.CreateLoad(decl.type, ptr, decl.name); field_offset += 1; auto new_mem_ptr = StoreNativeValue( loaded_var_val, decl, this->type_provider.Dictionary(), @@ -562,8 +553,7 @@ BasicBlockLifter::BasicBlockLifter(const BasicBlockContext &block_context, : CodeLifter(options_, semantics_module, type_specifier), block_context(block_context), block_def(block_def) { - 
this->var_struct_ty = - this->StructTypeFromVars(this->block_context.GetAvailableVariables()); + this->var_struct_ty = this->StructTypeFromVars(); } CallableBasicBlockFunction::CallableBasicBlockFunction( diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 8c63a20e7..0e371048d 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -45,8 +46,7 @@ class BasicBlockLifter : public CodeLifter { llvm::Function *lifted_func{nullptr}; - llvm::StructType * - StructTypeFromVars(const std::vector &in_scope_locals) const; + llvm::StructType *StructTypeFromVars() const; remill::DecodingContext ApplyContextAssignments( const std::unordered_map &assignments, diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index 2e90ff4ba..3491173b8 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -163,6 +163,14 @@ void CodeLifter::InitializeStateStructureFromGlobalRegisterVariables( }); } +llvm::MDNode *CodeLifter::GetAddrAnnotation(uint64_t addr, + llvm::LLVMContext &context) const { + auto pc_val = llvm::ConstantInt::get( + remill::RecontextualizeType(address_type, context), addr); + auto pc_md = llvm::ValueAsMetadata::get(pc_val); + return llvm::MDNode::get(context, pc_md); +} + // Allocate and initialize the state structure. 
llvm::Value * CodeLifter::AllocateAndInitializeStateStructure(llvm::BasicBlock *block, diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index 0ea73d977..3bf1479a2 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -73,10 +73,14 @@ class CodeLifter { unsigned pc_annotation_id; + llvm::MDNode *GetAddrAnnotation(uint64_t addr, + llvm::LLVMContext &context) const; + public: CodeLifter(const LifterOptions &options, llvm::Module *semantics_module, const TypeTranslator &type_specifier); + CodeLifter(CodeLifter &&) = default; }; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 52ce62858..db89346d1 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -434,7 +434,7 @@ void FunctionLifter::VisitBlock(CodeBlock blk, CHECK(!llvm::verifyFunction(*bbfunc.GetFunction(), &llvm::errs())); bbfunc.CallBasicBlockFunction(builder, lifted_function_state); - + CHECK(anvill::GetBasicBlockAddr(bbfunc.GetFunction()).has_value()); auto pc = remill::LoadNextProgramCounter(llvm_blk, this->intrinsics); @@ -768,6 +768,10 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, new_version = llvm::Function::Create( type, llvm::GlobalValue::ExternalLinkage, name, target_module); remill::CloneFunctionInto(old_version, new_version); + new_version->setMetadata( + kBasicBlockMetadata, + this->GetAddrAnnotation(block_addr, module_context)); + CHECK(anvill::GetBasicBlockAddr(new_version).has_value()); } } } diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 6aaeaf26a..8b887712a 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -171,9 +171,6 @@ class FunctionLifter : public CodeLifter { // not doing annotations. 
llvm::MDNode *GetPCAnnotation(uint64_t pc) const; - // A metadata node that communicates that this value (should be a function represents the basic block at address x) - llvm::MDNode *GetBasicBlockAnnotation(uint64_t addr) const; - // Declare the function decl `decl` and return an `llvm::Function *`. The // returned function is a "high-level" function. llvm::Function *GetOrDeclareFunction(const FunctionDecl &decl); diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index ede428afa..a0b3e2fab 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,8 @@ class OurVerifierPass : public llvm::PassInfoMixin { // code, etc. // When utilizing crossRegisterProxies cleanup triggers asan -void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { +void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, + const BasicBlockContexts &contexts) { const LifterOptions &options = lifter.Options(); @@ -216,6 +218,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module) { AddRemoveStackPointerCExprs(fpm, options.stack_frame_recovery_options); //AddRecoverBasicStackFrame(fpm, options.stack_frame_recovery_options); //AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options); + fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); fpm.addPass(llvm::SROAPass()); AddCombineAdjacentShifts(fpm); diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp new file mode 100644 index 000000000..b73cda7d6 --- /dev/null +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -0,0 +1,182 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "anvill/Declarations.h" +namespace anvill { + + +llvm::StringRef ReplaceStackReferences::name(void) { + return "Replace stack 
references"; +} + + +// Contains a list of `load` and `store` instructions that reference +// the stack pointer +using StackPointerRegisterUsages = std::vector; + +// Enumerates all the store and load instructions that reference +// the stack +static StackPointerRegisterUsages +EnumerateStackPointerUsages(llvm::Function &function) { + StackPointerRegisterUsages output; + StackPointerResolver sp_resolver(function.getParent()); + + for (auto &basic_block : function) { + for (auto &instr : basic_block) { + for (auto i = 0u, num_ops = instr.getNumOperands(); i < num_ops; ++i) { + auto &use = instr.getOperandUse(i); + if (auto val = use.get(); llvm::isa(val) && + sp_resolver.IsRelatedToStackPointer(val)) { + output.emplace_back(&use); + } + } + } + } + + return output; +} + + +struct BasicBlockVar { + size_t index; + ParameterDecl decl; +}; + + +struct StackVariable { + // offset into this variable + std::int64_t offset; + BasicBlockVar decl; +}; + +class StackModel { + private: + std::map frame; + + public: + StackModel(const BasicBlockContext &cont, const remill::Arch *arch) { + + size_t index = 0; + for (const auto &v : cont.GetAvailableVariables()) { + if (v.mem_reg && v.mem_reg->name == arch->StackPointerRegisterName()) { + this->InsertFrameVar(v.mem_offset, index, v); + } + index += 1; + } + } + + + std::optional GetParamLte(std::int64_t off) { + auto prec = this->frame.lower_bound(off); + if (prec == this->frame.end()) { + return std::nullopt; + } + + if (prec->first == off) { + return {prec->second}; + } + + if (prec == this->frame.begin()) { + return std::nullopt; + } + + return {(prec--)->second}; + } + + std::optional GetOverlappingParam(std::int64_t off) { + + auto vlte = GetParamLte(off); + + if (!vlte.has_value()) { + return std::nullopt; + } + + auto offset_into_var = off - vlte->decl.mem_offset; + if (offset_into_var <= (vlte->decl.type->getPrimitiveSizeInBits() / 8)) { + return {{offset_into_var, *vlte}}; + } + + return std::nullopt; + } + + + bool 
VarOverlaps(std::int64_t off) { + return GetOverlappingParam(off).has_value(); + } + + + void InsertFrameVar(std::int64_t off, size_t index, ParameterDecl var) { + CHECK(var.type->getPrimitiveSizeInBits() != 0); + + if (VarOverlaps(off)) { + LOG(FATAL) << "Inserting variable that overlaps with current frame"; + } + + this->frame.insert({off, {index, var}}); + } +}; + +llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( + llvm::Function &F, llvm::FunctionAnalysisManager &AM, + const BasicBlockContext &cont) { + NullCrossReferenceResolver resolver; + CrossReferenceFolder folder(resolver, this->lifter.DataLayout()); + StackModel smodel(cont, this->lifter.Options().arch); + + auto vstate = F.getArg(remill::kStatePointerArgNum); + + std::vector> to_replace_vars; + + for (auto use : EnumerateStackPointerUsages(F)) { + const auto reference = folder.TryResolveReferenceWithCaching(use->get()); + if (!reference.is_valid || !reference.references_stack_pointer) { + continue; + } + + // The offset from the stack pointer. Force to a 32-bit, then sign-extend. + int64_t stack_offset = reference.Displacement(this->lifter.DataLayout()); + + auto referenced_variable = smodel.GetOverlappingParam(stack_offset); + //TODO(Ian) handle nonzero offset + if (referenced_variable->offset == 0 && + llvm::isa(use->get()->getType())) { + to_replace_vars.push_back({use, referenced_variable->decl}); + } + } + + for (auto [use, v] : to_replace_vars) { + llvm::IRBuilder<> ir(&F.getEntryBlock(), F.getEntryBlock().begin()); + if (auto *insn = llvm::dyn_cast(use->get())) { + ir.SetInsertPoint(insn); + } + + auto i32 = llvm::IntegerType::get(F.getContext(), 32); + auto g = ir.CreateGEP( + cont.StructTypeFromVars(F.getContext()), vstate, + {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, v.index)}); + use->set(g); + } + F.dump(); + CHECK(!llvm::verifyFunction(F, &llvm::errs())); + + return to_replace_vars.empty() ? 
llvm::PreservedAnalyses::all() + : llvm::PreservedAnalyses::none(); +} +} // namespace anvill \ No newline at end of file diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 4d137e53e..a82abfaf3 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -414,6 +414,23 @@ void Specification::ForEachSymbol( } } +SpecBlockContexts::SpecBlockContexts(const Specification &spec) { + spec.ForEachFunction([this](auto decl) { + decl->AddBBContexts(this->contexts); + return true; + }); +} + +std::optional> +SpecBlockContexts::GetBasicBlockContextForAddr(uint64_t addr) const { + auto cont = this->contexts.find(addr); + if (cont == this->contexts.end()) { + return std::nullopt; + } + + return std::cref(cont->second); +} + // Call `cb` on each function in the spec, until `cb` returns `false`. void Specification::ForEachFunction( std::function)> cb) const { diff --git a/lib/Specification.h b/lib/Specification.h index 61dd194f3..17db6ac26 100644 --- a/lib/Specification.h +++ b/lib/Specification.h @@ -14,10 +14,13 @@ #include #include +#include #include #include #include +#include "anvill/Passes/BasicBlockPass.h" + namespace llvm { class LLVMContext; } // namespace llvm From ecc0586e2f6ac3225737fc16dd38148ee5981735 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 12 Dec 2022 13:43:28 -0500 Subject: [PATCH 063/163] remove old decomp stack --- lib/Passes/RecoverBasicStackFrame.cpp | 550 -------------------------- 1 file changed, 550 deletions(-) delete mode 100644 lib/Passes/RecoverBasicStackFrame.cpp diff --git a/lib/Passes/RecoverBasicStackFrame.cpp b/lib/Passes/RecoverBasicStackFrame.cpp deleted file mode 100644 index f8cced3b4..000000000 --- a/lib/Passes/RecoverBasicStackFrame.cpp +++ /dev/null @@ -1,550 +0,0 @@ -/* - * Copyright (c) 2019-present, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. 
- */ - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "Utils.h" - -namespace anvill { -namespace { - -// Describes an instruction that accesses the stack pointer through the -// `__anvill_sp` symbol. -struct StackPointerUse final { - inline explicit StackPointerUse(llvm::Use *use_, std::uint64_t type_size_, - std::int64_t stack_offset_) - : use(use_), - type_size(type_size_), - stack_offset(stack_offset_) {} - - // An operand inside of a particular instruction, where `use->getUser()` - // is an `llvm::Instruction`, and `use->get()` is a value related to the - // stack pointer. - llvm::Use *const use; - - // Operand size - const std::uint64_t type_size; - - // Stack offset referenced - const std::int64_t stack_offset; -}; - -// Contains a list of `load` and `store` instructions that reference -// the stack pointer -using StackPointerRegisterUsages = std::vector; - -// This structure contains the stack size, along with the lower and -// higher bounds of the offsets, and all the instructions that have -// been analyzed -struct StackFrameAnalysis final { - - // A list of uses that reference the stack pointer - std::vector instruction_uses; - - // Lowest SP-relative offset - std::int64_t lowest_offset{}; - - // Highest SP-relative offset - std::int64_t highest_offset{}; - - // Stack frame size - std::size_t size{}; -}; - -// Enumerates all the store and load instructions that reference -// the stack -static StackPointerRegisterUsages EnumerateStackPointerUsages( - llvm::Function &function) { - StackPointerRegisterUsages output; - StackPointerResolver sp_resolver(function.getParent()); - - for (auto &basic_block : function) { - for (auto &instr : basic_block) { - for (auto i = 0u, num_ops = instr.getNumOperands(); i < num_ops; ++i) { - auto &use = instr.getOperandUse(i); - if (auto val = use.get(); llvm::isa(val) && - sp_resolver.IsRelatedToStackPointer(val)) { - 
output.emplace_back(&use); - } - } - } - } - - return output; -} - -static constexpr uint64_t kMax16 = std::numeric_limits::max(); -static constexpr uint64_t kMax32 = std::numeric_limits::max(); - -// Analyzes the stack frame, determining the relative boundaries and -// collecting the instructions that operate on the stack pointer -static StackFrameAnalysis AnalyzeStackFrame( - llvm::Function &function, const StackFrameRecoveryOptions &options) { - - // The CrossReferenceResolver can accumulate all the offsets - // applied to the stack pointer symbol for us - auto module = function.getParent(); - auto &data_layout = module->getDataLayout(); - - NullCrossReferenceResolver resolver; - CrossReferenceFolder folder(resolver, data_layout); - - // Pre-initialize the stack limits - StackFrameAnalysis output; - output.highest_offset = std::numeric_limits::min(); - output.lowest_offset = std::numeric_limits::max(); - - // Go through each one of the instructions we have found - for (const auto use : EnumerateStackPointerUsages(function)) { - - // Skip any operand that is not related to the stack pointer. - const auto val = use->get(); - - // Attempt to resolve the constant expression into an offset. If we can't - // resolve it, then it probably means that there was a comparison or - // something, and we should unfold it. - const auto reference = folder.TryResolveReferenceWithCaching(val); - if (!reference.is_valid || !reference.references_stack_pointer) { - continue; - } - - // The offset from the stack pointer. Force to a 32-bit, then sign-extend. 
- int64_t stack_offset = reference.Displacement(data_layout); - if (options.max_stack_frame_size <= kMax16) { - stack_offset = static_cast(stack_offset); - } else if (options.max_stack_frame_size <= kMax32) { - stack_offset = static_cast(stack_offset); - } - - // Update the boundaries, based on the offset we have found - std::uint64_t type_size = - data_layout.getTypeAllocSize(val->getType()).getFixedSize(); - - // In the case of `store` instructions, we want to record the size of the - // stored value as the type size or updating the stack offset. - if (auto store = llvm::dyn_cast(use->getUser())) { - if (use->getOperandNo() == 1) { - const auto stored_type = store->getValueOperand()->getType(); - type_size = data_layout.getTypeAllocSize(stored_type).getFixedSize(); - } - - // In the case of `load` instructions, we want to redord the size of the - // loaded value. - } else if (auto load = llvm::dyn_cast(use->getUser())) { - type_size = data_layout.getTypeAllocSize(load->getType()).getFixedSize(); - } - - output.highest_offset = - std::max(output.highest_offset, - stack_offset + static_cast(type_size)); - - output.lowest_offset = std::min(output.lowest_offset, stack_offset); - - // Save the operand use. 
- output.instruction_uses.emplace_back(use, type_size, stack_offset); - } - - output.size = - static_cast(output.highest_offset - output.lowest_offset); - - return output; -} - -// Generates a simple, byte-array based, stack frame for the given -// function -static llvm::StructType *GenerateStackFrameType( - const llvm::Function &function, const StackFrameRecoveryOptions &options, - const StackFrameAnalysis &stack_frame_analysis, std::size_t padding_bytes, - llvm::IntegerType *el_type) { - - const auto element_size = static_cast( - el_type->getPrimitiveSizeInBits().getFixedSize() / 8u); - - // Generate a stack frame type with a name that matches the anvill ABI - auto function_name = function.getName().str(); - auto stack_frame_type_name = function_name + kStackFrameTypeNameSuffix; - - // Make sure this type is not defined already - auto module = function.getParent(); - const auto &dl = module->getDataLayout(); - auto &context = module->getContext(); - - auto stack_frame_type = llvm::StructType::getTypeByName( - context, stack_frame_type_name); - - // Determine how many bytes we should allocate. We may have been - // asked to add some additional padding. We don't care how it is - // accessed right now, we just add to the total size of the final - // stack frame - auto stack_frame_size = std::max( - 1u, - std::min(options.max_stack_frame_size, - padding_bytes + stack_frame_analysis.size)); - - // Round the stack frame to a multiple of the address size. - auto address_size = dl.getPointerSize(0); - const unsigned slot_size = std::lcm(address_size, element_size); - const auto num_slots = (stack_frame_size + (slot_size - 1u)) / - slot_size; - stack_frame_size = num_slots * slot_size; - - if (stack_frame_type != nullptr) { - assert(dl.getTypeAllocSize(stack_frame_type).getKnownMinSize() <= - stack_frame_size); - return stack_frame_type; - } - - // Generate the stack frame using an array of address-sized elements. 
- auto arr_type = llvm::ArrayType::get(el_type, num_slots); - - llvm::Type *stack_frame_types[] = {arr_type}; - return llvm::StructType::create(stack_frame_types, stack_frame_type_name); -} - -// Generates a new symbolic stack value. -static llvm::GlobalVariable *GetStackSymbolicByteValue( - llvm::Module &module, const StackFrameRecoveryOptions &options, - std::int32_t offset, llvm::IntegerType *type) { - - // Create a new name - auto value_name = kSymbolicStackFrameValuePrefix; - if (options.stack_grows_down) { - if (offset < 0) { - value_name += "minus_"; - } else if (offset > 0) { - value_name += "plus_"; - } - } else { - if (offset < 0) { - value_name += "plus_"; - } else if (offset > 0) { - value_name += "minus_"; - } - } - - value_name += std::to_string(abs(offset)); - - auto gv = module.getGlobalVariable(value_name); - if (gv) { - CHECK_EQ(gv->getValueType(), type); - return gv; - } else { - return new llvm::GlobalVariable( - module, type, false, llvm::GlobalValue::ExternalLinkage, nullptr, - value_name); - } -} - -// Patches the function, replacing the load/store instructions so that -// they operate on the new stack frame type we generated. 
-static void UpdateFunction( - llvm::Function &function, const StackFrameRecoveryOptions &options, - const StackFrameAnalysis &stack_frame_analysis) { - - StackFrameStructureInitializationProcedure init_strategy = - options.stack_frame_struct_init_procedure; - - std::size_t stack_frame_lower_padding = - options.stack_frame_lower_padding; - - std::size_t stack_frame_higher_padding = - options.stack_frame_higher_padding; - - auto &context = function.getContext(); - auto module = function.getParent(); - const auto &dl = module->getDataLayout(); - auto address_size = dl.getPointerSize(0); - auto addr_type = llvm::Type::getIntNTy(context, address_size * 8u); - - // Generate a new stack frame type, using a byte array inside a - // StructType - auto padding_bytes = stack_frame_lower_padding + stack_frame_higher_padding; - unsigned stack_frame_word_size = options.stack_frame_word_size; - if (!stack_frame_word_size) { - stack_frame_word_size = address_size; - } - llvm::IntegerType * const stack_frame_word_type = llvm::IntegerType::get( - context, stack_frame_word_size * 8u); - auto stack_frame_type = GenerateStackFrameType( - function, options, stack_frame_analysis, padding_bytes, - stack_frame_word_type); - - int64_t base_stack_offset; - if (options.stack_grows_down) { - base_stack_offset = stack_frame_analysis.lowest_offset - - static_cast(stack_frame_lower_padding); - } else { - base_stack_offset = stack_frame_analysis.lowest_offset - - static_cast(stack_frame_lower_padding); - } - - // Take the first instruction as an insert pointer for the - // IRBuilder, and then create an `alloca` instruction to - // generate our new stack frame - auto &entry_block = function.getEntryBlock(); - auto &insert_point = *entry_block.getFirstInsertionPt(); - - llvm::IRBuilder<> builder(&insert_point); - auto stack_frame_alloca = builder.CreateAlloca(stack_frame_type); - - // Annotate the stack. 
- if (options.stack_offset_metadata_name) { - - // TODO(pag): Account for the stack size having actually been clamped down - // to a smaller range. - - int64_t base = 0; - - // If the stack grows down, the higher offsets represent accesses to - // the callee's stack frame. These will be positive. - if (options.stack_grows_down) { - base = stack_frame_analysis.highest_offset; - base += static_cast(options.stack_frame_higher_padding); - base -= static_cast(stack_frame_word_size); - - // If the stack grows up, the lower offsets represent accesses to the - // callee's stack frame. These will be negative. - } else { - base = stack_frame_analysis.lowest_offset; - base -= static_cast(options.stack_frame_lower_padding); - base += static_cast(stack_frame_word_size); - } - - // NOTE(pag): Base points to the highest or lowest address-sized integer - // that can be stored on the stack. - - auto md_id = context.getMDKindID(kAnvillStackZero); - auto adjust_val = llvm::ConstantInt::get( - addr_type, static_cast(base), true); - auto adjust_md = llvm::ValueAsMetadata::get(adjust_val); - stack_frame_alloca->setMetadata( - md_id, llvm::MDNode::get(context, adjust_md)); - } - - // When we have padding enabled in the configuration, we must - // make sure that accesses are still correctly centered around the - // stack pointer we were given (i.e.: we don't alter where the - // `__anvill_stack_0` is supposed to land). - // - // This is true regardless of which initialization method we use, but - // the following example assumes kSymbolic since it makes the - // explanation easier to follow. 
- // - // [higher addresses] - // - // [__anvill_stack_plus_3 <- optional higher padding <- alloca - // - // __anvill_stack_plus_2 - // __anvill_stack_plus_1 - // __anvill_stack_0 <- __anvill_sp - // __anvill_stack_minus_1 - // __anvill_stack_minus_2 - // - // [__anvill_stack_minus_3 <- optional lower padding - // - // [lower addresses] - - auto total_stack_frame_size = padding_bytes + stack_frame_analysis.size; - - // Pre-initialize the stack frame if we have been requested to do so. This - // covers the frame padding bytes as well. - // - // Look at the definition for the `StackFrameStructureInitializationProcedure` - // enum class to get more details on each initialization strategy. - switch (init_strategy) { - case StackFrameStructureInitializationProcedure::kZeroes: { - - // Initialize to zero - auto null_value = llvm::Constant::getNullValue(stack_frame_type); - builder.CreateStore(null_value, stack_frame_alloca); - break; - } - - case StackFrameStructureInitializationProcedure::kUndef: { - - // Mark the stack values as explicitly undefined - auto undef_value = llvm::UndefValue::get(stack_frame_type); - builder.CreateStore(undef_value, stack_frame_alloca); - break; - } - - case StackFrameStructureInitializationProcedure::kSymbolic: { - - // Generate symbolic values for each byte in the stack frame - auto &module = *function.getParent(); - - auto current_offset = base_stack_offset; - - llvm::Value *gep_indexes[] = {builder.getInt32(0), builder.getInt32(0), - nullptr}; - - for (auto i = 0U; i < total_stack_frame_size; - i += stack_frame_word_size) { - - gep_indexes[2] = builder.getInt32(i / stack_frame_word_size); - DCHECK_EQ(stack_frame_word_type, - llvm::GetElementPtrInst::getIndexedType(stack_frame_type, - gep_indexes)); - auto stack_frame_byte = - builder.CreateGEP(stack_frame_type, stack_frame_alloca, - gep_indexes); - - auto symbolic_value_ptr = GetStackSymbolicByteValue( - module, options, current_offset, stack_frame_word_type); - - current_offset += 
static_cast(stack_frame_word_size); - - auto symbolic_value = builder.CreateLoad(stack_frame_word_type, - symbolic_value_ptr); - builder.CreateStore(symbolic_value, stack_frame_byte); - } - - break; - } - - case StackFrameStructureInitializationProcedure::kNone: { - - // Skip initialization - break; - } - } - - // The stack analysis we have performed earlier contains all the - // operand uses we have to update. - for (auto &sp_use : stack_frame_analysis.instruction_uses) { - - const auto obj = sp_use.use->get(); - - // Convert the `__anvill_sp`-relative offset to a 0-based index - // into our stack frame type - auto zero_based_offset = - sp_use.stack_offset - stack_frame_analysis.lowest_offset; - - // If we added padding, adjust the displacement value. We just have - // to add the amount of bytes we have inserted before the stack pointer - zero_based_offset += stack_frame_lower_padding; - - // Create a GEP instruction that accesses the new stack frame we - // created based on the relative offset - // - // GEP indices for the stack_frame_ptr are constants. It can safely - // inserted after the alloca instead of before the instruction using - // it. 
- // - // As a reminder, the stack frame type is a StructType that contains - // an ArrayType with int8 elements - llvm::Value *stack_frame_ptr = builder.CreateGEP( - stack_frame_type, stack_frame_alloca, - {builder.getInt32(0), builder.getInt32(0), - builder.getInt32(zero_based_offset / stack_frame_word_size)}); - - auto from_val = sp_use.use->get(); - CopyMetadataTo(from_val, stack_frame_ptr); - - llvm::IntegerType *el_type = nullptr; - unsigned scale = 0; - auto missing = static_cast(zero_based_offset) % - stack_frame_word_size; - switch (missing) { - case 7: - case 5: - case 3: - case 1: - el_type = llvm::Type::getInt8Ty(context); - scale = 1; - break; - - case 4: - el_type = llvm::Type::getInt32Ty(context); - scale = 4; - break; - - case 6: - case 2: - el_type = llvm::Type::getInt16Ty(context); - scale = 2; - break; - case 0: - break; - default: - LOG(FATAL) - << "Unsupported address size: " << missing; - break; - } - - if (el_type) { - llvm::PointerType *ptr_type = llvm::PointerType::get(context, 0); - stack_frame_ptr = builder.CreateBitOrPointerCast( - stack_frame_ptr, ptr_type); - CopyMetadataTo(from_val, stack_frame_ptr); - stack_frame_ptr = builder.CreateGEP( - el_type, stack_frame_ptr, builder.getInt32(missing / scale)); - CopyMetadataTo(from_val, stack_frame_ptr); - } - - stack_frame_ptr = - builder.CreateBitOrPointerCast(stack_frame_ptr, obj->getType()); - CopyMetadataTo(from_val, stack_frame_ptr); - - // We now have to replace the operand; it is not correct to use - // `replaceAllUsesWith` on the operand, because the scope of a constant - // could be bigger than just the function we are using. 
- sp_use.use->set(stack_frame_ptr); - } -} - -} // namespace - -llvm::PreservedAnalyses RecoverBasicStackFrame::run( - llvm::Function &function, llvm::FunctionAnalysisManager &fam) { - - if (function.isDeclaration()) { - return llvm::PreservedAnalyses::all(); - } - - // Analyze the stack frame first, enumerating the instructions referencing - // the __anvill_sp symbol and determining the boundaries of the stack memory - StackFrameAnalysis stack_frame_analysis = AnalyzeStackFrame(function, options); - if (stack_frame_analysis.instruction_uses.empty()) { - return llvm::PreservedAnalyses::all(); - } - - // It is now time to patch the function. This method will take the stack - // analysis and use it to generate a stack frame type and update all the - // instructions - UpdateFunction(function, options, stack_frame_analysis); - - // Analyze the __anvill_sp usage again; this time, the resulting - // instruction list should be empty - assert(EnumerateStackPointerUsages(function).empty()); - - return llvm::PreservedAnalyses::none(); -} - -llvm::StringRef RecoverBasicStackFrame::name(void) { - return llvm::StringRef("RecoverBasicStackFrame"); -} - -void AddRecoverBasicStackFrame(llvm::FunctionPassManager &fpm, - const StackFrameRecoveryOptions &options) { - fpm.addPass(RecoverBasicStackFrame(options)); -} -} // namespace anvill From 4997be365cb3757dd4c652356da5b10c78b6e7a9 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 12 Dec 2022 13:49:26 -0500 Subject: [PATCH 064/163] cast --- lib/Passes/ReplaceStackReferences.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index b73cda7d6..50b4fce35 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -109,7 +109,8 @@ class StackModel { } auto offset_into_var = off - vlte->decl.mem_offset; - if (offset_into_var <= (vlte->decl.type->getPrimitiveSizeInBits() / 8)) { + if (offset_into_var <= 
static_cast( + vlte->decl.type->getPrimitiveSizeInBits() / 8)) { return {{offset_into_var, *vlte}}; } From 5513b8cca3ebcfab1767decef7e934aac3c173f3 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 12 Dec 2022 16:56:09 -0500 Subject: [PATCH 065/163] fix off by ones in stack --- .../anvill/Passes/RecoverBasicStackFrame.h | 40 --- lib/CMakeLists.txt | 1 - lib/Lifters/BasicBlockLifter.cpp | 23 +- lib/Lifters/BasicBlockLifter.h | 3 +- lib/Lifters/FunctionLifter.cpp | 38 --- lib/Passes/ReplaceStackReferences.cpp | 37 +- tests/anvill_passes/CMakeLists.txt | 1 - .../src/RecoverStackFrameInformation.cpp | 317 ------------------ 8 files changed, 49 insertions(+), 411 deletions(-) delete mode 100644 include/anvill/Passes/RecoverBasicStackFrame.h delete mode 100644 tests/anvill_passes/src/RecoverStackFrameInformation.cpp diff --git a/include/anvill/Passes/RecoverBasicStackFrame.h b/include/anvill/Passes/RecoverBasicStackFrame.h deleted file mode 100644 index 022225609..000000000 --- a/include/anvill/Passes/RecoverBasicStackFrame.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019-present, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. 
- */ - -#pragma once - -#include -#include - -namespace anvill { - -class StackFrameRecoveryOptions; - -// This function pass recovers stack information by analyzing the usage -// of the `__anvill_sp` symbol -class RecoverBasicStackFrame final - : public llvm::PassInfoMixin { - - // Lifting options - const StackFrameRecoveryOptions &options; - - public: - - // Function pass entry point - llvm::PreservedAnalyses run(llvm::Function &func, - llvm::FunctionAnalysisManager &fam); - - // Returns the pass name - static llvm::StringRef name(void); - - inline explicit RecoverBasicStackFrame( - const StackFrameRecoveryOptions &options_) - : options(options_) {} -}; - -} // namespace anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 500504c9e..667edb540 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -40,7 +40,6 @@ set(anvill_passes Constraints LowerRemillMemoryAccessIntrinsics LowerRemillUndefinedIntrinsics - RecoverBasicStackFrame RemoveCompilerBarriers RemoveDelaySlotIntrinsics RemoveErrorIntrinsics diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index d7cca18be..233da97a6 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -50,7 +50,7 @@ remill::DecodingContext BasicBlockLifter::ApplyContextAssignments( llvm::CallInst *BasicBlockLifter::AddCallFromBasicBlockFunctionToLifted( llvm::BasicBlock *source_block, llvm::Function *dest_func, - const remill::IntrinsicTable &intrinsics) { + const remill::IntrinsicTable &intrinsics, llvm::Value *pc_hint) { auto func = source_block->getParent(); llvm::IRBuilder<> ir(source_block); std::array args; @@ -58,7 +58,14 @@ llvm::CallInst *BasicBlockLifter::AddCallFromBasicBlockFunctionToLifted( NthArgument(func, remill::kMemoryPointerArgNum); args[remill::kStatePointerArgNum] = NthArgument(func, remill::kStatePointerArgNum); - args[remill::kPCArgNum] = NthArgument(func, remill::kPCArgNum); + + if (pc_hint) { + args[remill::kPCArgNum] = pc_hint; + 
} else { + args[remill::kPCArgNum] = + remill::LoadNextProgramCounter(source_block, this->intrinsics); + } + return ir.CreateCall(dest_func, args); } @@ -161,8 +168,16 @@ bool BasicBlockLifter::DoInterProceduralControlFlow( llvm::IRBuilder<> builder(block); if (std::holds_alternative(override)) { auto cc = std::get(override); - this->AddCallFromBasicBlockFunctionToLifted( - block, this->intrinsics.function_call, this->intrinsics); + + if (cc.target_address.has_value()) { + this->AddCallFromBasicBlockFunctionToLifted( + block, this->intrinsics.function_call, this->intrinsics, + this->options.program_counter_init_procedure(builder, this->pc_reg, + *cc.target_address)); + } else { + this->AddCallFromBasicBlockFunctionToLifted( + block, this->intrinsics.function_call, this->intrinsics); + } if (!cc.stop) { auto [_, raddr] = this->LoadFunctionReturnAddress(insn, block); auto npc = remill::LoadNextProgramCounterRef(block); diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 0e371048d..eb1027863 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -68,7 +68,8 @@ class BasicBlockLifter : public CodeLifter { llvm::CallInst *AddCallFromBasicBlockFunctionToLifted( llvm::BasicBlock *source_block, llvm::Function *dest_func, - const remill::IntrinsicTable &intrinsics); + const remill::IntrinsicTable &intrinsics, + llvm::Value *next_pc_hint = nullptr); std::pair LoadFunctionReturnAddress(const remill::Instruction &inst, diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index db89346d1..c5a72bd72 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -91,44 +91,6 @@ GetMemoryEscapeFunc(const remill::IntrinsicTable &intrinsics) { kMemoryPointerEscapeFunction.data(), module); } -// We're calling a remill intrinsic and we want to "mute" the escape of the -// `State` pointer by replacing it with an `undef` value. 
This permits -// optimizations while allowing us to still observe what reaches the `pc` -// argument of the intrinsic. This is valuable for function return intrinsics, -// because it lets us verify that the value that we initialize into the return -// address location actually reaches the `pc` parameter of the -// `__remill_function_return`. -static void MuteStateEscape(llvm::CallInst *call) { - auto state_ptr_arg = call->getArgOperand(remill::kStatePointerArgNum); - auto undef_val = llvm::UndefValue::get(state_ptr_arg->getType()); - call->setArgOperand(remill::kStatePointerArgNum, undef_val); -} - -// This returns a special anvill built-in used to describe jumps tables -// inside lifted code; It takes the address type to generate the function -// parameters of correct type. -static llvm::Function *GetAnvillSwitchFunc(llvm::Module &module, - llvm::Type *type) { - - const auto &func_name = kAnvillSwitchCompleteFunc; - - auto func = module.getFunction(func_name); - if (func != nullptr) { - return func; - } - - llvm::Type *func_parameters[] = {type}; - - auto func_type = llvm::FunctionType::get(type, func_parameters, true); - - func = llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, - func_name, module); - - func->addFnAttr(llvm::Attribute::ReadNone); - - return func; -} - // Annotate and instruction with the `id` annotation if that instruction // is unannotated. 
static void AnnotateInstruction(llvm::Instruction *inst, unsigned id, diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 50b4fce35..3b0ae1d64 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -71,12 +71,17 @@ class StackModel { std::map frame; public: + static uint64_t GetParamDeclSize(const ParameterDecl &decl) { + CHECK(decl.type->getPrimitiveSizeInBits() != 0); + return decl.type->getPrimitiveSizeInBits() / 8; + } + StackModel(const BasicBlockContext &cont, const remill::Arch *arch) { size_t index = 0; for (const auto &v : cont.GetAvailableVariables()) { if (v.mem_reg && v.mem_reg->name == arch->StackPointerRegisterName()) { - this->InsertFrameVar(v.mem_offset, index, v); + this->InsertFrameVar(index, v); } index += 1; } @@ -97,11 +102,13 @@ class StackModel { return std::nullopt; } - return {(prec--)->second}; + + auto prev_decl = (--prec)->second; + CHECK(prev_decl.decl.mem_offset <= off); + return {prev_decl}; } std::optional GetOverlappingParam(std::int64_t off) { - auto vlte = GetParamLte(off); if (!vlte.has_value()) { @@ -109,8 +116,8 @@ class StackModel { } auto offset_into_var = off - vlte->decl.mem_offset; - if (offset_into_var <= static_cast( - vlte->decl.type->getPrimitiveSizeInBits() / 8)) { + if (offset_into_var < static_cast( + vlte->decl.type->getPrimitiveSizeInBits() / 8)) { return {{offset_into_var, *vlte}}; } @@ -119,18 +126,30 @@ class StackModel { bool VarOverlaps(std::int64_t off) { + + return GetOverlappingParam(off).has_value(); } - void InsertFrameVar(std::int64_t off, size_t index, ParameterDecl var) { + void InsertFrameVar(size_t index, ParameterDecl var) { CHECK(var.type->getPrimitiveSizeInBits() != 0); + if (VarOverlaps(var.mem_offset) || + VarOverlaps(var.mem_offset + GetParamDeclSize(var) - 1)) { + + auto oparam = GetOverlappingParam(var.mem_offset); + if (!VarOverlaps(var.mem_offset)) { + oparam = + GetOverlappingParam(var.mem_offset + 
GetParamDeclSize(var) - 1); + } - if (VarOverlaps(off)) { - LOG(FATAL) << "Inserting variable that overlaps with current frame"; + LOG(FATAL) << "Inserting variable that overlaps with current frame " + << var.mem_offset << " with size: " << GetParamDeclSize(var) + << " Overlaps with " << oparam->decl.decl.mem_offset + << " with size " << GetParamDeclSize(oparam->decl.decl); } - this->frame.insert({off, {index, var}}); + this->frame.insert({var.mem_offset, {index, var}}); } }; diff --git a/tests/anvill_passes/CMakeLists.txt b/tests/anvill_passes/CMakeLists.txt index 6a8011866..8c4c78b25 100644 --- a/tests/anvill_passes/CMakeLists.txt +++ b/tests/anvill_passes/CMakeLists.txt @@ -12,7 +12,6 @@ add_executable(test_anvill_passes src/Utils.h src/Utils.cpp - src/RecoverStackFrameInformation.cpp src/SinkSelectionsIntoBranchTargets.cpp src/SplitStackFrameAtReturnAddress.cpp src/InstructionFolderPass.cpp diff --git a/tests/anvill_passes/src/RecoverStackFrameInformation.cpp b/tests/anvill_passes/src/RecoverStackFrameInformation.cpp deleted file mode 100644 index 0a7bd53da..000000000 --- a/tests/anvill_passes/src/RecoverStackFrameInformation.cpp +++ /dev/null @@ -1,317 +0,0 @@ -/* - * Copyright (c) 2019-present, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "RecoverBasicStackFrame.cpp" -#include "Utils.h" - -namespace anvill { - -TEST_SUITE("RecoverBasicStackFrame") { - TEST_CASE("Run the whole pass on a well-formed function") { - static const StackFrameStructureInitializationProcedure - kInitStackSettings[] = { - StackFrameStructureInitializationProcedure::kNone, - StackFrameStructureInitializationProcedure::kZeroes, - StackFrameStructureInitializationProcedure::kUndef, - StackFrameStructureInitializationProcedure::kSymbolic}; - - static const std::size_t kTestPaddingSettings[] = {0, 32, 64}; - - for (const auto &platform : GetSupportedPlatforms()) { - for (auto init_strategy : kInitStackSettings) { - for (auto padding_bytes : kTestPaddingSettings) { - auto context = anvill::CreateContextWithOpaquePointers(); - auto module = - LoadTestData(*context, "RecoverStackFrameInformation.ll"); - - REQUIRE(module != nullptr); - - auto arch = - remill::Arch::Build(context.get(), remill::GetOSName(platform.os), - remill::GetArchName(platform.arch)); - - REQUIRE(arch != nullptr); - - auto ctrl_flow_provider = - anvill::NullControlFlowProvider(); - - TypeDictionary tyDict(*context); - - NullTypeProvider ty_prov(tyDict); - NullMemoryProvider mem_prov; - anvill::LifterOptions lift_options( - arch.get(), *module,ty_prov,std::move(ctrl_flow_provider),mem_prov); - - lift_options.stack_frame_recovery_options.stack_frame_struct_init_procedure = init_strategy; - lift_options.stack_frame_recovery_options.stack_frame_lower_padding = - lift_options.stack_frame_recovery_options.stack_frame_higher_padding = padding_bytes / 2U; - - CHECK(RunFunctionPass( - module.get(), RecoverBasicStackFrame(lift_options.stack_frame_recovery_options))); - - } - } - } - } - - - SCENARIO("Function analysis can recreate a simple, byte-array frame type") { - - GIVEN("a lifted function without stack information") { - auto context 
= anvill::CreateContextWithOpaquePointers(); - auto module = LoadTestData(*context, "RecoverStackFrameInformation.ll"); - REQUIRE(module != nullptr); - - - auto arch = remill::Arch::Build(context.get(), remill::GetOSName("linux"), - remill::GetArchName("amd64")); - REQUIRE(arch != nullptr); - - auto ctrl_flow_provider = - anvill::NullControlFlowProvider(); - - TypeDictionary tyDict(*context); - - NullTypeProvider ty_prov(tyDict); - NullMemoryProvider mem_prov; - anvill::LifterOptions lift_options( - arch.get(), *module,ty_prov,std::move(ctrl_flow_provider),mem_prov); - - - auto &function_list = module->getFunctionList(); - auto function_it = - std::find_if(function_list.begin(), function_list.end(), - - [](const llvm::Function &function) -> bool { - return !function.empty(); - }); - - REQUIRE(function_it != function_list.end()); - auto &function = *function_it; - WHEN("enumerating stack pointer usages") { - auto stack_ptr_usages = - EnumerateStackPointerUsages(function); - - THEN( - "all the uses for the instruction operands referencing the __anvill_sp symbol are returned") { - - // From the test data, you can see we have 12 instructions referencing - // the `__anvill_sp` symbol. Two of these, are `store` instructions - // that have the symbol on both operands. 
- CHECK(stack_ptr_usages.size() == 14U); - } - } - - WHEN("analyzing the stack frame") { - auto stack_frame_analysis = - AnalyzeStackFrame(function, lift_options.stack_frame_recovery_options); - - THEN("lowest and highest relative offsets are returned") { - - // From the test data, you can see we have 12 instructions referencing - // the `__anvill_sp` symbol - // - // The boundaries we should find are: - // __anvill_sp - 16 - 12 = -28 - // __anvill_sp + 12 = 12 - // - // The high boundary however is not 12, because we are writing a 32-bit - // integer; we have to add sizeof(i32) to it, so it becomes 16 - // - // low = -28 - // high = 16 - // size = 44 - - CHECK(stack_frame_analysis.lowest_offset == -28); - CHECK(stack_frame_analysis.highest_offset == 16); - CHECK(stack_frame_analysis.size == 44U); - - // Usages of the `__anvill_sp` symbol is 14, because two of the 12 - // instructions we have are referencing the stack from both - // operands - CHECK(stack_frame_analysis.instruction_uses.size() == 14U); - } - } - - WHEN("creating a new stack frame with no padding bytes") { - auto stack_frame_analysis = AnalyzeStackFrame( - function, lift_options.stack_frame_recovery_options); - auto stack_frame_word_type = lift_options.arch->AddressType(); - auto stack_frame_type = GenerateStackFrameType( - function, lift_options.stack_frame_recovery_options, - stack_frame_analysis, 0, stack_frame_word_type); - - THEN("a StructType containing a word array is returned") { - REQUIRE(stack_frame_type->getNumElements() == 1U); - - auto function_name = function.getName().str(); - auto expected_frame_type_name = - function_name + kStackFrameTypeNameSuffix; - REQUIRE(stack_frame_type->getName().str() == - expected_frame_type_name); - - auto first_elem_type = stack_frame_type->getElementType(0U); - REQUIRE(first_elem_type->isArrayTy()); - - auto byte_array_type = - llvm::dyn_cast(first_elem_type); - REQUIRE(byte_array_type != nullptr); - - CHECK(stack_frame_analysis.size == 44u); - - auto 
word_array_size = byte_array_type->getNumElements(); - std::cout << word_array_size << std::endl; - // type is always address size - CHECK(word_array_size == (48u / (lift_options.arch->address_size / 8))); - - auto module = function.getParent(); - auto data_layout = module->getDataLayout(); - auto frame_type_size = data_layout.getTypeAllocSize(stack_frame_type); - CHECK(frame_type_size == 48U); - } - } - - WHEN("creating a new stack frame with additional padding bytes") { - auto stack_frame_analysis = AnalyzeStackFrame( - function, lift_options.stack_frame_recovery_options); - auto stack_frame_word_type = lift_options.arch->AddressType(); - auto stack_frame_type = GenerateStackFrameType( - function, lift_options.stack_frame_recovery_options, - stack_frame_analysis, 128U, stack_frame_word_type); - - THEN( - "a StructType containing a word array along with the padding is returned") { - REQUIRE(stack_frame_type->getNumElements() == 1U); - - auto function_name = function.getName().str(); - auto expected_frame_type_name = - function_name + kStackFrameTypeNameSuffix; - REQUIRE(stack_frame_type->getName().str() == - expected_frame_type_name); - - auto first_elem_type = stack_frame_type->getElementType(0U); - REQUIRE(first_elem_type->isArrayTy()); - - auto byte_array_type = - llvm::dyn_cast(first_elem_type); - REQUIRE(byte_array_type != nullptr); - - CHECK(stack_frame_analysis.size == 44u); - - auto word_array_size = byte_array_type->getNumElements(); - CHECK(word_array_size == (176u / (lift_options.arch->address_size / 8))); - - auto module = function.getParent(); - auto data_layout = module->getDataLayout(); - auto frame_type_size = data_layout.getTypeAllocSize(stack_frame_type); - CHECK(frame_type_size == 176U); - } - } - } - } - - SCENARIO("Applying stack frame recovery") { - GIVEN("a well formed function") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto module = LoadTestData(*context, "RecoverStackFrameInformation.ll"); - REQUIRE(module != 
nullptr); - - auto &function_list = module->getFunctionList(); - auto function_it = - std::find_if(function_list.begin(), function_list.end(), - - [](const llvm::Function &function) -> bool { - return !function.empty(); - }); - - REQUIRE(function_it != function_list.end()); - auto &function = *function_it; - - - auto arch = remill::Arch::Build(context.get(), remill::GetOSName("linux"), - remill::GetArchName("amd64")); - REQUIRE(arch != nullptr); - - auto ctrl_flow_provider = - anvill::NullControlFlowProvider(); - - TypeDictionary tyDict(*context); - - NullTypeProvider ty_prov(tyDict); - NullMemoryProvider mem_prov; - anvill::LifterOptions lift_options( - arch.get(), *module,ty_prov,std::move(ctrl_flow_provider),mem_prov); - - WHEN("recovering the stack frame") { - auto stack_frame_analysis = AnalyzeStackFrame(function, lift_options.stack_frame_recovery_options); - auto arch = remill::Arch::Build(context.get(), remill::kOSLinux, - remill::kArchAMD64); - - lift_options.stack_frame_recovery_options.stack_frame_struct_init_procedure = - StackFrameStructureInitializationProcedure::kZeroes; - UpdateFunction( - function, lift_options.stack_frame_recovery_options, stack_frame_analysis); - - THEN("the function is updated to use the new stack frame structure") { - auto &entry_block = function.getEntryBlock(); - - // Find the `alloca` instruction that should appear - // as the first instruction in the entry block - llvm::AllocaInst *alloca_inst{nullptr}; - - { - auto first_instr_it = entry_block.begin(); - REQUIRE(first_instr_it != entry_block.end()); - - auto first_instr = &(*first_instr_it); - - alloca_inst = llvm::dyn_cast(first_instr); - } - - CHECK(alloca_inst != nullptr); - - // We have 12 instructions referencing the `__anvill_sp` symbol; however, two - // of those are `store` operations that have 2 references each. 
- // - // We should then have 14 GEP instructions - std::size_t frame_gep_count{0U}; - for (const auto &instr : entry_block) { - auto gep_instr = llvm::dyn_cast(&instr); - if (gep_instr == nullptr) { - continue; - } - - ++frame_gep_count; - } - - CHECK(frame_gep_count == 14U); - - // If we run a second stack analysis, we should no longer find any - // stack frame operation to recover - stack_frame_analysis = AnalyzeStackFrame(function, lift_options.stack_frame_recovery_options); - - CHECK(stack_frame_analysis.instruction_uses.empty()); - } - } - } - } -} - -} // namespace anvill From a93fb2e982d2be2fe3de9087661598b442711a13 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 13 Dec 2022 12:34:52 -0500 Subject: [PATCH 066/163] add function lifting --- bin/Decompile/Main.cpp | 2 +- include/anvill/Declarations.h | 11 +-- include/anvill/Optimize.h | 6 +- include/anvill/Passes/RemoveCallIntrinsics.h | 44 ++++++++++ include/anvill/Utils.h | 21 +++++ lib/CMakeLists.txt | 1 + lib/Declarations.cpp | 34 ++++---- lib/Lifters/CodeLifter.cpp | 1 + lib/Lifters/FunctionLifter.cpp | 8 -- lib/Lifters/FunctionLifter.h | 5 -- lib/Optimize.cpp | 8 +- lib/Passes/RemoveCallIntrinsics.cpp | 65 ++++++++++++++ lib/Utils.cpp | 91 ++++++++++++-------- 13 files changed, 221 insertions(+), 76 deletions(-) create mode 100644 include/anvill/Passes/RemoveCallIntrinsics.h create mode 100644 lib/Passes/RemoveCallIntrinsics.cpp diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index 9fee39bd0..585eeae02 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -242,7 +242,7 @@ int main(int argc, char *argv[]) { } - anvill::OptimizeModule(lifter, module, spec.GetBlockContexts()); + anvill::OptimizeModule(lifter, module, spec.GetBlockContexts(), spec); int ret = EXIT_SUCCESS; diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 974141f3b..2195c33a4 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -10,6 +10,7 @@ #include 
<_types/_uint64_t.h> #include +#include #include #include @@ -179,11 +180,11 @@ struct CallableDecl { // Interpret `target` as being the function to call, and call it from within // a basic block in a lifted bitcode function. Returns the new value of the // memory pointer. - llvm::Value * - CallFromLiftedBlock(llvm::Value *target, const anvill::TypeDictionary &types, - const remill::IntrinsicTable &intrinsics, - llvm::BasicBlock *block, llvm::Value *state_ptr, - llvm::Value *mem_ptr) const; + llvm::Value *CallFromLiftedBlock(llvm::Value *target, + const anvill::TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::IRBuilder<> &, llvm::Value *state_ptr, + llvm::Value *mem_ptr) const; // Try to create a callable decl from a protobuf default callable decl // specification. Returns a string error if something went wrong. diff --git a/include/anvill/Optimize.h b/include/anvill/Optimize.h index c97a2a953..0dda52ac8 100644 --- a/include/anvill/Optimize.h +++ b/include/anvill/Optimize.h @@ -8,7 +8,10 @@ #pragma once +#include + #include "anvill/Passes/BasicBlockPass.h" + namespace llvm { class Module; } // namespace llvm @@ -23,6 +26,7 @@ class EntityLifter; // Optimize a module. This can be a module with semantics code, lifted // code, etc. 
void OptimizeModule(const EntityLifter &lifter_context, llvm::Module &module, - const BasicBlockContexts &contexts); + const BasicBlockContexts &contexts, + const anvill::Specification &spec); } // namespace anvill diff --git a/include/anvill/Passes/RemoveCallIntrinsics.h b/include/anvill/Passes/RemoveCallIntrinsics.h new file mode 100644 index 000000000..2c3658b5b --- /dev/null +++ b/include/anvill/Passes/RemoveCallIntrinsics.h @@ -0,0 +1,44 @@ + +#pragma once + +#include +#include +#include + +#include "anvill/Lifters.h" +#include "anvill/Specification.h" + + +namespace anvill { +/** + * @brief Attempts to remove call intrinsics by identifying a type for the target of a remill_call and lifting the arguments + * types are either provided by a recovered entity or folding the reference to an address that has an override type. + */ +class RemoveCallIntrinsics final + : public IntrinsicPass, + public llvm::PassInfoMixin { + private: + const CrossReferenceResolver &xref_resolver; + const Specification &spec; + const EntityLifter &lifter; + + public: + RemoveCallIntrinsics(const CrossReferenceResolver &xref_resolver, + const Specification &spec, const EntityLifter &lifter) + : xref_resolver(xref_resolver), + spec(spec), + lifter(lifter) {} + + llvm::PreservedAnalyses runOnIntrinsic(llvm::CallInst *indirectJump, + llvm::FunctionAnalysisManager &am, + llvm::PreservedAnalyses); + + + static llvm::PreservedAnalyses INIT_RES; + + + static bool isTargetInstrinsic(const llvm::CallInst *callinsn); + static llvm::StringRef name(); +}; + +} // namespace anvill \ No newline at end of file diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index eaa42abfc..4798a9f99 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include @@ -87,6 +88,18 @@ llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, llvm::BasicBlock *in_block, llvm::Value *state_ptr, llvm::Value *mem_ptr); 
+llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::IRBuilder<> &ir, llvm::Value *state_ptr, + llvm::Value *mem_ptr); + + +void StoreNativeValueToRegister(llvm::Value *native_val, + const remill::Register *reg, + const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::IRBuilder<> &ir, llvm::Value *state_ptr); + void StoreNativeValueToRegister(llvm::Value *native_val, const remill::Register *reg, const TypeDictionary &types, @@ -94,6 +107,14 @@ void StoreNativeValueToRegister(llvm::Value *native_val, llvm::BasicBlock *in_block, llvm::Value *state_ptr); + +llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, + const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::IRBuilder<> &ir, llvm::Value *state_ptr, + llvm::Value *mem_ptr); + + // Produce one or more instructions in `in_block` to store the // native value `native_val` into the lifted state associated // with `decl`. diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 667edb540..8f9b5c04b 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -58,6 +58,7 @@ set(anvill_passes TransformRemillJumpIntrinsics CombineAdjacentShifts ReplaceStackReferences + RemoveCallIntrinsics ) set(anvill_arch_HEADERS diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 45e26f50a..9d5767e79 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -145,28 +145,26 @@ const SpecStackOffsets &SpecBlockContext::GetStackOffsets() const { // memory pointer. 
llvm::Value *CallableDecl::CallFromLiftedBlock( llvm::Value *target, const anvill::TypeDictionary &types, - const remill::IntrinsicTable &intrinsics, llvm::BasicBlock *block, + const remill::IntrinsicTable &intrinsics, llvm::IRBuilder<> &ir, llvm::Value *state_ptr, llvm::Value *mem_ptr) const { - auto module = block->getModule(); + auto module = ir.GetInsertBlock()->getModule(); auto &context = module->getContext(); CHECK_EQ(&context, &(target->getContext())); CHECK_EQ(&context, &(state_ptr->getContext())); CHECK_EQ(&context, &(mem_ptr->getContext())); CHECK_EQ(&context, &(types.u.named.void_->getContext())); - llvm::IRBuilder<> ir(block); - // Go and get a pointer to the stack pointer register, so that we can // later store our computed return value stack pointer to it. auto sp_reg = arch->RegisterByName(arch->StackPointerRegisterName()); - const auto ptr_to_sp = sp_reg->AddressOf(state_ptr, block); - ir.SetInsertPoint(block); + const auto ptr_to_sp = sp_reg->AddressOf(state_ptr, ir.GetInsertBlock()); + // Go and compute the value of the stack pointer on exit from // the function, which will be based off of the register state // on entry to the function. - auto new_sp_base = return_stack_pointer->AddressOf(state_ptr, block); - ir.SetInsertPoint(block); + auto new_sp_base = + return_stack_pointer->AddressOf(state_ptr, ir.GetInsertBlock()); const auto sp_val_on_exit = ir.CreateAdd( ir.CreateLoad(return_stack_pointer->type, new_sp_base), @@ -177,14 +175,14 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( llvm::SmallVector param_vals; // Get the return address. - auto ret_addr = LoadLiftedValue(return_address, types, intrinsics, block, + auto ret_addr = LoadLiftedValue(return_address, types, intrinsics, ir, state_ptr, mem_ptr); CHECK(ret_addr && !llvm::isa_and_nonnull(ret_addr)); // Get the parameters. 
for (const auto ¶m_decl : params) { - const auto val = LoadLiftedValue(param_decl, types, intrinsics, block, - state_ptr, mem_ptr); + const auto val = + LoadLiftedValue(param_decl, types, intrinsics, ir, state_ptr, mem_ptr); if (auto inst_val = llvm::dyn_cast(val)) { inst_val->setName(param_decl.name); } @@ -208,8 +206,8 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( if (returns.size() == 1) { auto call_ret = ret_val; - mem_ptr = StoreNativeValue(call_ret, returns.front(), types, intrinsics, - block, state_ptr, mem_ptr); + mem_ptr = StoreNativeValue(call_ret, returns.front(), types, intrinsics, ir, + state_ptr, mem_ptr); // There are possibly multiple return values (or zero). Unpack the // return value (it will be a struct type) into its components and @@ -219,18 +217,16 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( for (const auto &ret_decl : returns) { unsigned indexes[] = {index}; auto elem_val = ir.CreateExtractValue(ret_val, indexes); - mem_ptr = StoreNativeValue(elem_val, ret_decl, types, intrinsics, block, + mem_ptr = StoreNativeValue(elem_val, ret_decl, types, intrinsics, ir, state_ptr, mem_ptr); index += 1; } } - // Store the return address, and computed return stack pointer. 
- ir.SetInsertPoint(block); - ir.CreateStore( - ret_addr, - remill::FindVarInFunction(block, remill::kNextPCVariableName).first); + ir.CreateStore(ret_addr, remill::FindVarInFunction( + ir.GetInsertBlock(), remill::kNextPCVariableName) + .first); ir.CreateStore(sp_val_on_exit, ptr_to_sp); if (is_noreturn) { diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index 3491173b8..c97e133c0 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -298,4 +298,5 @@ void CodeLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { ClearVariableNames(inf); } + } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index c5a72bd72..643d548ef 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -100,14 +100,6 @@ static void AnnotateInstruction(llvm::Instruction *inst, unsigned id, } } -static void AnnotateInstruction(llvm::Value *val, unsigned id, - llvm::MDNode *annot) { - if (auto inst = llvm::dyn_cast(val)) { - if (!inst->getMetadata(id)) { - inst->setMetadata(id, annot); - } - } -} // Annotate and instruction with the `id` annotation if that instruction // is unannotated. diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 8b887712a..7d9c70e7f 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -214,11 +214,6 @@ class FunctionLifter : public CodeLifter { std::optional &delayed_inst, llvm::BasicBlock *block); - // Call `pc` in `block`, treating it as a callable declaration `decl`. - // Returns the new value of the memory pointer (after it is stored to - // `MEMORY`). - llvm::Value *CallCallableDecl(llvm::BasicBlock *block, llvm::Value *pc, - CallableDecl decl); // Try to resolve `target_pc` to a lifted function, and introduce // a function call to that address in `block`. 
Failing this, add a call diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index a0b3e2fab..839abeb56 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -69,6 +70,8 @@ #include #include +#include "anvill/Specification.h" + namespace anvill { //// TODO(pag): NewGVN passes in debug build of LLVM on challenge 5. @@ -104,7 +107,8 @@ class OurVerifierPass : public llvm::PassInfoMixin { // When utilizing crossRegisterProxies cleanup triggers asan void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, - const BasicBlockContexts &contexts) { + const BasicBlockContexts &contexts, + const anvill::Specification &spec) { const LifterOptions &options = lifter.Options(); @@ -229,6 +233,8 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, // but it comes up often enough for lifted code. AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); + fpm.addPass(anvill::RemoveCallIntrinsics(xr, spec, lifter)); + fpm.addPass(llvm::SROAPass()); AddBranchRecovery(fpm); pb.crossRegisterProxies(lam, fam, cam, mam); diff --git a/lib/Passes/RemoveCallIntrinsics.cpp b/lib/Passes/RemoveCallIntrinsics.cpp new file mode 100644 index 000000000..ed77269f4 --- /dev/null +++ b/lib/Passes/RemoveCallIntrinsics.cpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include +#include + +namespace anvill { +llvm::StringRef RemoveCallIntrinsics::name(void) { + return "Remove call intrinsics."; +} + + +namespace {} + +llvm::PreservedAnalyses +RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, + llvm::FunctionAnalysisManager &am, + llvm::PreservedAnalyses prev) { + + CHECK(remillFunctionCall->getNumOperands() == 4); + auto target_func = remillFunctionCall->getArgOperand(1); + auto state_ptr = remillFunctionCall->getArgOperand(0); + auto mem_ptr = remillFunctionCall->getArgOperand(2); + + CrossReferenceFolder xref_folder( + this->xref_resolver, + 
remillFunctionCall->getFunction()->getParent()->getDataLayout()); + auto ra = xref_folder.TryResolveReferenceWithClearedCache(target_func); + + if (ra.references_entity || // Related to an existing lifted entity. + ra.references_global_value || // Related to a global var/func. + ra.references_program_counter) { // Related to `__anvill_pc`. + + // TODO(Ian): ignoring callsite decls for now + auto fdecl = spec.FunctionAt(ra.u.address); + if (fdecl) { + llvm::IRBuilder<> ir(remillFunctionCall->getParent()); + ir.SetInsertPoint(remillFunctionCall); + + const remill::IntrinsicTable table( + remillFunctionCall->getFunction()->getParent()); + auto new_mem = fdecl->CallFromLiftedBlock( + target_func, lifter.Options().TypeDictionary(), table, ir, state_ptr, + mem_ptr); + + remillFunctionCall->replaceAllUsesWith(new_mem); + remillFunctionCall->eraseFromParent(); + prev.intersect(llvm::PreservedAnalyses::none()); + } + } + + return prev; +} + + +llvm::PreservedAnalyses RemoveCallIntrinsics::INIT_RES = + llvm::PreservedAnalyses::all(); + + +bool RemoveCallIntrinsics::isTargetInstrinsic(const llvm::CallInst *callinsn) { + return callinsn->getCalledFunction() != nullptr && + callinsn->getName().startswith("__remill_function_call"); +} +} // namespace anvill \ No newline at end of file diff --git a/lib/Utils.cpp b/lib/Utils.cpp index d7dc05129..afa636cd5 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -242,26 +242,22 @@ void CopyMetadataTo(llvm::Value *src, llvm::Value *dst) { } } + void StoreNativeValueToRegister(llvm::Value *native_val, const remill::Register *reg, const TypeDictionary &types, const remill::IntrinsicTable &intrinsics, - llvm::BasicBlock *in_block, - llvm::Value *state_ptr) { - auto func = in_block->getParent(); + llvm::IRBuilder<> &ir, llvm::Value *state_ptr) { + auto func = ir.GetInsertBlock()->getParent(); auto module = func->getParent(); auto &context = module->getContext(); auto reg_type = remill::RecontextualizeType(reg->type, context); - auto ptr_to_reg 
= reg->AddressOf(state_ptr, in_block); - llvm::IRBuilder<> ir(in_block); + auto ptr_to_reg = reg->AddressOf(state_ptr, ir.GetInsertBlock()); llvm::StoreInst *store = nullptr; - auto ipoint = ir.GetInsertPoint(); - auto iblock = ir.GetInsertBlock(); auto adapted_val = types.ConvertValueToType(ir, native_val, reg_type); - ir.SetInsertPoint(iblock, ipoint); if (adapted_val) { store = ir.CreateStore(adapted_val, ptr_to_reg); @@ -275,16 +271,24 @@ void StoreNativeValueToRegister(llvm::Value *native_val, CopyMetadataTo(native_val, store); } -// Produce one or more instructions in `in_block` to store the -// native value `native_val` into the lifted state associated -// with `decl`. +void StoreNativeValueToRegister(llvm::Value *native_val, + const remill::Register *reg, + const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::BasicBlock *in_block, + llvm::Value *state_ptr) { + llvm::IRBuilder<> ir(in_block); + StoreNativeValueToRegister(native_val, reg, types, intrinsics, ir, state_ptr); +} + + llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, const TypeDictionary &types, const remill::IntrinsicTable &intrinsics, - llvm::BasicBlock *in_block, - llvm::Value *state_ptr, llvm::Value *mem_ptr) { + llvm::IRBuilder<> &ir, llvm::Value *state_ptr, + llvm::Value *mem_ptr) { - auto func = in_block->getParent(); + auto func = ir.GetInsertBlock()->getParent(); auto module = func->getParent(); auto &context = module->getContext(); @@ -295,17 +299,16 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, // Store it to a register. if (decl.reg) { - StoreNativeValueToRegister(native_val, decl.reg, types, intrinsics, - in_block, state_ptr); + StoreNativeValueToRegister(native_val, decl.reg, types, intrinsics, ir, + state_ptr); return mem_ptr; // Store it to memory. 
} else if (decl.mem_reg) { auto mem_reg_type = remill::RecontextualizeType(decl.mem_reg->type, context); - auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, in_block); + auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, ir.GetInsertBlock()); - llvm::IRBuilder<> ir(in_block); llvm::Value *addr = ir.CreateLoad(mem_reg_type, ptr_to_reg); CopyMetadataTo(native_val, addr); @@ -324,29 +327,41 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, CopyMetadataTo(native_val, addr); } - return remill::StoreToMemory(intrinsics, in_block, native_val, mem_ptr, - addr); + return remill::StoreToMemory(intrinsics, ir, native_val, mem_ptr, addr); // Store to memory at an absolute offset. } else if (decl.mem_offset) { - llvm::IRBuilder<> ir(in_block); const auto addr = llvm::ConstantInt::get( remill::NthArgument(intrinsics.read_memory_8, 1u)->getType(), static_cast(decl.mem_offset), false); - return remill::StoreToMemory(intrinsics, in_block, native_val, mem_ptr, - addr); + return remill::StoreToMemory(intrinsics, ir, native_val, mem_ptr, addr); } else { return llvm::UndefValue::get(mem_ptr->getType()); } } +// Produce one or more instructions in `in_block` to store the +// native value `native_val` into the lifted state associated +// with `decl`. 
+llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, + const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::BasicBlock *in_block, + llvm::Value *state_ptr, llvm::Value *mem_ptr) { + + llvm::IRBuilder<> ir(in_block); + return StoreNativeValue(native_val, decl, types, intrinsics, ir, state_ptr, + mem_ptr); +} + + llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, const remill::IntrinsicTable &intrinsics, - llvm::BasicBlock *in_block, llvm::Value *state_ptr, + llvm::IRBuilder<> &ir, llvm::Value *state_ptr, llvm::Value *mem_ptr) { - auto func = in_block->getParent(); + auto func = ir.GetInsertBlock()->getParent(); auto module = func->getParent(); auto &context = module->getContext(); CHECK_EQ(module, intrinsics.read_memory_8->getParent()); @@ -356,8 +371,7 @@ llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, // Load it out of a register. if (decl.reg) { auto reg_type = remill::RecontextualizeType(decl.reg->type, context); - auto ptr_to_reg = decl.reg->AddressOf(state_ptr, in_block); - llvm::IRBuilder<> ir(in_block); + auto ptr_to_reg = decl.reg->AddressOf(state_ptr, ir.GetInsertBlock()); auto reg = ir.CreateLoad(reg_type, ptr_to_reg); CopyMetadataTo(mem_ptr, reg); auto ipoint = ir.GetInsertPoint(); @@ -380,8 +394,7 @@ llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, } else if (decl.mem_reg) { auto mem_reg_type = remill::RecontextualizeType(decl.mem_reg->type, context); - auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, in_block); - llvm::IRBuilder<> ir(in_block); + auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, ir.GetInsertBlock()); llvm::Value *addr = ir.CreateLoad(mem_reg_type, ptr_to_reg); CopyMetadataTo(mem_ptr, addr); if (0ll < decl.mem_offset) { @@ -399,22 +412,18 @@ llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, CopyMetadataTo(mem_ptr, addr); } - auto val = - 
remill::LoadFromMemory(intrinsics, in_block, decl_type, mem_ptr, addr); - ir.SetInsertPoint(in_block); + auto val = remill::LoadFromMemory(intrinsics, ir, decl_type, mem_ptr, addr); + return types.ConvertValueToType(ir, val, decl_type); // Store to memory at an absolute offset. } else if (decl.mem_offset) { - llvm::IRBuilder<> ir(in_block); const auto addr = llvm::ConstantInt::get( remill::NthArgument(intrinsics.read_memory_8, 1u)->getType(), static_cast(decl.mem_offset), false); - auto val = - remill::LoadFromMemory(intrinsics, in_block, decl_type, mem_ptr, addr); + auto val = remill::LoadFromMemory(intrinsics, ir, decl_type, mem_ptr, addr); CopyMetadataTo(mem_ptr, val); - ir.SetInsertPoint(in_block); return types.ConvertValueToType(ir, val, decl_type); } else { @@ -424,6 +433,16 @@ llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, } } + +llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::BasicBlock *in_block, llvm::Value *state_ptr, + llvm::Value *mem_ptr) { + + llvm::IRBuilder ir(in_block); + return LoadLiftedValue(decl, types, intrinsics, ir, state_ptr, mem_ptr); +} + namespace { // Returns `true` if `reg_name` appears to be the name of the stack pointer From 824923a502a41a3e1379f1f02ed47c266f1f97cc Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 13 Dec 2022 16:24:30 -0500 Subject: [PATCH 067/163] replace calls --- bin/Decompile/Main.cpp | 1 + include/anvill/Utils.h | 2 ++ lib/Declarations.cpp | 8 ++++---- lib/Lifters/FunctionLifter.cpp | 7 ++++++- lib/Passes/RemoveCallIntrinsics.cpp | 16 +++++++++------- lib/Passes/Utils.cpp | 11 +++++++---- lib/Passes/Utils.h | 8 ++++---- lib/Utils.cpp | 19 +++++++++++++++++++ 8 files changed, 52 insertions(+), 20 deletions(-) diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index 585eeae02..0bb78e154 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -10,6 +10,7 @@ #include #include 
#include +#include #include #include #include diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 4798a9f99..eeead9ae5 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -94,6 +94,8 @@ llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, llvm::Value *mem_ptr); +void CloneIntrinsicsFromModule(llvm::Module &from, llvm::Module &into); + void StoreNativeValueToRegister(llvm::Value *native_val, const remill::Register *reg, const TypeDictionary &types, diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 9d5767e79..1165e6cec 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -223,10 +223,10 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( } } - - ir.CreateStore(ret_addr, remill::FindVarInFunction( - ir.GetInsertBlock(), remill::kNextPCVariableName) - .first); + // TODO(Ian): ... well ok so we already did stuff assuming the PC was one way since we lifted below it. + //ir.CreateStore(ret_addr, remill::FindVarInFunction( + // ir.GetInsertBlock(), remill::kNextPCVariableName) + // .first); ir.CreateStore(sp_val_on_exit, ptr_to_sp); if (is_noreturn) { diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 643d548ef..b61057cfe 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -127,7 +127,12 @@ FunctionLifter::FunctionLifter(const LifterOptions &options_, std::unique_ptr semantics_module) : CodeLifter(options_, semantics_module.get(), this->type_specifier), semantics_module(std::move(semantics_module)), - type_specifier(options_.TypeDictionary(), options_.arch) {} + type_specifier(options_.TypeDictionary(), options_.arch) { + + + anvill::CloneIntrinsicsFromModule(*this->semantics_module, + *this->options.module); +} llvm::BranchInst * diff --git a/lib/Passes/RemoveCallIntrinsics.cpp b/lib/Passes/RemoveCallIntrinsics.cpp index ed77269f4..0509fcfff 100644 --- a/lib/Passes/RemoveCallIntrinsics.cpp +++ 
b/lib/Passes/RemoveCallIntrinsics.cpp @@ -17,7 +17,6 @@ llvm::PreservedAnalyses RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, llvm::FunctionAnalysisManager &am, llvm::PreservedAnalyses prev) { - CHECK(remillFunctionCall->getNumOperands() == 4); auto target_func = remillFunctionCall->getArgOperand(1); auto state_ptr = remillFunctionCall->getArgOperand(0); @@ -27,22 +26,24 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, this->xref_resolver, remillFunctionCall->getFunction()->getParent()->getDataLayout()); auto ra = xref_folder.TryResolveReferenceWithClearedCache(target_func); - + remillFunctionCall->dump(); if (ra.references_entity || // Related to an existing lifted entity. ra.references_global_value || // Related to a global var/func. ra.references_program_counter) { // Related to `__anvill_pc`. // TODO(Ian): ignoring callsite decls for now auto fdecl = spec.FunctionAt(ra.u.address); - if (fdecl) { + auto entity = this->xref_resolver.EntityAtAddress(ra.u.address); + if (fdecl && entity) { llvm::IRBuilder<> ir(remillFunctionCall->getParent()); ir.SetInsertPoint(remillFunctionCall); const remill::IntrinsicTable table( remillFunctionCall->getFunction()->getParent()); - auto new_mem = fdecl->CallFromLiftedBlock( - target_func, lifter.Options().TypeDictionary(), table, ir, state_ptr, - mem_ptr); + + auto new_mem = + fdecl->CallFromLiftedBlock(entity, lifter.Options().TypeDictionary(), + table, ir, state_ptr, mem_ptr); remillFunctionCall->replaceAllUsesWith(new_mem); remillFunctionCall->eraseFromParent(); @@ -60,6 +61,7 @@ llvm::PreservedAnalyses RemoveCallIntrinsics::INIT_RES = bool RemoveCallIntrinsics::isTargetInstrinsic(const llvm::CallInst *callinsn) { return callinsn->getCalledFunction() != nullptr && - callinsn->getName().startswith("__remill_function_call"); + callinsn->getCalledFunction()->getName().startswith( + "__remill_function_call"); } } // namespace anvill \ No newline at end of file diff --git 
a/lib/Passes/Utils.cpp b/lib/Passes/Utils.cpp index 5abbbb788..a12db847b 100644 --- a/lib/Passes/Utils.cpp +++ b/lib/Passes/Utils.cpp @@ -46,7 +46,8 @@ llvm::Value *ConvertConstantToPointer(llvm::IRBuilder<> &ir, // Cast a pointer to a pointer type. if (auto ptr_ty = llvm::dyn_cast(type)) { if (ptr_ty->getAddressSpace() != dest_ptr_ty->getAddressSpace()) { - const auto new_ptr_ty = llvm::PointerType::get(ir.getContext(), dest_ptr_ty->getAddressSpace()); + const auto new_ptr_ty = llvm::PointerType::get( + ir.getContext(), dest_ptr_ty->getAddressSpace()); val_to_convert = llvm::ConstantExpr::getAddrSpaceCast(val_to_convert, new_ptr_ty); ptr_ty = new_ptr_ty; @@ -89,7 +90,8 @@ llvm::Value *ConvertValueToPointer(llvm::IRBuilder<> &ir, // Cast a pointer to a pointer type. if (auto ptr_ty = llvm::dyn_cast(type)) { if (ptr_ty->getAddressSpace() != dest_ptr_ty->getAddressSpace()) { - const auto new_ptr_ty = llvm::PointerType::get(ir.getContext(), dest_ptr_ty->getAddressSpace()); + const auto new_ptr_ty = llvm::PointerType::get( + ir.getContext(), dest_ptr_ty->getAddressSpace()); auto dest = ir.CreateAddrSpaceCast(val_to_convert, new_ptr_ty); CopyMetadataTo(val_to_convert, dest); val_to_convert = dest; @@ -100,8 +102,8 @@ llvm::Value *ConvertValueToPointer(llvm::IRBuilder<> &ir, return val_to_convert; } else { - auto dest = remill::BuildPointerToOffset( - ir, val_to_convert, 0, dest_ptr_ty); + auto dest = + remill::BuildPointerToOffset(ir, val_to_convert, 0, dest_ptr_ty); CopyMetadataTo(val_to_convert, dest); return dest; } @@ -180,6 +182,7 @@ bool BasicBlockIsSane(llvm::BasicBlock *block) { return true; } + llvm::PreservedAnalyses ConvertBoolToPreserved(bool modified) { return modified ? 
llvm::PreservedAnalyses::none() : llvm::PreservedAnalyses::all(); diff --git a/lib/Passes/Utils.h b/lib/Passes/Utils.h index 38ee1eacc..25e4240d4 100644 --- a/lib/Passes/Utils.h +++ b/lib/Passes/Utils.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -16,8 +17,6 @@ #include #include -#include - namespace llvm { class CallBase; class Function; @@ -30,8 +29,8 @@ namespace anvill { namespace { template -static std::vector SelectInstructions( - llvm::Function &function) { +static std::vector +SelectInstructions(llvm::Function &function) { std::vector output; for (auto &instruction : llvm::instructions(function)) { @@ -70,6 +69,7 @@ std::string GetFunctionIR(llvm::Function &func); // Returns the module's IR std::string GetModuleIR(llvm::Module &module); + llvm::PreservedAnalyses ConvertBoolToPreserved(bool); // Returns the pointer to the function that lets us overwrite the return diff --git a/lib/Utils.cpp b/lib/Utils.cpp index afa636cd5..074d19393 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -242,6 +243,24 @@ void CopyMetadataTo(llvm::Value *src, llvm::Value *dst) { } } +void CloneIntrinsicsFromModule(llvm::Module &from, llvm::Module &into) { + //CHECK(&from.getContext() == &into.getContext()); + auto func = from.getFunction("__remill_intrinsics"); + if (!func) { + LOG(FATAL) << "No intrinsics bundle in module"; + } + + if (into.getFunction("__remill_intrinsics")) { + return; + } + + auto nfunc = llvm::Function::Create( + llvm::cast(remill::RecontextualizeType( + func->getFunctionType(), into.getContext())), + llvm::GlobalValue::ExternalLinkage, func->getName(), into); + + remill::CloneFunctionInto(func, nfunc); +} void StoreNativeValueToRegister(llvm::Value *native_val, const remill::Register *reg, From dfd26225308e1fe8ff42aa2805b6c7937e56681d Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 13 Dec 2022 16:29:34 -0500 Subject: [PATCH 068/163] start add return 
pass --- include/anvill/ABI.h | 4 ++++ .../ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h | 3 +++ 2 files changed, 7 insertions(+) create mode 100644 include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h diff --git a/include/anvill/ABI.h b/include/anvill/ABI.h index 17b7c78ad..fa8d60e0e 100644 --- a/include/anvill/ABI.h +++ b/include/anvill/ABI.h @@ -84,4 +84,8 @@ extern const std::string kAnvillStackZero; extern const std::string kBasicBlockMetadata; + +/// Intrinsic that acts like a return instruction but leaves both the basic block and the parent function. +extern const std::string kAnvillBasicBlockReturn; + } // namespace anvill diff --git a/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h b/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h new file mode 100644 index 000000000..25399fdaa --- /dev/null +++ b/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h @@ -0,0 +1,3 @@ + + +class ReplaceRemillFunctionReturnsWithAnvillFunctionReturns {}; \ No newline at end of file From e0bf8f275cfc6b55f023a2d6ec65dc6bfddbe9cf Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 13 Dec 2022 20:10:27 -0500 Subject: [PATCH 069/163] remove optimistic remove of function returns --- include/anvill/Declarations.h | 4 + ...FunctionReturnsWithAnvillFunctionReturns.h | 29 +++++++- lib/ABI.cpp | 3 + lib/CMakeLists.txt | 1 + lib/Declarations.cpp | 5 ++ lib/Optimize.cpp | 4 +- ...nctionReturnsWithAnvillFunctionReturns.cpp | 73 +++++++++++++++++++ lib/Passes/Utils.cpp | 14 ++++ lib/Passes/Utils.h | 2 + lib/Utils.cpp | 11 +-- 10 files changed, 137 insertions(+), 9 deletions(-) create mode 100644 lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 2195c33a4..c8750f095 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -206,6 +206,8 @@ class 
BasicBlockContext { virtual std::vector GetAvailableVariables() const = 0; virtual const SpecStackOffsets &GetStackOffsets() const = 0; + virtual const std::vector &ReturnValue() const = 0; + llvm::StructType *StructTypeFromVars(llvm::LLVMContext &llvm_context) const; }; @@ -222,6 +224,8 @@ class SpecBlockContext : public BasicBlockContext { offsets(std::move(offsets)) {} virtual std::vector GetAvailableVariables() const override; virtual const SpecStackOffsets &GetStackOffsets() const override; + + virtual const std::vector &ReturnValue() const override; }; // A function decl, as represented at a "near ABI" level. To be specific, diff --git a/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h b/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h index 25399fdaa..7ebd07d4f 100644 --- a/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h +++ b/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h @@ -1,3 +1,30 @@ +#include +#include +#include "anvill/Lifters.h" -class ReplaceRemillFunctionReturnsWithAnvillFunctionReturns {}; \ No newline at end of file +namespace anvill { +// An intrinsic pass that currently assumes that the function returns to its caller, +// replacing the sound remill return with an anvill_return that returns the value specified by this +// functions ABI. 
+// TODO(Ian): make intrinsic pass compose with basic block passes somehow +class ReplaceRemillFunctionReturnsWithAnvillFunctionReturns + : public BasicBlockPass< + ReplaceRemillFunctionReturnsWithAnvillFunctionReturns> { + private: + const EntityLifter &lifter; + + public: + ReplaceRemillFunctionReturnsWithAnvillFunctionReturns( + const BasicBlockContexts &contexts, const EntityLifter &lifter) + : BasicBlockPass(contexts), + lifter(lifter) {} + + static llvm::StringRef name(void); + + + llvm::PreservedAnalyses + runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, + const anvill::BasicBlockContext &); +}; +} // namespace anvill \ No newline at end of file diff --git a/lib/ABI.cpp b/lib/ABI.cpp index 082527208..c50cfedc9 100644 --- a/lib/ABI.cpp +++ b/lib/ABI.cpp @@ -84,4 +84,7 @@ const std::string kAnvillStackZero(kAnvillNamePrefix + "stack_zero"); const std::string kBasicBlockMetadata(kAnvillNamePrefix + "basic_block_md"); +const std::string kAnvillBasicBlockReturn(kAnvillNamePrefix + + "basic_block_function_return"); + } // namespace anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 8f9b5c04b..42b06092f 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -59,6 +59,7 @@ set(anvill_passes CombineAdjacentShifts ReplaceStackReferences RemoveCallIntrinsics + ReplaceRemillFunctionReturnsWithAnvillFunctionReturns ) set(anvill_arch_HEADERS diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 1165e6cec..1c48eb48f 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -116,6 +117,10 @@ FunctionDecl::DeclareInModule(std::string_view name, return func; } +const std::vector &SpecBlockContext::ReturnValue() const { + return this->decl.returns; +} + std::vector SpecBlockContext::GetAvailableVariables() const { std::vector decls; for (auto p : this->decl.params) { diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 
839abeb56..f172ae01a 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -50,6 +50,7 @@ #include #include #include +#include // clang-format on #include @@ -191,7 +192,8 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, // fpm.addPass(llvm::SinkingPass()); fpm.addPass(llvm::SimplifyCFGPass()); fpm.addPass(llvm::InstCombinePass()); - + fpm.addPass(anvill::ReplaceRemillFunctionReturnsWithAnvillFunctionReturns( + contexts, lifter)); AddSinkSelectionsIntoBranchTargets(fpm); AddRemoveUnusedFPClassificationCalls(fpm); AddRemoveDelaySlotIntrinsics(fpm); diff --git a/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp b/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp new file mode 100644 index 000000000..a67b9ce11 --- /dev/null +++ b/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "Utils.h" +#include "anvill/Declarations.h" + +namespace anvill { +llvm::StringRef +ReplaceRemillFunctionReturnsWithAnvillFunctionReturns::name(void) { + return "ReplaceRemillFunctionReturnsWithAnvillFunctionReturns"; +} + + +llvm::PreservedAnalyses +ReplaceRemillFunctionReturnsWithAnvillFunctionReturns::runOnBasicBlockFunction( + llvm::Function &F, llvm::FunctionAnalysisManager &AM, + const anvill::BasicBlockContext &bbcont) { + + std::vector to_replace; + for (auto &insn : llvm::instructions(F)) { + if (llvm::CallBase *call = llvm::dyn_cast(&insn)) { + if (call->getCalledFunction() && + call->getCalledFunction()->getName().startswith( + "__remill_function_return")) { + + to_replace.push_back(call); + } + } + } + + + const std::vector &ret_decl = bbcont.ReturnValue(); + remill::IntrinsicTable intrinsics(F.getParent()); + auto pres_analyses = llvm::PreservedAnalyses::all(); + for (auto rep : to_replace) { + auto state = rep->getArgOperand(0); + auto mem = 
rep->getArgOperand(2); + llvm::IRBuilder<> ir(rep); + ir.SetInsertPoint(rep); + + + std::vector args; + for (auto vdecl : ret_decl) { + args.push_back(anvill::LoadLiftedValue( + vdecl, this->lifter.Options().TypeDictionary(), intrinsics, ir, state, + mem)); + } + + auto tgt = GetOrCreateAnvillReturnFunc(F.getParent()); + ir.CreateCall(tgt, args); + + rep->replaceAllUsesWith(mem); + rep->eraseFromParent(); + pres_analyses = llvm::PreservedAnalyses::none(); + } + + F.dump(); + CHECK(!llvm::verifyFunction(F, &llvm::errs())); + + return pres_analyses; +} +} // namespace anvill \ No newline at end of file diff --git a/lib/Passes/Utils.cpp b/lib/Passes/Utils.cpp index a12db847b..d1bef8a2b 100644 --- a/lib/Passes/Utils.cpp +++ b/lib/Passes/Utils.cpp @@ -8,7 +8,9 @@ #include "Utils.h" +#include #include +#include #include #include #include @@ -231,4 +233,16 @@ llvm::Function *AddressOfReturnAddressFunction(llvm::Module *module) { return func; } +llvm::Function *GetOrCreateAnvillReturnFunc(llvm::Module *mod) { + auto tgt_type = + llvm::FunctionType::get(llvm::Type::getVoidTy(mod->getContext()), true); + if (auto res = mod->getFunction(anvill::kAnvillBasicBlockReturn)) { + return res; + } + + + return llvm::Function::Create(tgt_type, llvm::GlobalValue::ExternalLinkage, + anvill::kAnvillBasicBlockReturn, mod); +} + } // namespace anvill diff --git a/lib/Passes/Utils.h b/lib/Passes/Utils.h index 25e4240d4..536a26806 100644 --- a/lib/Passes/Utils.h +++ b/lib/Passes/Utils.h @@ -76,4 +76,6 @@ llvm::PreservedAnalyses ConvertBoolToPreserved(bool); // address. This is not available on all architectures / OSes. 
llvm::Function *AddressOfReturnAddressFunction(llvm::Module *module); +llvm::Function *GetOrCreateAnvillReturnFunc(llvm::Module *module); + } // namespace anvill diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 074d19393..7419daaaa 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -272,7 +272,7 @@ void StoreNativeValueToRegister(llvm::Value *native_val, auto &context = module->getContext(); auto reg_type = remill::RecontextualizeType(reg->type, context); - auto ptr_to_reg = reg->AddressOf(state_ptr, ir.GetInsertBlock()); + auto ptr_to_reg = reg->AddressOf(state_ptr, ir); llvm::StoreInst *store = nullptr; @@ -326,7 +326,7 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, } else if (decl.mem_reg) { auto mem_reg_type = remill::RecontextualizeType(decl.mem_reg->type, context); - auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, ir.GetInsertBlock()); + auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, ir); llvm::Value *addr = ir.CreateLoad(mem_reg_type, ptr_to_reg); CopyMetadataTo(native_val, addr); @@ -390,13 +390,10 @@ llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, // Load it out of a register. 
if (decl.reg) { auto reg_type = remill::RecontextualizeType(decl.reg->type, context); - auto ptr_to_reg = decl.reg->AddressOf(state_ptr, ir.GetInsertBlock()); + auto ptr_to_reg = decl.reg->AddressOf(state_ptr, ir); auto reg = ir.CreateLoad(reg_type, ptr_to_reg); CopyMetadataTo(mem_ptr, reg); - auto ipoint = ir.GetInsertPoint(); - auto iblock = ir.GetInsertBlock(); auto adapted_val = types.ConvertValueToType(ir, reg, decl_type); - ir.SetInsertPoint(iblock, ipoint); if (adapted_val) { return adapted_val; @@ -413,7 +410,7 @@ llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, } else if (decl.mem_reg) { auto mem_reg_type = remill::RecontextualizeType(decl.mem_reg->type, context); - auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, ir.GetInsertBlock()); + auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, ir); llvm::Value *addr = ir.CreateLoad(mem_reg_type, ptr_to_reg); CopyMetadataTo(mem_ptr, addr); if (0ll < decl.mem_offset) { From bc5a8a71f8ec820c480ac9dd2e59e4fa69d16cf9 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 14 Dec 2022 15:06:22 -0500 Subject: [PATCH 070/163] uncomment some opts --- lib/Optimize.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index f172ae01a..a4f0ce6a9 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -179,17 +179,15 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, // fpm.addPass(llvm::SinkingPass()); // NewGVN has bugs with `____strtold_l_internal` from chal5, amd64. - // fpm.addPass(llvm::NewGVNPass()); + fpm.addPass(llvm::NewGVNPass()); fpm.addPass(llvm::SCCPPass()); - // NOTE(alex): This pass is extremely slow with LLVM 14. - // fpm.addPass(llvm::DSEPass()); + fpm.addPass(llvm::DSEPass()); fpm.addPass(llvm::SROAPass()); fpm.addPass(llvm::EarlyCSEPass(true)); fpm.addPass(llvm::BDCEPass()); fpm.addPass(llvm::SimplifyCFGPass()); - // NOTE(alex): This pass is extremely slow with LLVM 14. 
- // fpm.addPass(llvm::SinkingPass()); + fpm.addPass(llvm::SinkingPass()); fpm.addPass(llvm::SimplifyCFGPass()); fpm.addPass(llvm::InstCombinePass()); fpm.addPass(anvill::ReplaceRemillFunctionReturnsWithAnvillFunctionReturns( @@ -256,6 +254,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, second_fpm.addPass(CodeQualityStatCollector()); AddConvertXorsToCmps(second_fpm); second_fpm.addPass(llvm::DCEPass()); + second_fpm.addPass(llvm::DSEPass()); mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(second_fpm))); @@ -270,7 +269,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, } } - // Manually clear the analyses to prevent ASAN failures in the destructors. mam.clear(); fam.clear(); From e26c58ed8d1c28bb08a714f6bb580a62e0d25968 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 15 Dec 2022 14:20:09 -0500 Subject: [PATCH 071/163] add liveness to spec --- data_specifications/specification.proto | 3 ++ include/anvill/Declarations.h | 32 +++++++++++++++--- lib/Declarations.cpp | 45 +++++++++++++++++++++++++ lib/Lifters/BasicBlockLifter.cpp | 9 +++++ lib/Protobuf.cpp | 25 ++++++++++++++ 5 files changed, 110 insertions(+), 4 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index a5bcca154..5b2e38b0b 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -176,6 +176,9 @@ message OffsetDomain { message BlockContext { repeated OffsetDomain symvals = 1; + + repeated Register live_at_entries = 2; + repeated Register live_at_exits = 3; } diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index c8750f095..d135e5d2c 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -202,12 +202,23 @@ struct LocalVariableDecl { }; class BasicBlockContext { + private: + std::vector RegistersNotInVariables( + const std::vector &all) const; + public: virtual std::vector 
GetAvailableVariables() const = 0; virtual const SpecStackOffsets &GetStackOffsets() const = 0; virtual const std::vector &ReturnValue() const = 0; + virtual const std::vector & + LiveRegistersAtEntry() const = 0; + virtual const std::vector & + LiveRegistersAtExit() const = 0; + + std::vector LiveRegistersNotInVariablesAtEntry(); + std::vector LiveRegistersNotInVariablesAtExit(); llvm::StructType *StructTypeFromVars(llvm::LLVMContext &llvm_context) const; }; @@ -217,15 +228,22 @@ class SpecBlockContext : public BasicBlockContext { private: const FunctionDecl &decl; SpecStackOffsets offsets; + std::vector live_regs_at_entry; + std::vector live_regs_at_exit; public: - SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets) - : decl(decl), - offsets(std::move(offsets)) {} + SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets); + virtual std::vector GetAvailableVariables() const override; virtual const SpecStackOffsets &GetStackOffsets() const override; - virtual const std::vector &ReturnValue() const override; + virtual const std::vector & + LiveRegistersAtEntry() const override; + // should be a subset of live registers at entry + virtual const std::vector & + LiveRegistersAtExit() const override; + + virtual const std::vector &ReturnValue() const override; }; // A function decl, as represented at a "near ABI" level. To be specific, @@ -264,6 +282,12 @@ struct FunctionDecl : public CallableDecl { std::unordered_map stack_offsets; + std::unordered_map> + live_regs_at_entry; + + std::unordered_map> + live_regs_at_exit; + std::uint64_t stack_depth; // Declare this function in an LLVM module. 
diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 1c48eb48f..aca0dea34 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -30,7 +30,9 @@ #include #include +#include #include +#include #include #include "Arch/Arch.h" @@ -121,6 +123,49 @@ const std::vector &SpecBlockContext::ReturnValue() const { return this->decl.returns; } + +std::vector +BasicBlockContext::RegistersNotInVariables( + const std::vector &all) const { + + std::unordered_set covered_registers; + for (auto cov_var : this->GetAvailableVariables()) { + if (cov_var.reg) { + covered_registers.insert(cov_var.reg); + } + } + std::vector res; + std::copy_if(all.begin(), all.end(), std::back_inserter(res), + [&covered_registers](const remill::Register *reg) { + return covered_registers.find(reg) == covered_registers.end(); + }); + return res; +} + +const std::vector & +SpecBlockContext::LiveRegistersAtEntry() const { + return this->live_regs_at_entry; +} + +SpecBlockContext::SpecBlockContext(const FunctionDecl &decl, + SpecStackOffsets offsets) + : decl(decl), + offsets(std::move(offsets)) { + this->decl.arch->ForEachRegister([this](const remill::Register *reg) { + this->live_regs_at_entry.push_back(reg); + }); + + this->decl.arch->ForEachRegister([this](const remill::Register *reg) { + this->live_regs_at_exit.push_back(reg); + }); +} + +const std::vector & +SpecBlockContext::LiveRegistersAtExit() const { + return this->live_regs_at_exit; +} + + std::vector SpecBlockContext::GetAvailableVariables() const { std::vector decls; for (auto p : this->decl.params) { diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 233da97a6..95fed51f8 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -360,8 +360,15 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { // pointer to varstruct params[remill::kStatePointerArgNum] = llvm::PointerType::get(context, 0); + //next_pc_out 
params.push_back(llvm::PointerType::get(context, 0)); + + + // state structure + params.push_back(llvm::PointerType::get(context, 0)); + + llvm::FunctionType *func_type = llvm::FunctionType::get(lifted_func_type->getReturnType(), params, false); @@ -378,11 +385,13 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto in_vars = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); + auto state = remill::NthArgument(func, remill::kNumBlockArgs + 1); memory->setName("memory"); pc->setName("program_counter"); next_pc_out->setName("next_pc_out"); in_vars->setName("in_vars"); + state->setName("state"); auto liftedty = this->options.arch->LiftedFunctionType(); diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index ec43d9615..d6e650177 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -17,10 +17,14 @@ #include #include +#include +#include #include #include #include +#include #include +#include #include "anvill/Declarations.h" #include "specification.pb.h" @@ -539,6 +543,21 @@ Result ProtobufTranslator::DecodeFunction( } +namespace { +void AddRegistersToBB( + std::unordered_map> &map, + uint64_t bb_addr, const remill::Arch *arch, + const ::google::protobuf::RepeatedPtrField<::specification::Register> + &regs) { + auto &v = map.insert({bb_addr, std::vector()}) + .first->second; + std::transform(regs.begin(), regs.end(), std::back_inserter(v), + [arch](specification::Register reg) { + return arch->RegisterByName(reg.register_name()); + }); +} +} // namespace + void ProtobufTranslator::ParseCFGIntoFunction( const ::specification::Function &obj, FunctionDecl &decl) const { for (auto blk : obj.blocks()) { @@ -582,6 +601,12 @@ void ProtobufTranslator::ParseCFGIntoFunction( SpecStackOffsets off = {affine_equalities}; decl.stack_offsets.insert({blk_addr, off}); + + AddRegistersToBB(decl.live_regs_at_entry, blk_addr, this->arch, + 
ctx.live_at_entries()); + + AddRegistersToBB(decl.live_regs_at_exit, blk_addr, this->arch, + ctx.live_at_exits()); } } From 068914e489d83d9daf6e90627e1b9ca64e422db9 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 15 Dec 2022 15:34:55 -0500 Subject: [PATCH 072/163] live loads working --- include/anvill/Declarations.h | 10 ++++-- lib/Declarations.cpp | 53 +++++++++++++++++++++----------- lib/Lifters/BasicBlockLifter.cpp | 20 +++++++++++- lib/Lifters/BasicBlockLifter.h | 3 ++ lib/Protobuf.cpp | 13 +++++--- 5 files changed, 73 insertions(+), 26 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index d135e5d2c..4ff3522eb 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -217,8 +217,10 @@ class BasicBlockContext { virtual const std::vector & LiveRegistersAtExit() const = 0; - std::vector LiveRegistersNotInVariablesAtEntry(); - std::vector LiveRegistersNotInVariablesAtExit(); + std::vector + LiveRegistersNotInVariablesAtEntry() const; + std::vector + LiveRegistersNotInVariablesAtExit() const; llvm::StructType *StructTypeFromVars(llvm::LLVMContext &llvm_context) const; }; @@ -232,7 +234,9 @@ class SpecBlockContext : public BasicBlockContext { std::vector live_regs_at_exit; public: - SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets); + SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets, + std::vector live_regs_at_entry, + std::vector live_regs_at_exit); virtual std::vector GetAvailableVariables() const override; virtual const SpecStackOffsets &GetStackOffsets() const override; diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index aca0dea34..520cb6a1e 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -142,23 +142,30 @@ BasicBlockContext::RegistersNotInVariables( return res; } + +std::vector +BasicBlockContext::LiveRegistersNotInVariablesAtEntry() const { + return this->RegistersNotInVariables(this->LiveRegistersAtEntry()); +} +std::vector 
+BasicBlockContext::LiveRegistersNotInVariablesAtExit() const { + return this->RegistersNotInVariables(this->LiveRegistersAtExit()); +} + + const std::vector & SpecBlockContext::LiveRegistersAtEntry() const { return this->live_regs_at_entry; } -SpecBlockContext::SpecBlockContext(const FunctionDecl &decl, - SpecStackOffsets offsets) +SpecBlockContext::SpecBlockContext( + const FunctionDecl &decl, SpecStackOffsets offsets, + std::vector live_regs_at_entry, + std::vector live_regs_at_exit) : decl(decl), - offsets(std::move(offsets)) { - this->decl.arch->ForEachRegister([this](const remill::Register *reg) { - this->live_regs_at_entry.push_back(reg); - }); - - this->decl.arch->ForEachRegister([this](const remill::Register *reg) { - this->live_regs_at_exit.push_back(reg); - }); -} + offsets(std::move(offsets)), + live_regs_at_entry(std::move(live_regs_at_entry)), + live_regs_at_exit(std::move(live_regs_at_exit)) {} const std::vector & SpecBlockContext::LiveRegistersAtExit() const { @@ -376,14 +383,24 @@ void CallableDecl::OverrideFunctionTypeWithABIReturnLayout() { } } -SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { - auto offs = this->stack_offsets.find(addr); - if (offs != this->stack_offsets.end()) { - - return SpecBlockContext(*this, offs->second); - } else { - return SpecBlockContext(*this, SpecStackOffsets()); +namespace { +template +V GetWithDef(uint64_t addr, const std::unordered_map &map, V def) { + if (map.find(addr) == map.end()) { + return def; } + + return map.find(addr)->second; +} +} // namespace + +SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { + return SpecBlockContext( + *this, GetWithDef(addr, this->stack_offsets, SpecStackOffsets()), + GetWithDef(addr, this->live_regs_at_entry, + std::vector()), + GetWithDef(addr, this->live_regs_at_exit, + std::vector())); } diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 95fed51f8..62e99ab02 100644 --- 
a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -344,6 +345,19 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { } +void BasicBlockLifter::InitializeLiveUncoveredRegs(llvm::Value *state_argument, + llvm::IRBuilder<> &ir) { + auto need_to_init = this->block_context.LiveRegistersNotInVariablesAtEntry(); + + for (auto init_reg : need_to_init) { + auto reg_src_ptr = init_reg->AddressOf(state_argument, ir); + auto reg_dest_ptr = init_reg->AddressOf(this->state_ptr, ir); + auto reg_type = + remill::RecontextualizeType(init_reg->type, this->llvm_context); + ir.CreateStore(ir.CreateLoad(reg_type, reg_src_ptr), reg_dest_ptr); + } +} + llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const { return this->GetAddrAnnotation(addr, this->semantics_module->getContext()); } @@ -423,6 +437,9 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { this->state_ptr = this->AllocateAndInitializeStateStructure(&blk, options.arch); + + + this->InitializeLiveUncoveredRegs(state, ir); // Put registers that are referencing the stack in terms of their displacement so that we // Can resolve these stack references later . 
@@ -525,7 +542,7 @@ void BasicBlockLifter::CallBasicBlockFunction( const CallableBasicBlockFunction &cbfunc) const { - std::vector args(remill::kNumBlockArgs + 1); + std::vector args(remill::kNumBlockArgs + 2); auto out_param_locals = builder.CreateAlloca(this->var_struct_ty); @@ -539,6 +556,7 @@ void BasicBlockLifter::CallBasicBlockFunction( args[remill::kNumBlockArgs] = remill::LoadNextProgramCounterRef(builder.GetInsertBlock()); + args[remill::kNumBlockArgs + 1] = parent_state; this->PackLocals(builder, parent_state, out_param_locals, cbfunc.GetInScopeVaraibles()); diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index eb1027863..486c05261 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -83,6 +83,9 @@ class BasicBlockLifter : public CodeLifter { llvm::MDNode *GetBasicBlockAnnotation(uint64_t addr) const; + void InitializeLiveUncoveredRegs(llvm::Value *state_argument, + llvm::IRBuilder<> &); + public: BasicBlockLifter(const BasicBlockContext &block_context, const CodeBlock &block_def, const LifterOptions &options_, diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index d6e650177..dfca0f35d 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -551,10 +551,15 @@ void AddRegistersToBB( &regs) { auto &v = map.insert({bb_addr, std::vector()}) .first->second; - std::transform(regs.begin(), regs.end(), std::back_inserter(v), - [arch](specification::Register reg) { - return arch->RegisterByName(reg.register_name()); - }); + + for (auto reg : regs) { + auto fill_reg = arch->RegisterByName(reg.register_name()); + if (fill_reg) { + v.push_back(fill_reg); + } else { + LOG(ERROR) << "No reg for: " << reg.register_name(); + } + } } } // namespace From e63d4d161f20d3ec7a52d29a8e4d5418070cc89e Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 15 Dec 2022 16:13:04 -0500 Subject: [PATCH 073/163] save live registers at exit --- lib/Lifters/BasicBlockLifter.cpp | 24 +++++++++++++++++++++--- 
lib/Lifters/BasicBlockLifter.h | 2 ++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 62e99ab02..26a5f0582 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -358,6 +358,19 @@ void BasicBlockLifter::InitializeLiveUncoveredRegs(llvm::Value *state_argument, } } +void BasicBlockLifter::SaveLiveUncoveredRegs(llvm::Value *state_argument, + llvm::IRBuilder<> &ir) { + auto need_to_init = this->block_context.LiveRegistersNotInVariablesAtExit(); + + for (auto init_reg : need_to_init) { + auto reg_src_ptr = init_reg->AddressOf(this->state_ptr, ir); + auto reg_dest_ptr = init_reg->AddressOf(state_argument, ir); + auto reg_type = + remill::RecontextualizeType(init_reg->type, this->llvm_context); + ir.CreateStore(ir.CreateLoad(reg_type, reg_src_ptr), reg_dest_ptr); + } +} + llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const { return this->GetAddrAnnotation(addr, this->semantics_module->getContext()); } @@ -444,11 +457,13 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { // Can resolve these stack references later . + auto sp_value = + options.stack_pointer_init_procedure(ir, sp_reg, this->block_def.addr); auto sp_ptr = sp_reg->AddressOf(this->state_ptr, ir); + auto arg_sp_ptr = sp_reg->AddressOf(state, ir); // Initialize the stack pointer. 
- ir.CreateStore( - options.stack_pointer_init_procedure(ir, sp_reg, this->block_def.addr), - sp_ptr); + ir.CreateStore(sp_value, sp_ptr); + ir.CreateStore(arg_sp_ptr, sp_ptr); auto stack_offsets = this->block_context.GetStackOffsets(); @@ -482,7 +497,10 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { this->PackLocals(ir, this->state_ptr, in_vars, this->block_context.GetAvailableVariables()); + this->SaveLiveUncoveredRegs(state, ir); + ir.CreateRet(ret_mem); + BasicBlockFunction bbf{func, pc_arg, in_vars, mem_arg, next_pc_out}; return bbf; diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 486c05261..dcc3911ee 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -86,6 +86,8 @@ class BasicBlockLifter : public CodeLifter { void InitializeLiveUncoveredRegs(llvm::Value *state_argument, llvm::IRBuilder<> &); + void SaveLiveUncoveredRegs(llvm::Value *state_argument, llvm::IRBuilder<> &); + public: BasicBlockLifter(const BasicBlockContext &block_context, const CodeBlock &block_def, const LifterOptions &options_, From 31c55705349a1ca5e730642ff2f3fa6211575f25 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 16 Dec 2022 16:20:11 +0100 Subject: [PATCH 074/163] Begin porting pointer lifter to low level lifter --- .../Passes/ConvertPointerArithmeticToGEP.h | 13 ++++--- lib/Lifters/BasicBlockLifter.cpp | 4 +++ lib/Lifters/FunctionLifter.cpp | 14 -------- lib/Optimize.cpp | 2 +- lib/Passes/ConvertPointerArithmeticToGEP.cpp | 36 +++++++++---------- 5 files changed, 30 insertions(+), 39 deletions(-) diff --git a/include/anvill/Passes/ConvertPointerArithmeticToGEP.h b/include/anvill/Passes/ConvertPointerArithmeticToGEP.h index 658287ee0..42b2bec3f 100644 --- a/include/anvill/Passes/ConvertPointerArithmeticToGEP.h +++ b/include/anvill/Passes/ConvertPointerArithmeticToGEP.h @@ -17,10 +17,12 @@ #include #include +#include "BasicBlockPass.h" + namespace anvill { class 
ConvertPointerArithmeticToGEP final - : public llvm::PassInfoMixin { + : public BasicBlockPass { private: struct Impl; std::unique_ptr impl; @@ -30,14 +32,15 @@ class ConvertPointerArithmeticToGEP final using TypeMap = std::unordered_map; using MDMap = std::unordered_map; - // Function pass entry point - llvm::PreservedAnalyses run(llvm::Function &function, - llvm::FunctionAnalysisManager &fam); + llvm::PreservedAnalyses + runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, + const anvill::BasicBlockContext &); // Returns the pass name static llvm::StringRef name(void); - ConvertPointerArithmeticToGEP(TypeMap &types, StructMap &structs, MDMap &md); + ConvertPointerArithmeticToGEP(const BasicBlockContexts &contexts, + TypeMap &types, StructMap &structs, MDMap &md); ConvertPointerArithmeticToGEP(const ConvertPointerArithmeticToGEP &); ~ConvertPointerArithmeticToGEP(); }; diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 26a5f0582..62bba1089 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -542,6 +542,10 @@ void BasicBlockLifter::UnpackLocals( auto ptr = bldr.CreateGEP(this->var_struct_ty, returned_value, {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, field_offset)}); + if (auto insn = llvm::dyn_cast(ptr)) { + insn->setMetadata("anvill.type", + this->type_specifier.EncodeToMetadata(decl.spec_type)); + } auto loaded_var_val = bldr.CreateLoad(decl.type, ptr, decl.name); field_offset += 1; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 4f37332d4..d9c08a83a 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -242,13 +242,6 @@ llvm::Function *FunctionLifter::GetOrDeclareFunction(const FunctionDecl &decl) { native_func->addFnAttr(llvm::Attribute::NoReturn); } - std::vector args; - for (auto &arg : decl.params) { - args.push_back(type_specifier.EncodeToMetadata(arg.spec_type)); - } - 
native_func->setMetadata("anvill.args", - llvm::MDNode::get(llvm_context, args)); - return native_func; } @@ -791,13 +784,6 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, new_version->setMetadata(pc_annotation_id, func_annotation); } - std::vector args; - for (auto &arg : decl.params) { - args.push_back(type_specifier.EncodeToMetadata(arg.spec_type)); - } - new_version->setMetadata("anvill.args", - llvm::MDNode::get(llvm_context, args)); - // Update the context to keep its internal concepts of what LLVM objects // correspond with which native binary addresses. lifter_context.AddEntity(new_version, decl.address); diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 7f1f92325..5ebad5cca 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -243,7 +243,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, fpm.addPass(llvm::SROAPass()); AddBranchRecovery(fpm); - fpm.addPass(ConvertPointerArithmeticToGEP(types, structs, md)); + fpm.addPass(ConvertPointerArithmeticToGEP(contexts, types, structs, md)); pb.crossRegisterProxies(lam, fam, cam, mam); diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index 9b03e642f..6a50820b2 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -34,6 +34,7 @@ namespace anvill { struct ConvertPointerArithmeticToGEP::Impl { + const BasicBlockContexts &contexts; TypeMap &types; StructMap &structs; MDMap &md; @@ -73,8 +74,10 @@ struct ConvertPointerArithmeticToGEP::Impl { bool FoldPtrAdd(llvm::Function &f); bool FoldScaledIndex(llvm::Function &f); - Impl(TypeMap &types, StructMap &structs, MDMap &md) - : types(types), + Impl(const BasicBlockContexts &contexts, TypeMap &types, StructMap &structs, + MDMap &md) + : contexts(contexts), + types(types), structs(structs), md(md) {} }; @@ -228,14 +231,7 @@ TypeSpec ConvertPointerArithmeticToGEP::Impl::MDToTypeSpec(llvm::MDNode *md) { std::optional 
ConvertPointerArithmeticToGEP::Impl::GetTypeInfo(llvm::Value *val) { llvm::MDNode *md = nullptr; - if (auto arg = llvm::dyn_cast(val)) { - auto args_md = arg->getParent()->getMetadata("anvill.args"); - if (!args_md) { - return {}; - } - - md = llvm::cast(args_md->getOperand(arg->getArgNo()).get()); - } else if (auto gvar = llvm::dyn_cast(val)) { + if (auto gvar = llvm::dyn_cast(val)) { md = gvar->getMetadata("anvill.type"); } else if (auto ptr_insn = llvm::dyn_cast(val)) { md = ptr_insn->getMetadata("anvill.type"); @@ -314,15 +310,17 @@ ConvertPointerArithmeticToGEP::Impl::TypeSpecToMD(llvm::LLVMContext &context, [this, &context](auto &&t) { return TypeSpecToMD(context, t); }, type); } -ConvertPointerArithmeticToGEP::ConvertPointerArithmeticToGEP(TypeMap &types, - StructMap &structs, - MDMap &md) - : impl(std::make_unique(types, structs, md)) {} +ConvertPointerArithmeticToGEP::ConvertPointerArithmeticToGEP( + const BasicBlockContexts &contexts, TypeMap &types, StructMap &structs, + MDMap &md) + : BasicBlockPass(contexts), + impl(std::make_unique(contexts, types, structs, md)) {} ConvertPointerArithmeticToGEP::ConvertPointerArithmeticToGEP( const ConvertPointerArithmeticToGEP &pass) - : impl(std::make_unique(pass.impl->types, pass.impl->structs, - pass.impl->md)) {} + : BasicBlockPass(pass.impl->contexts), + impl(std::make_unique(pass.impl->contexts, pass.impl->types, + pass.impl->structs, pass.impl->md)) {} ConvertPointerArithmeticToGEP::~ConvertPointerArithmeticToGEP() = default; @@ -564,9 +562,9 @@ bool ConvertPointerArithmeticToGEP::Impl::FoldScaledIndex(llvm::Function &f) { return false; } -llvm::PreservedAnalyses -ConvertPointerArithmeticToGEP::run(llvm::Function &function, - llvm::FunctionAnalysisManager &fam) { +llvm::PreservedAnalyses ConvertPointerArithmeticToGEP::runOnBasicBlockFunction( + llvm::Function &function, llvm::FunctionAnalysisManager &fam, + const anvill::BasicBlockContext &) { bool changed = impl->ConvertLoadInt(function); changed |= 
impl->FoldPtrAdd(function); changed |= impl->FoldScaledIndex(function); From 38c7f74926d27308d45718d5632b39baa548b7af Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Fri, 16 Dec 2022 15:40:33 -0500 Subject: [PATCH 075/163] remove mac only header def --- include/anvill/Declarations.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 4ff3522eb..f83423ca5 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -8,7 +8,6 @@ #pragma once -#include <_types/_uint64_t.h> #include #include #include From a67f34af92f8193969298963fc66f958e0621254 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 19 Dec 2022 15:12:58 -0500 Subject: [PATCH 076/163] start porting to split params --- lib/Lifters/BasicBlockLifter.cpp | 60 ++++++++++++++++++++++---------- lib/Lifters/BasicBlockLifter.h | 12 +++++-- 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 62bba1089..4b878a2e6 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -385,15 +386,16 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { std::vector params = std::vector( lifted_func_type->param_begin(), lifted_func_type->param_end()); - // pointer to varstruct + // pointer to state pointer params[remill::kStatePointerArgNum] = llvm::PointerType::get(context, 0); //next_pc_out params.push_back(llvm::PointerType::get(context, 0)); - // state structure - params.push_back(llvm::PointerType::get(context, 0)); + for (auto vtype : this->var_struct_ty->elements()) { + params.push_back(vtype); + } llvm::FunctionType *func_type = @@ -408,6 +410,16 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { func->setMetadata(anvill::kBasicBlockMetadata, 
GetBasicBlockAnnotation(this->block_def.addr)); + + auto start_ind = lifted_func_type->getNumParams(); + for (auto v : this->block_context.GetAvailableVariables()) { + if (!v.name.empty()) { + auto arg = remill::NthArgument(func, start_ind); + arg->setName(v.name); + } + start_ind += 1; + } + auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); auto in_vars = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); @@ -477,7 +489,12 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { } } - this->UnpackLocals(ir, in_vars, this->state_ptr, + + PointerProvider ptr_provider = [this, func](size_t index) -> llvm::Value * { + return this->ProvidePointerFromFunctionArgs(func, index); + }; + + this->UnpackLocals(ir, ptr_provider, this->state_ptr, this->block_context.GetAvailableVariables()); @@ -494,7 +511,7 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto ret_mem = ir.CreateCall(this->lifted_func, args); - this->PackLocals(ir, this->state_ptr, in_vars, + this->PackLocals(ir, this->state_ptr, ptr_provider, this->block_context.GetAvailableVariables()); this->SaveLiveUncoveredRegs(state, ir); @@ -514,14 +531,11 @@ llvm::StructType *BasicBlockLifter::StructTypeFromVars() const { // Packs in scope variables into a struct void BasicBlockLifter::PackLocals( llvm::IRBuilder<> &bldr, llvm::Value *from_state_ptr, - llvm::Value *into_vars, const std::vector &decls) const { + PointerProvider into_vars, const std::vector &decls) const { - auto i32 = llvm::IntegerType::get(llvm_context, 32); uint64_t field_offset = 0; for (auto decl : decls) { - auto ptr = bldr.CreateGEP(this->var_struct_ty, into_vars, - {llvm::ConstantInt::get(i32, 0), - llvm::ConstantInt::get(i32, field_offset)}); + auto ptr = into_vars(field_offset); field_offset += 1; auto state_loaded_value = @@ -533,20 +547,16 @@ void BasicBlockLifter::PackLocals( } void BasicBlockLifter::UnpackLocals( - 
llvm::IRBuilder<> &bldr, llvm::Value *returned_value, + llvm::IRBuilder<> &bldr, PointerProvider returned_value, llvm::Value *into_state_ptr, const std::vector &decls) const { uint64_t field_offset = 0; - auto i32 = llvm::IntegerType::get(llvm_context, 32); for (auto decl : decls) { - auto ptr = bldr.CreateGEP(this->var_struct_ty, returned_value, - {llvm::ConstantInt::get(i32, 0), - llvm::ConstantInt::get(i32, field_offset)}); + auto ptr = returned_value(field_offset); if (auto insn = llvm::dyn_cast(ptr)) { insn->setMetadata("anvill.type", this->type_specifier.EncodeToMetadata(decl.spec_type)); } - auto loaded_var_val = bldr.CreateLoad(decl.type, ptr, decl.name); field_offset += 1; auto new_mem_ptr = StoreNativeValue( @@ -580,7 +590,13 @@ void BasicBlockLifter::CallBasicBlockFunction( args[remill::kNumBlockArgs + 1] = parent_state; - this->PackLocals(builder, parent_state, out_param_locals, + + PointerProvider ptr_provider = + [this, out_param_locals](size_t index) -> llvm::Value * { + return this->ProvidePointerFromStruct(out_param_locals, index); + }; + + this->PackLocals(builder, parent_state, ptr_provider, cbfunc.GetInScopeVaraibles()); auto new_mem_ptr = builder.CreateCall(cbfunc.GetFunction(), args); @@ -589,7 +605,7 @@ void BasicBlockLifter::CallBasicBlockFunction( builder.CreateStore(new_mem_ptr, mem_ptr_ref); - this->UnpackLocals(builder, out_param_locals, parent_state, + this->UnpackLocals(builder, ptr_provider, parent_state, cbfunc.GetInScopeVaraibles()); } @@ -642,4 +658,12 @@ CallableBasicBlockFunction::GetInScopeVaraibles() const { return this->in_scope_locals; } +llvm::Value *BasicBlockLifter::ProvidePointerFromStruct(llvm::Value *target_sty, + size_t index) const {} + +llvm::Value * +BasicBlockLifter::ProvidePointerFromFunctionArgs(llvm::Function *, + size_t index) const {} + + } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index dcc3911ee..5944b6dd4 100644 --- 
a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -36,6 +36,12 @@ class CallableBasicBlockFunction; */ class BasicBlockLifter : public CodeLifter { private: + llvm::Value *ProvidePointerFromStruct(llvm::Value *, size_t index) const; + + llvm::Value *ProvidePointerFromFunctionArgs(llvm::Function *, + size_t index) const; + + const BasicBlockContext &block_context; const CodeBlock &block_def; @@ -102,12 +108,14 @@ class BasicBlockLifter : public CodeLifter { CallableBasicBlockFunction LiftBasicBlockFunction() &&; + + using PointerProvider = std::function; // Packs in scope variables into a struct void PackLocals(llvm::IRBuilder<> &bldr, llvm::Value *from_state_ptr, - llvm::Value *into_vars, + PointerProvider into_vars, const std::vector &decls) const; - void UnpackLocals(llvm::IRBuilder<> &, llvm::Value *returned_value, + void UnpackLocals(llvm::IRBuilder<> &, PointerProvider returned_value, llvm::Value *into_state_ptr, const std::vector &) const; From 7be0fb595eb40175d62f7d50d0b2685af1d0e272 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 19 Dec 2022 16:18:41 -0500 Subject: [PATCH 077/163] finish pass params and vars directly --- include/anvill/Declarations.h | 1 + lib/Lifters/BasicBlockLifter.cpp | 95 +++++++++++++++++++------------- lib/Lifters/BasicBlockLifter.h | 11 ++-- lib/Lifters/FunctionLifter.cpp | 9 ++- 4 files changed, 71 insertions(+), 45 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index f83423ca5..834c480cb 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -206,6 +206,7 @@ class BasicBlockContext { const std::vector &all) const; public: + virtual ~BasicBlockContext() = default; virtual std::vector GetAvailableVariables() const = 0; virtual const SpecStackOffsets &GetStackOffsets() const = 0; diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 4b878a2e6..7e750a2b5 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ 
b/lib/Lifters/BasicBlockLifter.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -34,9 +35,9 @@ CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { this->RecursivelyInlineFunctionCallees(bbfunc.func); anvill::EntityLifter lifter(options); - return CallableBasicBlockFunction(bbfunc.func, - this->block_context.GetAvailableVariables(), - block_def, std::move(*this)); + auto avails = this->block_context->GetAvailableVariables(); + return CallableBasicBlockFunction(bbfunc.func, std::move(avails), block_def, + std::move(*this)); } @@ -348,7 +349,7 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { void BasicBlockLifter::InitializeLiveUncoveredRegs(llvm::Value *state_argument, llvm::IRBuilder<> &ir) { - auto need_to_init = this->block_context.LiveRegistersNotInVariablesAtEntry(); + auto need_to_init = this->block_context->LiveRegistersNotInVariablesAtEntry(); for (auto init_reg : need_to_init) { auto reg_src_ptr = init_reg->AddressOf(state_argument, ir); @@ -361,7 +362,7 @@ void BasicBlockLifter::InitializeLiveUncoveredRegs(llvm::Value *state_argument, void BasicBlockLifter::SaveLiveUncoveredRegs(llvm::Value *state_argument, llvm::IRBuilder<> &ir) { - auto need_to_init = this->block_context.LiveRegistersNotInVariablesAtExit(); + auto need_to_init = this->block_context->LiveRegistersNotInVariablesAtExit(); for (auto init_reg : need_to_init) { auto reg_src_ptr = init_reg->AddressOf(this->state_ptr, ir); @@ -393,8 +394,9 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { params.push_back(llvm::PointerType::get(context, 0)); - for (auto vtype : this->var_struct_ty->elements()) { - params.push_back(vtype); + for (size_t i = 0; i < this->var_struct_ty->getNumElements(); i++) { + // pointer to each param + params.push_back(llvm::PointerType::get(context, 0)); } @@ -411,8 +413,8 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { GetBasicBlockAnnotation(this->block_def.addr)); - 
auto start_ind = lifted_func_type->getNumParams(); - for (auto v : this->block_context.GetAvailableVariables()) { + auto start_ind = lifted_func_type->getNumParams() + 1; + for (auto v : this->block_context->GetAvailableVariables()) { if (!v.name.empty()) { auto arg = remill::NthArgument(func, start_ind); arg->setName(v.name); @@ -421,15 +423,13 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { } auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); - auto in_vars = remill::NthArgument(func, remill::kStatePointerArgNum); + auto state = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); - auto state = remill::NthArgument(func, remill::kNumBlockArgs + 1); memory->setName("memory"); pc->setName("program_counter"); next_pc_out->setName("next_pc_out"); - in_vars->setName("in_vars"); state->setName("state"); @@ -477,7 +477,7 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { ir.CreateStore(sp_value, sp_ptr); ir.CreateStore(arg_sp_ptr, sp_ptr); - auto stack_offsets = this->block_context.GetStackOffsets(); + auto stack_offsets = this->block_context->GetStackOffsets(); for (auto ®_off : stack_offsets.affine_equalities) { if (reg_off.base_register && reg_off.base_register == this->sp_reg) { @@ -495,7 +495,7 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { }; this->UnpackLocals(ir, ptr_provider, this->state_ptr, - this->block_context.GetAvailableVariables()); + this->block_context->GetAvailableVariables()); auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); @@ -512,20 +512,20 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { this->PackLocals(ir, this->state_ptr, ptr_provider, - this->block_context.GetAvailableVariables()); + this->block_context->GetAvailableVariables()); this->SaveLiveUncoveredRegs(state, ir); ir.CreateRet(ret_mem); - 
BasicBlockFunction bbf{func, pc_arg, in_vars, mem_arg, next_pc_out}; + BasicBlockFunction bbf{func, pc_arg, mem_arg, next_pc_out}; return bbf; } llvm::StructType *BasicBlockLifter::StructTypeFromVars() const { - return this->block_context.StructTypeFromVars(this->llvm_context); + return this->block_context->StructTypeFromVars(this->llvm_context); } // Packs in scope variables into a struct @@ -574,11 +574,11 @@ void BasicBlockLifter::CallBasicBlockFunction( const CallableBasicBlockFunction &cbfunc) const { - std::vector args(remill::kNumBlockArgs + 2); + std::vector args(remill::kNumBlockArgs + 1); auto out_param_locals = builder.CreateAlloca(this->var_struct_ty); - args[remill::kStatePointerArgNum] = out_param_locals; + args[remill::kStatePointerArgNum] = parent_state; args[remill::kPCArgNum] = options.program_counter_init_procedure( builder, pc_reg, cbfunc.GetBlock().addr); @@ -588,17 +588,24 @@ void BasicBlockLifter::CallBasicBlockFunction( args[remill::kNumBlockArgs] = remill::LoadNextProgramCounterRef(builder.GetInsertBlock()); - args[remill::kNumBlockArgs + 1] = parent_state; - PointerProvider ptr_provider = - [this, out_param_locals](size_t index) -> llvm::Value * { - return this->ProvidePointerFromStruct(out_param_locals, index); + [&builder, this, out_param_locals](size_t index) -> llvm::Value * { + return this->ProvidePointerFromStruct(builder, out_param_locals, index); }; this->PackLocals(builder, parent_state, ptr_provider, cbfunc.GetInScopeVaraibles()); + + for (size_t ind = 0; + ind < this->block_context->GetAvailableVariables().size(); ind++) { + auto ptr = ptr_provider(ind); + CHECK(ptr != nullptr); + ptr->dump(); + args.push_back(ptr); + } + auto new_mem_ptr = builder.CreateCall(cbfunc.GetFunction(), args); auto mem_ptr_ref = remill::LoadMemoryPointerRef(builder.GetInsertBlock()); @@ -616,22 +623,21 @@ void CallableBasicBlockFunction::CallBasicBlockFunction( } CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlock( - const BasicBlockContext 
&block_context, const CodeBlock &block_def, - const LifterOptions &options_, llvm::Module *semantics_module, - const TypeTranslator &type_specifier) { + std::unique_ptr block_context, + const CodeBlock &block_def, const LifterOptions &options_, + llvm::Module *semantics_module, const TypeTranslator &type_specifier) { - return BasicBlockLifter(block_context, block_def, options_, semantics_module, - type_specifier) + return BasicBlockLifter(std::move(block_context), block_def, options_, + semantics_module, type_specifier) .LiftBasicBlockFunction(); } -BasicBlockLifter::BasicBlockLifter(const BasicBlockContext &block_context, - const CodeBlock &block_def, - const LifterOptions &options_, - llvm::Module *semantics_module, - const TypeTranslator &type_specifier) +BasicBlockLifter::BasicBlockLifter( + std::unique_ptr block_context, + const CodeBlock &block_def, const LifterOptions &options_, + llvm::Module *semantics_module, const TypeTranslator &type_specifier) : CodeLifter(options_, semantics_module, type_specifier), - block_context(block_context), + block_context(std::move(block_context)), block_def(block_def) { this->var_struct_ty = this->StructTypeFromVars(); } @@ -658,12 +664,25 @@ CallableBasicBlockFunction::GetInScopeVaraibles() const { return this->in_scope_locals; } -llvm::Value *BasicBlockLifter::ProvidePointerFromStruct(llvm::Value *target_sty, - size_t index) const {} +llvm::Value *BasicBlockLifter::ProvidePointerFromStruct(llvm::IRBuilder<> &ir, + llvm::Value *target_sty, + size_t index) const { + auto i32 = llvm::IntegerType::get(llvm_context, 32); + auto ptr = ir.CreateGEP( + this->var_struct_ty, target_sty, + {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, index)}); + return ptr; +} llvm::Value * -BasicBlockLifter::ProvidePointerFromFunctionArgs(llvm::Function *, - size_t index) const {} +BasicBlockLifter::ProvidePointerFromFunctionArgs(llvm::Function *func, + size_t index) const { + 
CHECK(this->options.arch->LiftedFunctionType()->getNumParams() + 1 + + this->block_context->GetAvailableVariables().size() == + func->arg_size()); + return func->getArg( + index + this->options.arch->LiftedFunctionType()->getNumParams() + 1); +} } // namespace anvill \ No newline at end of file diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 5944b6dd4..ce0d2f9a5 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -11,6 +11,7 @@ #include #include +#include #include #include "CodeLifter.h" @@ -22,7 +23,6 @@ namespace anvill { struct BasicBlockFunction { llvm::Function *func; llvm::Argument *pc_arg; - llvm::Argument *variable_ptr; llvm::Argument *mem_ptr; llvm::Argument *next_pc_out_param; }; @@ -36,13 +36,14 @@ class CallableBasicBlockFunction; */ class BasicBlockLifter : public CodeLifter { private: - llvm::Value *ProvidePointerFromStruct(llvm::Value *, size_t index) const; + llvm::Value *ProvidePointerFromStruct(llvm::IRBuilder<> &ir, llvm::Value *, + size_t index) const; llvm::Value *ProvidePointerFromFunctionArgs(llvm::Function *, size_t index) const; - const BasicBlockContext &block_context; + std::unique_ptr block_context; const CodeBlock &block_def; llvm::StructType *var_struct_ty{nullptr}; @@ -95,12 +96,12 @@ class BasicBlockLifter : public CodeLifter { void SaveLiveUncoveredRegs(llvm::Value *state_argument, llvm::IRBuilder<> &); public: - BasicBlockLifter(const BasicBlockContext &block_context, + BasicBlockLifter(std::unique_ptr block_context, const CodeBlock &block_def, const LifterOptions &options_, llvm::Module *semantics_module, const TypeTranslator &type_specifier); static CallableBasicBlockFunction - LiftBasicBlock(const BasicBlockContext &block_context, + LiftBasicBlock(std::unique_ptr block_context, const CodeBlock &block_def, const LifterOptions &options_, llvm::Module *semantics_module, const TypeTranslator &type_specifier); diff --git a/lib/Lifters/FunctionLifter.cpp 
b/lib/Lifters/FunctionLifter.cpp index d9c08a83a..6c655d7eb 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -55,6 +55,7 @@ #include #include +#include #include #include #include @@ -377,9 +378,13 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { CallableBasicBlockFunction FunctionLifter::LiftBasicBlockFunction(const CodeBlock &blk) const { + std::unique_ptr context = + std::make_unique( + this->curr_decl->GetBlockContext(blk.addr)); + return BasicBlockLifter::LiftBasicBlock( - this->curr_decl->GetBlockContext(blk.addr), blk, this->options, - this->semantics_module.get(), this->type_specifier); + std::move(context), blk, this->options, this->semantics_module.get(), + this->type_specifier); } From 878effee0c05630fd849dfdeba4fa4d4f84f647a Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 20 Dec 2022 08:24:55 -0500 Subject: [PATCH 078/163] small memory pointer fix --- lib/Lifters/BasicBlockLifter.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 7e750a2b5..e0ca2eb15 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -505,9 +505,10 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { func->addFnAttr(llvm::Attribute::NoInline); //func->setLinkage(llvm::GlobalValue::InternalLinkage); - // TODO(Ian): memory pointer isnt quite right + auto mem_res = remill::LoadMemoryPointer(ir, this->intrinsics); + std::array args = { - this->state_ptr, pc, mem_arg, next_pc_out}; + this->state_ptr, pc, mem_res, next_pc_out}; auto ret_mem = ir.CreateCall(this->lifted_func, args); From 75f21ed48125a2ffa3b59968ff8e3730627bc493 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 21 Dec 2022 08:56:38 -0500 Subject: [PATCH 079/163] fix stack pointer, also update stackp pointer replacement to reference argument passed scope vars --- include/anvill/Utils.h | 6 ++++++ 
lib/Lifters/BasicBlockLifter.cpp | 22 +++++++++++++++------- lib/Passes/ReplaceStackReferences.cpp | 8 ++------ lib/Utils.cpp | 11 +++++++++++ 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index eeead9ae5..4b26a9b6d 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -16,6 +16,9 @@ #include #include +#include "anvill/Declarations.h" +#include "anvill/Lifters.h" + namespace llvm { class BasicBlock; class Instruction; @@ -128,4 +131,7 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, std::optional GetBasicBlockAddr(llvm::Function *func); +llvm::Value *ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, + const anvill::LifterOptions &, + const BasicBlockContext &); } // namespace anvill diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index e0ca2eb15..4f70518b8 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -32,10 +32,17 @@ CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); CHECK(!llvm::verifyFunction(*bbfunc.func, &llvm::errs())); + + //bbfunc.func->dump(); + //lifted_func->dump(); + //LOG(FATAL) << "fdumps"; + + this->RecursivelyInlineFunctionCallees(bbfunc.func); anvill::EntityLifter lifter(options); auto avails = this->block_context->GetAvailableVariables(); + return CallableBasicBlockFunction(bbfunc.func, std::move(avails), block_def, std::move(*this)); } @@ -472,10 +479,14 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto sp_value = options.stack_pointer_init_procedure(ir, sp_reg, this->block_def.addr); auto sp_ptr = sp_reg->AddressOf(this->state_ptr, ir); - auto arg_sp_ptr = sp_reg->AddressOf(state, ir); // Initialize the stack pointer. 
ir.CreateStore(sp_value, sp_ptr); - ir.CreateStore(arg_sp_ptr, sp_ptr); + + // Initialize the program counter + auto pc_ptr = pc_reg->AddressOf(this->state_ptr, ir); + ir.CreateStore(this->options.program_counter_init_procedure(ir, pc_reg, 0), + pc_ptr); + auto stack_offsets = this->block_context->GetStackOffsets(); @@ -678,11 +689,8 @@ llvm::Value *BasicBlockLifter::ProvidePointerFromStruct(llvm::IRBuilder<> &ir, llvm::Value * BasicBlockLifter::ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index) const { - CHECK(this->options.arch->LiftedFunctionType()->getNumParams() + 1 + - this->block_context->GetAvailableVariables().size() == - func->arg_size()); - return func->getArg( - index + this->options.arch->LiftedFunctionType()->getNumParams() + 1); + return anvill::ProvidePointerFromFunctionArgs(func, index, this->options, + *this->block_context); } diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 3b0ae1d64..edb22358f 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -160,8 +160,6 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( CrossReferenceFolder folder(resolver, this->lifter.DataLayout()); StackModel smodel(cont, this->lifter.Options().arch); - auto vstate = F.getArg(remill::kStatePointerArgNum); - std::vector> to_replace_vars; for (auto use : EnumerateStackPointerUsages(F)) { @@ -187,10 +185,8 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( ir.SetInsertPoint(insn); } - auto i32 = llvm::IntegerType::get(F.getContext(), 32); - auto g = ir.CreateGEP( - cont.StructTypeFromVars(F.getContext()), vstate, - {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, v.index)}); + auto g = anvill::ProvidePointerFromFunctionArgs( + &F, v.index, this->lifter.Options(), cont); use->set(g); } F.dump(); diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 7419daaaa..bd8ef11f1 100644 --- a/lib/Utils.cpp +++ 
b/lib/Utils.cpp @@ -799,4 +799,15 @@ std::optional GetBasicBlockAddr(llvm::Function *func) { return llvm::cast(v)->getLimitedValue(); } +llvm::Value * +ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, + const anvill::LifterOptions &options, + const anvill::BasicBlockContext &context) { + CHECK(options.arch->LiftedFunctionType()->getNumParams() + 1 + + context.GetAvailableVariables().size() == + func->arg_size()); + return func->getArg(index + + options.arch->LiftedFunctionType()->getNumParams() + 1); +} + } // namespace anvill From 4910c2fe564edbb920fbed881bced73d9a324a0e Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 21 Dec 2022 09:08:18 -0500 Subject: [PATCH 080/163] initilaize the program counter later --- lib/Lifters/BasicBlockLifter.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 4f70518b8..ad22a1932 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -482,11 +482,6 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { // Initialize the stack pointer. 
ir.CreateStore(sp_value, sp_ptr); - // Initialize the program counter - auto pc_ptr = pc_reg->AddressOf(this->state_ptr, ir); - ir.CreateStore(this->options.program_counter_init_procedure(ir, pc_reg, 0), - pc_ptr); - auto stack_offsets = this->block_context->GetStackOffsets(); @@ -518,6 +513,12 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto mem_res = remill::LoadMemoryPointer(ir, this->intrinsics); + // Initialize the program counter + auto pc_ptr = pc_reg->AddressOf(this->state_ptr, ir); + ir.CreateStore(this->options.program_counter_init_procedure(ir, pc_reg, 0), + pc_ptr); + + std::array args = { this->state_ptr, pc, mem_res, next_pc_out}; auto ret_mem = ir.CreateCall(this->lifted_func, args); From 3c02941e105d80bb23f5c23a39f5b10c5c113c58 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 21 Dec 2022 09:39:15 -0500 Subject: [PATCH 081/163] check if have overlapping variable --- lib/Passes/ReplaceStackReferences.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index edb22358f..ac3f4699c 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -173,7 +173,7 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( auto referenced_variable = smodel.GetOverlappingParam(stack_offset); //TODO(Ian) handle nonzero offset - if (referenced_variable->offset == 0 && + if (referenced_variable.has_value() && referenced_variable->offset == 0 && llvm::isa(use->get()->getType())) { to_replace_vars.push_back({use, referenced_variable->decl}); } From 1629fbabca4a4eaf741d381fc68820032953f8a8 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 21 Dec 2022 12:37:26 -0500 Subject: [PATCH 082/163] reset insert point --- lib/Lifters/BasicBlockLifter.cpp | 35 ++++----- lib/Passes/ReplaceStackReferences.cpp | 1 - lib/Type.cpp | 103 +++++++++++++------------- lib/Utils.cpp | 7 ++ 4 files changed, 76 
insertions(+), 70 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index ad22a1932..0cce376ac 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -470,32 +470,27 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { this->state_ptr = this->AllocateAndInitializeStateStructure(&blk, options.arch); - this->InitializeLiveUncoveredRegs(state, ir); // Put registers that are referencing the stack in terms of their displacement so that we // Can resolve these stack references later . - auto sp_value = options.stack_pointer_init_procedure(ir, sp_reg, this->block_def.addr); auto sp_ptr = sp_reg->AddressOf(this->state_ptr, ir); // Initialize the stack pointer. ir.CreateStore(sp_value, sp_ptr); - auto stack_offsets = this->block_context->GetStackOffsets(); - for (auto ®_off : stack_offsets.affine_equalities) { if (reg_off.base_register && reg_off.base_register == this->sp_reg) { auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( ir, this->sp_reg, this->block_def.addr, reg_off.offset); StoreNativeValueToRegister(new_value, reg_off.target_register, - type_provider.Dictionary(), intrinsics, &blk, + type_provider.Dictionary(), intrinsics, ir, this->state_ptr); } } - PointerProvider ptr_provider = [this, func](size_t index) -> llvm::Value * { return this->ProvidePointerFromFunctionArgs(func, index); }; @@ -503,11 +498,9 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { this->UnpackLocals(ir, ptr_provider, this->state_ptr, this->block_context->GetAvailableVariables()); - auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); auto mem_arg = remill::NthArgument(func, remill::kMemoryPointerArgNum); - func->addFnAttr(llvm::Attribute::NoInline); //func->setLinkage(llvm::GlobalValue::InternalLinkage); @@ -518,17 +511,17 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { ir.CreateStore(this->options.program_counter_init_procedure(ir, 
pc_reg, 0), pc_ptr); - std::array args = { this->state_ptr, pc, mem_res, next_pc_out}; - auto ret_mem = ir.CreateCall(this->lifted_func, args); + auto ret_mem = ir.CreateCall(this->lifted_func, args); this->PackLocals(ir, this->state_ptr, ptr_provider, this->block_context->GetAvailableVariables()); this->SaveLiveUncoveredRegs(state, ir); + CHECK(ir.GetInsertPoint() == func->getEntryBlock().end()); ir.CreateRet(ret_mem); BasicBlockFunction bbf{func, pc_arg, mem_arg, next_pc_out}; @@ -551,10 +544,10 @@ void BasicBlockLifter::PackLocals( auto ptr = into_vars(field_offset); field_offset += 1; - auto state_loaded_value = - LoadLiftedValue(decl, this->type_provider.Dictionary(), - this->intrinsics, bldr.GetInsertBlock(), from_state_ptr, - remill::LoadMemoryPointer(bldr, this->intrinsics)); + auto state_loaded_value = LoadLiftedValue( + decl, this->type_provider.Dictionary(), this->intrinsics, bldr, + from_state_ptr, remill::LoadMemoryPointer(bldr, this->intrinsics)); + bldr.CreateStore(state_loaded_value, ptr); } } @@ -563,8 +556,10 @@ void BasicBlockLifter::UnpackLocals( llvm::IRBuilder<> &bldr, PointerProvider returned_value, llvm::Value *into_state_ptr, const std::vector &decls) const { + auto blk = bldr.GetInsertBlock(); uint64_t field_offset = 0; for (auto decl : decls) { + ; auto ptr = returned_value(field_offset); if (auto insn = llvm::dyn_cast(ptr)) { insn->setMetadata("anvill.type", @@ -572,13 +567,16 @@ void BasicBlockLifter::UnpackLocals( } auto loaded_var_val = bldr.CreateLoad(decl.type, ptr, decl.name); field_offset += 1; - auto new_mem_ptr = StoreNativeValue( - loaded_var_val, decl, this->type_provider.Dictionary(), - this->intrinsics, bldr.GetInsertBlock(), into_state_ptr, - remill::LoadMemoryPointer(bldr, this->intrinsics)); + auto mem_ptr = remill::LoadMemoryPointer(bldr, this->intrinsics); + auto new_mem_ptr = + StoreNativeValue(loaded_var_val, decl, this->type_provider.Dictionary(), + this->intrinsics, bldr, into_state_ptr, mem_ptr); + 
bldr.SetInsertPoint(bldr.GetInsertBlock()); + bldr.CreateStore(new_mem_ptr, remill::LoadMemoryPointerRef(bldr.GetInsertBlock())); } + CHECK(bldr.GetInsertPoint() == blk->end()); } @@ -615,7 +613,6 @@ void BasicBlockLifter::CallBasicBlockFunction( ind < this->block_context->GetAvailableVariables().size(); ind++) { auto ptr = ptr_provider(ind); CHECK(ptr != nullptr); - ptr->dump(); args.push_back(ptr); } diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index ac3f4699c..f66a82969 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -189,7 +189,6 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( &F, v.index, this->lifter.Options(), cont); use->set(g); } - F.dump(); CHECK(!llvm::verifyFunction(F, &llvm::errs())); return to_replace_vars.empty() ? llvm::PreservedAnalyses::all() diff --git a/lib/Type.cpp b/lib/Type.cpp index 729ec509a..9aa8d54ce 100644 --- a/lib/Type.cpp +++ b/lib/Type.cpp @@ -21,15 +21,13 @@ #include // clang-format on +#include +#include #include - #include #include #include -#include -#include - #include #include #include @@ -67,8 +65,8 @@ class TypeSpecifierImpl { // Translates an llvm::Type to a type that conforms to the spec in // TypeSpecification.cpp -void TypeSpecifierImpl::EncodeType( - llvm::Type &type, std::stringstream &ss, EncodingFormat format) { +void TypeSpecifierImpl::EncodeType(llvm::Type &type, std::stringstream &ss, + EncodingFormat format) { const auto alpha_num = format == EncodingFormat::kValidSymbolCharsOnly; switch (type.getTypeID()) { case llvm::Type::VoidTyID: ss << 'v'; break; @@ -199,7 +197,7 @@ void TypeSpecifierImpl::EncodeType( } else if (struct_ptr == type_dict.u.named.padding) { ss << 'p'; - // This is an opaque structure; mark it as a void type. + // This is an opaque structure; mark it as a void type. 
} else if (struct_ptr->isOpaque()) { ss << 'v'; @@ -211,7 +209,7 @@ void TypeSpecifierImpl::EncodeType( if (type_to_id.count(struct_ptr)) { ss << (alpha_num ? "_M" : "%") << type_to_id[struct_ptr]; - // We've not yet serialized this structure. + // We've not yet serialized this structure. } else { // Start by emitting a new structure ID for this structure and memoizing @@ -238,10 +236,11 @@ void TypeSpecifierImpl::EncodeType( << (alpha_num ? "_D" : "]"); } - // TODO(pag): Investigate this possibility. Does this occur for - // bitfields? + // TODO(pag): Investigate this possibility. Does this occur for + // bitfields? } else if (expected_offset > offset) { - LOG(FATAL) << "TODO?! Maybe bitfields? Structure field offset shenanigans"; + LOG(FATAL) + << "TODO?! Maybe bitfields? Structure field offset shenanigans"; } const auto el_ty = struct_ptr->getElementType(i); @@ -408,8 +407,8 @@ namespace { #if ANVILL_USE_WRAPPED_TYPES template -static llvm::Type *GetOrCreateWrapper( - llvm::LLVMContext &context, const char *name, T wrapper) { +static llvm::Type *GetOrCreateWrapper(llvm::LLVMContext &context, + const char *name, T wrapper) { std::string type_name = kAnvillNamePrefix + name; auto ty = llvm::StructType::getTypeByName(context, type_name); if (ty) { @@ -420,25 +419,28 @@ static llvm::Type *GetOrCreateWrapper( return llvm::StructType::create(context, elems, type_name, true); } -static llvm::Type *GetOrCreateInt(llvm::LLVMContext &context, - const char *name, unsigned num_bits) { - return GetOrCreateWrapper(context, name, [=] (llvm::LLVMContext &context_) { +static llvm::Type *GetOrCreateInt(llvm::LLVMContext &context, const char *name, + unsigned num_bits) { + return GetOrCreateWrapper(context, name, [=](llvm::LLVMContext &context_) { return llvm::IntegerType::get(context_, num_bits); }); } static llvm::Type *GetOrCreateFloat(llvm::LLVMContext &context, - const char *name, unsigned num_bits) { - return GetOrCreateWrapper( - context, name, [=] (llvm::LLVMContext 
&context_) -> llvm::Type * { - switch (num_bits) { - case 16: return llvm::Type::getHalfTy(context_); - case 32: return llvm::Type::getFloatTy(context_); - case 64: return llvm::Type::getDoubleTy(context_); - case 128: return llvm::Type::getFP128Ty(context_); - default: return nullptr; - } - }); + const char *name, unsigned num_bits) { + return GetOrCreateWrapper(context, name, + [=](llvm::LLVMContext &context_) -> llvm::Type * { + switch (num_bits) { + case 16: return llvm::Type::getHalfTy(context_); + case 32: + return llvm::Type::getFloatTy(context_); + case 64: + return llvm::Type::getDoubleTy(context_); + case 128: + return llvm::Type::getFP128Ty(context_); + default: return nullptr; + } + }); } #endif @@ -467,17 +469,18 @@ TypeDictionary::TypeDictionary(llvm::LLVMContext &context) { u.named.float32 = GetOrCreateFloat(context, "float32", 32); u.named.float64 = GetOrCreateFloat(context, "float64", 64); u.named.float80_12 = GetOrCreateWrapper( - context, "float80_12", [] (llvm::LLVMContext &context_) { + context, "float80_12", [](llvm::LLVMContext &context_) { return llvm::ArrayType::get(llvm::Type::getInt8Ty(context_), 10); }); u.named.float80_16 = GetOrCreateWrapper( - context, "float80_16", [] (llvm::LLVMContext &context_) { + context, "float80_16", [](llvm::LLVMContext &context_) { return llvm::ArrayType::get(llvm::Type::getInt8Ty(context_), 12); }); u.named.float128 = GetOrCreateFloat(context, "float128", 128); - u.named.m64 = GetOrCreateWrapper(context, "mmx", [] (llvm::LLVMContext &context_) { - return llvm::Type::getX86_MMXTy(context_); - }); + u.named.m64 = + GetOrCreateWrapper(context, "mmx", [](llvm::LLVMContext &context_) { + return llvm::Type::getX86_MMXTy(context_); + }); u.named.void_ = GetOrCreateInt(context, "void", 8); u.named.padding = GetOrCreateInt(context, "padding", 8); #else @@ -515,7 +518,8 @@ bool TypeDictionary::IsPadding(llvm::Type *type) const noexcept { #if ANVILL_USE_WRAPPED_TYPES switch (type->getTypeID()) { case 
llvm::Type::StructTyID: - for (auto elem_type : llvm::dyn_cast(type)->elements()) { + for (auto elem_type : + llvm::dyn_cast(type)->elements()) { if (!IsPadding(elem_type)) { return false; } @@ -529,8 +533,7 @@ bool TypeDictionary::IsPadding(llvm::Type *type) const noexcept { auto elem_type = llvm::dyn_cast(type)->getElementType(); return IsPadding(elem_type); } - default: - return type == u.named.padding; + default: return type == u.named.padding; } #else return false; @@ -540,7 +543,7 @@ bool TypeDictionary::IsPadding(llvm::Type *type) const noexcept { TypeTranslator::~TypeTranslator(void) {} TypeTranslator::TypeTranslator(const TypeDictionary &type_dict, - const llvm::DataLayout &dl) + const llvm::DataLayout &dl) : impl(std::make_unique(type_dict, dl)) {} // Delegating constructor using a module's data layout. @@ -571,13 +574,13 @@ const llvm::DataLayout &TypeTranslator::DataLayout(void) const noexcept { // then only alpha_numeric characters (and underscores) are used. The // alpha_numeric representation is always safe to use when appended to // identifier names. -std::string TypeTranslator::EncodeToString( - llvm::Type *type, EncodingFormat format) const { +std::string TypeTranslator::EncodeToString(llvm::Type *type, + EncodingFormat format) const { std::stringstream ss; if (type) { impl->type_to_id.clear(); - impl->EncodeType( - *remill::RecontextualizeType(type, impl->context), ss, format); + impl->EncodeType(*remill::RecontextualizeType(type, impl->context), ss, + format); } return ss.str(); } @@ -678,9 +681,9 @@ FindTypeInList(llvm::Type *query, llvm::Type *const (&types)[kSize]) { } // namespace // Convert a value to a specific type. 
-llvm::Value *TypeDictionary::ConvertValueToType( - llvm::IRBuilderBase &ir, llvm::Value *src_val, - llvm::Type *dest_type) const { +llvm::Value *TypeDictionary::ConvertValueToType(llvm::IRBuilderBase &ir, + llvm::Value *src_val, + llvm::Type *dest_type) const { llvm::Type *src_type = src_val->getType(); if (src_type == dest_type) { @@ -699,26 +702,26 @@ llvm::Value *TypeDictionary::ConvertValueToType( // Unpack the source type, and then try to build it into the destination // type. This dispatches to the next case. if (maybe_src_type_index && maybe_dest_type_index) { -// unsigned indexes[] = {0u}; -// auto dest_val = ir.CreateExtractValue(src_val, indexes); -// CopyMetadataTo(src_val, dest_val); -// return ConvertValueToType(ir, dest_val, dest_type); + // unsigned indexes[] = {0u}; + // auto dest_val = ir.CreateExtractValue(src_val, indexes); + // CopyMetadataTo(src_val, dest_val); + // return ConvertValueToType(ir, dest_val, dest_type); LOG(FATAL) << "TODO"; return nullptr; - // Pack this type into a destination structure type. + // Pack this type into a destination structure type. } else if (!maybe_src_type_index && maybe_dest_type_index) { LOG(FATAL) << "TODO"; return nullptr; - // Unpack this type from a source structure type. + // Unpack this type from a source structure type. } else if (maybe_src_type_index && !maybe_dest_type_index) { unsigned indexes[] = {0u}; auto dest_val = ir.CreateExtractValue(src_val, indexes); CopyMetadataTo(src_val, dest_val); return AdaptToType(ir, dest_val, dest_type); - // Raw type adaptation. + // Raw type adaptation. } else { return AdaptToType(ir, src_val, dest_type); } diff --git a/lib/Utils.cpp b/lib/Utils.cpp index bd8ef11f1..d343ac467 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -160,7 +160,11 @@ llvm::Value *AdaptToType(llvm::IRBuilderBase &ir, llvm::Value *src, // If we want to change the type of a load, then we can change the type of // the loaded pointer. 
+ // TODO(Ian): I think this might be buggy through recursion: + // we set the IP to something above the load so we now arent inserting where we expect to... if (auto li = llvm::dyn_cast(src)) { + auto blk = ir.GetInsertBlock(); + auto preip = ir.GetInsertPoint(); ir.SetInsertPoint(li); auto loaded_ptr = AdaptToType( ir, li->getPointerOperand(), @@ -171,6 +175,8 @@ llvm::Value *AdaptToType(llvm::IRBuilderBase &ir, llvm::Value *src, new_li->setAtomic(li->getOrdering(), li->getSyncScopeID()); new_li->setAlignment(li->getAlign()); CopyMetadataTo(li, new_li); + ir.SetInsertPoint(blk, preip); + return new_li; } @@ -281,6 +287,7 @@ void StoreNativeValueToRegister(llvm::Value *native_val, if (adapted_val) { store = ir.CreateStore(adapted_val, ptr_to_reg); + } else { auto ptr = ir.CreateBitCast(ptr_to_reg, llvm::PointerType::get(ir.getContext(), 0)); From 16de1c8c1446136e0f06a7ef5384d14ca0e7899f Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 21 Dec 2022 16:06:32 -0500 Subject: [PATCH 083/163] fix call function to insert SP at ir --- lib/Declarations.cpp | 5 +- lib/Optimize.cpp | 47 ++++++++++++++++++- lib/Passes/RemoveCallIntrinsics.cpp | 8 +++- ...nctionReturnsWithAnvillFunctionReturns.cpp | 1 - 4 files changed, 54 insertions(+), 7 deletions(-) diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 520cb6a1e..d0a723f48 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -214,14 +214,13 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( // Go and get a pointer to the stack pointer register, so that we can // later store our computed return value stack pointer to it. auto sp_reg = arch->RegisterByName(arch->StackPointerRegisterName()); - const auto ptr_to_sp = sp_reg->AddressOf(state_ptr, ir.GetInsertBlock()); + const auto ptr_to_sp = sp_reg->AddressOf(state_ptr, ir); // Go and compute the value of the stack pointer on exit from // the function, which will be based off of the register state // on entry to the function. 
- auto new_sp_base = - return_stack_pointer->AddressOf(state_ptr, ir.GetInsertBlock()); + auto new_sp_base = return_stack_pointer->AddressOf(state_ptr, ir); const auto sp_val_on_exit = ir.CreateAdd( ir.CreateLoad(return_stack_pointer->type, new_sp_base), diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 5ebad5cca..bb2f7231c 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -113,6 +113,8 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, const BasicBlockContexts &contexts, const anvill::Specification &spec) { + + CHECK(!llvm::verifyModule(module, &llvm::errs())); const LifterOptions &options = lifter.Options(); EntityCrossReferenceResolver xr(lifter); @@ -181,37 +183,58 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, llvm::FunctionPassManager fpm; fpm.addPass(llvm::DCEPass()); + fpm.addPass(llvm::VerifierPass()); // NOTE(alex): This pass is extremely slow with LLVM 14. // fpm.addPass(llvm::SinkingPass()); // NewGVN has bugs with `____strtold_l_internal` from chal5, amd64. 
fpm.addPass(llvm::NewGVNPass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SCCPPass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::DSEPass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::EarlyCSEPass(true)); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::BDCEPass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SimplifyCFGPass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SinkingPass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SimplifyCFGPass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::InstCombinePass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(anvill::ReplaceRemillFunctionReturnsWithAnvillFunctionReturns( contexts, lifter)); + fpm.addPass(llvm::VerifierPass()); AddSinkSelectionsIntoBranchTargets(fpm); + fpm.addPass(llvm::VerifierPass()); AddRemoveUnusedFPClassificationCalls(fpm); + fpm.addPass(llvm::VerifierPass()); AddRemoveDelaySlotIntrinsics(fpm); + fpm.addPass(llvm::VerifierPass()); AddRemoveErrorIntrinsics(fpm); + fpm.addPass(llvm::VerifierPass()); AddLowerRemillMemoryAccessIntrinsics(fpm); + fpm.addPass(llvm::VerifierPass()); AddRemoveCompilerBarriers(fpm); + fpm.addPass(llvm::VerifierPass()); // TODO(pag): This pass has an issue on the `SMIME_write_ASN1` function // of the ARM64 variant of Challenge 5. // AddHoistUsersOfSelectsAndPhis(fpm); fpm.addPass(llvm::InstCombinePass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::DCEPass()); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::VerifierPass()); // Sometimes we observe patterns where PC- and SP-related offsets are // accidentally truncated, and thus displacement-based analyses make them @@ -220,18 +243,25 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, // negative numbers. Thus, we want to fixup such cases prior to any kind of // stack analysis. 
AddConvertMasksToCasts(fpm); - + fpm.addPass(llvm::VerifierPass()); AddSinkSelectionsIntoBranchTargets(fpm); + fpm.addPass(llvm::VerifierPass()); AddRemoveTrivialPhisAndSelects(fpm); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::DCEPass()); + fpm.addPass(llvm::VerifierPass()); AddRemoveStackPointerCExprs(fpm, options.stack_frame_recovery_options); + fpm.addPass(llvm::VerifierPass()); //AddRecoverBasicStackFrame(fpm, options.stack_frame_recovery_options); //AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options); fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::VerifierPass()); AddCombineAdjacentShifts(fpm); + fpm.addPass(llvm::VerifierPass()); // Sometimes we have a values in the form of (expr ^ 1) used as branch // conditions or other targets. Try to fix these to be CMPs, since it @@ -239,12 +269,16 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, // but it comes up often enough for lifted code. 
AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(anvill::RemoveCallIntrinsics(xr, spec, lifter)); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::VerifierPass()); AddBranchRecovery(fpm); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(ConvertPointerArithmeticToGEP(contexts, types, structs, md)); - + fpm.addPass(llvm::VerifierPass()); pb.crossRegisterProxies(lam, fam, cam, mam); mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(fpm))); @@ -253,16 +287,25 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, llvm::FunctionPassManager second_fpm; AddTransformRemillJumpIntrinsics(second_fpm, xr); + second_fpm.addPass(llvm::VerifierPass()); //AddRemoveRemillFunctionReturns(second_fpm, xr); //AddConvertSymbolicReturnAddressToConcreteReturnAddress(second_fpm); AddLowerRemillUndefinedIntrinsics(second_fpm); + second_fpm.addPass(llvm::VerifierPass()); AddRemoveFailedBranchHints(second_fpm); + second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(llvm::NewGVNPass()); + second_fpm.addPass(llvm::VerifierPass()); AddSpreadPCMetadata(second_fpm, options); + second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(CodeQualityStatCollector()); + second_fpm.addPass(llvm::VerifierPass()); AddConvertXorsToCmps(second_fpm); + second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(llvm::DCEPass()); + second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(llvm::DSEPass()); + second_fpm.addPass(llvm::VerifierPass()); mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(second_fpm))); diff --git a/lib/Passes/RemoveCallIntrinsics.cpp b/lib/Passes/RemoveCallIntrinsics.cpp index 0509fcfff..57341a4ce 100644 --- a/lib/Passes/RemoveCallIntrinsics.cpp +++ b/lib/Passes/RemoveCallIntrinsics.cpp @@ -3,7 +3,9 @@ #include #include #include +#include #include +#include namespace anvill { llvm::StringRef RemoveCallIntrinsics::name(void) { @@ 
-26,7 +28,9 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, this->xref_resolver, remillFunctionCall->getFunction()->getParent()->getDataLayout()); auto ra = xref_folder.TryResolveReferenceWithClearedCache(target_func); - remillFunctionCall->dump(); + auto f = remillFunctionCall->getFunction(); + CHECK(!llvm::verifyFunction(*f, &llvm::errs())); + if (ra.references_entity || // Related to an existing lifted entity. ra.references_global_value || // Related to a global var/func. ra.references_program_counter) { // Related to `__anvill_pc`. @@ -51,6 +55,8 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, } } + CHECK(!llvm::verifyFunction(*f, &llvm::errs())); + return prev; } diff --git a/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp b/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp index a67b9ce11..4f95d626f 100644 --- a/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp +++ b/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp @@ -65,7 +65,6 @@ ReplaceRemillFunctionReturnsWithAnvillFunctionReturns::runOnBasicBlockFunction( pres_analyses = llvm::PreservedAnalyses::none(); } - F.dump(); CHECK(!llvm::verifyFunction(F, &llvm::errs())); return pres_analyses; From 32d82638a5f333768d1232e315f1a7fa370d8084 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 21 Dec 2022 16:30:32 -0500 Subject: [PATCH 084/163] handle pointer typed stack var sizes --- lib/Passes/ReplaceStackReferences.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index f66a82969..643177272 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -69,15 +69,20 @@ struct StackVariable { class StackModel { private: std::map frame; + const remill::Arch *arch; public: - static uint64_t GetParamDeclSize(const ParameterDecl &decl) { + 
uint64_t GetParamDeclSize(const ParameterDecl &decl) { + if (llvm::isa_and_nonnull(decl.type)) { + return this->arch->address_size / 8; + } + CHECK(decl.type->getPrimitiveSizeInBits() != 0); return decl.type->getPrimitiveSizeInBits() / 8; } StackModel(const BasicBlockContext &cont, const remill::Arch *arch) { - + this->arch = arch; size_t index = 0; for (const auto &v : cont.GetAvailableVariables()) { if (v.mem_reg && v.mem_reg->name == arch->StackPointerRegisterName()) { @@ -133,7 +138,6 @@ class StackModel { void InsertFrameVar(size_t index, ParameterDecl var) { - CHECK(var.type->getPrimitiveSizeInBits() != 0); if (VarOverlaps(var.mem_offset) || VarOverlaps(var.mem_offset + GetParamDeclSize(var) - 1)) { From af1f6260d2ef9db13178e82c8add0948af64a0f0 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 21 Dec 2022 17:17:10 -0500 Subject: [PATCH 085/163] enable avx by default --- lib/Specification.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Specification.cpp b/lib/Specification.cpp index a82abfaf3..04ba6d6fc 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -269,12 +269,12 @@ GetArch(llvm::LLVMContext &context, switch (spec.arch()) { default: return {"Invalid/unrecognized architecture"}; - case ::specification::ARCH_X86: arch_name = remill::kArchX86; break; + case ::specification::ARCH_X86: arch_name = remill::kArchX86_AVX; break; case ::specification::ARCH_X86_AVX: arch_name = remill::kArchX86_AVX; break; case ::specification::ARCH_X86_AVX512: arch_name = remill::kArchX86_AVX512; break; - case ::specification::ARCH_AMD64: arch_name = remill::kArchAMD64; break; + case ::specification::ARCH_AMD64: arch_name = remill::kArchAMD64_AVX; break; case ::specification::ARCH_AMD64_AVX: arch_name = remill::kArchAMD64_AVX; break; From 1ebd897c051a45c3758e4c624fa2e6ef4e92579c Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 21 Dec 2022 18:44:24 -0500 Subject: [PATCH 086/163] use datalayout size --- 
lib/Passes/ReplaceStackReferences.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 643177272..9f52eeb78 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -73,12 +73,8 @@ class StackModel { public: uint64_t GetParamDeclSize(const ParameterDecl &decl) { - if (llvm::isa_and_nonnull(decl.type)) { - return this->arch->address_size / 8; - } - - CHECK(decl.type->getPrimitiveSizeInBits() != 0); - return decl.type->getPrimitiveSizeInBits() / 8; + CHECK(arch->DataLayout().getTypeSizeInBits(decl.type) != 0); + return arch->DataLayout().getTypeSizeInBits(decl.type) / 8; } StackModel(const BasicBlockContext &cont, const remill::Arch *arch) { From 3fe263c144cc8c6338f5bab8c05ca03e641c3a4c Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 22 Dec 2022 12:34:10 -0500 Subject: [PATCH 087/163] update angha script --- scripts/test-angha-1k.sh | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/scripts/test-angha-1k.sh b/scripts/test-angha-1k.sh index d040d782f..2b7a9b54f 100755 --- a/scripts/test-angha-1k.sh +++ b/scripts/test-angha-1k.sh @@ -1,15 +1,14 @@ #!/bin/bash DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) SRC_DIR=$( cd "$( dirname "${DIR}" )" && pwd ) - -ANVILL_PYTHON="python3 -m anvill" +GHIDRA_INSTALL_DIR="~/ghidra_10.1.5_PUBLIC/" ANVILL_DECOMPILE="anvill-decompile-spec" function Help { echo "Run Anvill on AnghaBech-1K" echo "" echo "Options:" - echo " --python-cmd The anvill Python command to invoke. Default ${ANVILL_PYTHON}" + echo " --ghidra-install-dir The ghidra install dir. Default ${GHIDRA_INSTALL_DIR}" echo " --decompile-cmd The anvill decompile command to invoke. Default ${ANVILL_DECOMPILE}" echo " -h --help Print help." 
} @@ -60,12 +59,12 @@ while [[ $# -gt 0 ]] ; do exit 0 ;; - # Anvill python cmd - --python-cmd) - ANVILL_PYTHON=${2} + --ghidra-install-dir) + GHIDRA_INSTALL_DIR=${2} shift # past argument ;; + # How large of a run to get --decompile-cmd) ANVILL_DECOMPILE=${2} @@ -89,11 +88,6 @@ then exit 1 fi -if ! ${ANVILL_PYTHON} --help &>/dev/null; -then - echo "[!] Could not execute anvill python cmd: ${ANVILL_PYTHON}" - exit 1 -fi if ! ${ANVILL_DECOMPILE} --version &>/dev/null; then @@ -118,7 +112,7 @@ for arch in $(ls -1 binaries/) do echo "[+] Testing architecture ${arch}" ${SRC_DIR}/libraries/lifting-tools-ci/tool_run_scripts/anvill.py \ - --anvill-python "${ANVILL_PYTHON}" \ + --ghidra-install-dir "${GHIDRA_INSTALL_DIR}" \ --anvill-decompile "${ANVILL_DECOMPILE}" \ --input-dir "$(pwd)/binaries/${arch}" \ --output-dir "$(pwd)/results/${arch}" \ From 6d9eb56ccad22b9165d7b8b9db4bf15f2457fd8b Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 22 Dec 2022 13:25:03 -0500 Subject: [PATCH 088/163] bump tool ci --- libraries/lifting-tools-ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 1785bf696..4a42f5517 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 1785bf69687b24b2611bbf5fdc4fb5caa4c4fdb3 +Subproject commit 4a42f5517731a5710f2e27aceacd89b837b7440b From 1df0ee5b96417662b023a30a9b86ff9977ace467 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 6 Jan 2023 11:58:27 -0500 Subject: [PATCH 089/163] refactor to lift all live variables (#341) * refactor to lift all live variables * implement shared liveness utilities * convert bytes to bits... 
* insert into right vector --- bin/Decompile/Main.cpp | 6 +- data_specifications/specification.proto | 111 +++++++++++--- include/anvill/Declarations.h | 78 +++++----- lib/Declarations.cpp | 183 +++++++++++++++--------- lib/Lifters/BasicBlockLifter.cpp | 117 ++++++--------- lib/Lifters/BasicBlockLifter.h | 25 ++-- lib/Passes/ReplaceStackReferences.cpp | 9 +- lib/Protobuf.cpp | 77 +++++----- lib/Protobuf.h | 6 + lib/Utils.cpp | 2 +- 10 files changed, 357 insertions(+), 257 deletions(-) diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index 0bb78e154..b51239e20 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -43,6 +43,8 @@ DEFINE_bool(add_breakpoints, false, "lifted bitcode."); DEFINE_bool(add_names, false, "Try to apply symbol names to lifted entities."); +DEFINE_bool(disable_opt, false, "Dont apply optimization passes"); + DEFINE_string( default_callable_spec, "", @@ -242,8 +244,10 @@ int main(int argc, char *argv[]) { llvm::EnableStatistics(); } + if (!FLAGS_disable_opt) { + anvill::OptimizeModule(lifter, module, spec.GetBlockContexts(), spec); + } - anvill::OptimizeModule(lifter, module, spec.GetBlockContexts(), spec); int ret = EXIT_SUCCESS; diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 5b2e38b0b..0ef8d0822 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -100,14 +100,6 @@ message Memory { int64 offset = 2; } -message Value { - oneof inner_value { - Register reg = 1; - Memory mem = 2; - } -} - - message Variable { repeated Value values = 1; TypeSpec type = 2; @@ -159,6 +151,15 @@ message Callable { } + +message ValueDomain { + oneof inner { + HighSymbol symb = 1; + int64 stack_disp = 2; + int64 constant = 3; + } +} + // These arent quite affine relations we just store // relationships of Reg=Reg'+Offset where Reg' is the register value at entry // to the function. 
@@ -167,20 +168,96 @@ message Callable { // what we want at the end to determine stack relationships. // This very closely matches ghidra's SymbolicPropogator: // https://ghidra.re/ghidra_docs/api/ghidra/program/util/SymbolicPropogator.Value.html#getRelativeRegister() -message OffsetDomain { - string target_reg = 1; - // An empty base indicates a constant. - optional string base = 2; - int64 offset = 3; +message ValueMapping { + Variable target_value = 1; + ValueDomain curr_val = 2; } -message BlockContext { - repeated OffsetDomain symvals = 1; +enum HighLoc { + HIGH_LOC_UNSPECIFIED = 0; + HIGH_LOC_PARAM = 1; + HIGH_LOC_LOCAL = 2; +} + + - repeated Register live_at_entries = 2; - repeated Register live_at_exits = 3; +message SymbolMapping { + HighSymbol high_loc = 1; + Variable low_loc = 2; } +message HighSymbol { + string name = 1; + HighLoc location = 2; +} + +message Value { + oneof inner_value { + Register reg = 1; + Memory mem = 2; + } +} + +/* + block1: + u1 = param_1 + 3 + STORE u1, [sp-4] + r1 = param_1 + block2: + u1 = LOAD [sp-4] + r2 = r1 + u1 + (live r2) +*/ + +/* + Current lift: + block1(param_1, reg_state): + store param_1 + 3, localSub4 + store param_1, r1(reg_state) + +block2(reg_state): + %1 = load localSub4 + %2 = load r1(reg_state) + %3 = iadd %1, %2 + store %3, r2(reg_state) + + This is a world where high variables are exactly bound to locations +*/ + +/* + With affine equalities + So r1 and the var are still live so need to copy their state + block1(param_1, reg_state): + store param_1 + 3, localSub4 + store param_1, r1(reg_state) + + + // here tho we have the affine equalities r1 = param1 local_sub=param_1+3 + // So we could do +block2(param_1, reg_state): + %1 = param_1+3 + %2 = iadd param_1, %1 + store %2, r2(reg_state) + + +But then what hppens if the user patches block1 +to + block1(param_1, reg_state): + store param_1 + 2, localSub4 + store r2, r1(reg_state) .... 
we cant really support multiple block patches at once + */ + +message BlockContext { + // Affine equalities between values + // and high symbols at entry to + // the block + repeated ValueMapping symvals = 1; + + repeated Parameter live_at_entries = 2; + repeated Parameter live_at_exits = 3; +} + + message CodeBlock { uint64 address = 1; diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 834c480cb..843f357c4 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -56,16 +56,6 @@ struct CodeBlock { }; -struct OffsetDomain { - const remill::Register *target_register; - std::optional base_register; - std::int64_t offset; -}; -struct SpecStackOffsets { - std::vector affine_equalities; -}; - - class TypeDictionary; // A value, such as a parameter or a return value. Values are resident @@ -122,6 +112,15 @@ struct VariableDecl { llvm::Module &) const; }; +struct OffsetDomain { + ValueDecl target_value; + std::int64_t stack_offset; +}; +struct SpecStackOffsets { + std::vector affine_equalities; +}; + + // A declaration for a callable entity. struct CallableDecl { private: @@ -200,29 +199,38 @@ struct LocalVariableDecl { std::vector values; }; -class BasicBlockContext { - private: - std::vector RegistersNotInVariables( - const std::vector &all) const; +// Basic block contexts impose an ordering on live values s.t. 
shared Parameters between +// live exits and entries +struct BasicBlockVariable { + ParameterDecl param; + size_t index; + bool live_at_entry; + bool live_at_exit; +}; + +class BasicBlockContext { public: virtual ~BasicBlockContext() = default; - virtual std::vector GetAvailableVariables() const = 0; + virtual const SpecStackOffsets &GetStackOffsets() const = 0; virtual const std::vector &ReturnValue() const = 0; - virtual const std::vector & - LiveRegistersAtEntry() const = 0; - virtual const std::vector & - LiveRegistersAtExit() const = 0; + // Deduplicates locations and ensures there are no overlapping decls + // A valid parameter list is a set of non overlapping a-locs with distinct names. + std::vector LiveParamsAtEntryAndExit() const; + + + std::vector LiveBBParamsAtEntry() const; + std::vector LiveBBParamsAtExit() const; - std::vector - LiveRegistersNotInVariablesAtEntry() const; - std::vector - LiveRegistersNotInVariablesAtExit() const; llvm::StructType *StructTypeFromVars(llvm::LLVMContext &llvm_context) const; + + protected: + virtual const std::vector &LiveParamsAtEntry() const = 0; + virtual const std::vector &LiveParamsAtExit() const = 0; }; struct FunctionDecl; @@ -230,24 +238,22 @@ class SpecBlockContext : public BasicBlockContext { private: const FunctionDecl &decl; SpecStackOffsets offsets; - std::vector live_regs_at_entry; - std::vector live_regs_at_exit; + std::vector live_params_at_entry; + std::vector live_params_at_exit; public: SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets, - std::vector live_regs_at_entry, - std::vector live_regs_at_exit); + std::vector live_params_at_entry, + std::vector live_params_at_exit); - virtual std::vector GetAvailableVariables() const override; virtual const SpecStackOffsets &GetStackOffsets() const override; - virtual const std::vector & - LiveRegistersAtEntry() const override; - // should be a subset of live registers at entry - virtual const std::vector & - LiveRegistersAtExit() const 
override; - virtual const std::vector &ReturnValue() const override; + + + protected: + virtual const std::vector &LiveParamsAtEntry() const override; + virtual const std::vector &LiveParamsAtExit() const override; }; // A function decl, as represented at a "near ABI" level. To be specific, @@ -286,10 +292,10 @@ struct FunctionDecl : public CallableDecl { std::unordered_map stack_offsets; - std::unordered_map> + std::unordered_map> live_regs_at_entry; - std::unordered_map> + std::unordered_map> live_regs_at_exit; std::uint64_t stack_depth; diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index d0a723f48..766c3ed83 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -39,6 +40,36 @@ #include "Protobuf.h" #include "anvill/Specification.h" + +namespace { +// A value decl without a type, we assume a parameter occupying a location occupies the entire location +// or that offsets are disjoin, this isnt completely correct if we start doing better stack liveness TODO(Ian) +// We would need to check offset+size overlaps +struct LocBase { + const remill::Register *reg_loc{nullptr}; + const remill::Register *mem_base{nullptr}; + std::int64_t offset{0}; + + bool operator==(const LocBase &lbase) const { + return reg_loc == lbase.reg_loc && mem_base == lbase.mem_base && + lbase.offset == offset; + } +}; +} // namespace + + +namespace std { +template <> +struct std::hash { + std::size_t operator()(const LocBase &c) const { + std::size_t result = 0; + + + return result; + } +}; +} // namespace std + namespace anvill { // Declare this global variable in an LLVM module. 
@@ -66,13 +97,83 @@ void FunctionDecl::AddBBContexts( } +// need to be careful here about overlapping values +std::vector +BasicBlockContext::LiveParamsAtEntryAndExit() const { + auto live_exits = this->LiveParamsAtExit(); + auto live_entries = this->LiveParamsAtEntry(); + + + auto convert_to_locbas = [](const ParameterDecl ¶m) -> LocBase { + return {param.reg, param.mem_reg, param.mem_offset}; + }; + + auto add_to_set = [convert_to_locbas]( + const std::vector ¶ms, + std::unordered_set &locs_to_add) { + std::transform(params.begin(), params.end(), + std::inserter(locs_to_add, locs_to_add.end()), + convert_to_locbas); + }; + + std::unordered_set covered_live_ent; + add_to_set(live_entries, covered_live_ent); + std::unordered_set covered_live_exit; + add_to_set(live_exits, covered_live_exit); + + std::vector res; + std::unordered_set covered; + auto add_all_from_vector = [&res, &covered, &covered_live_ent, + &covered_live_exit, convert_to_locbas]( + std::vector params) { + for (auto p : params) { + auto lbase = convert_to_locbas(p); + auto live_at_ent = covered_live_ent.find(lbase) != covered_live_ent.end(); + auto live_at_exit = + covered_live_exit.find(lbase) != covered_live_exit.end(); + CHECK(covered.find(lbase) == covered.end() || + (live_at_ent && live_at_exit)); + if (covered.find(lbase) == covered.end()) { + covered.insert(lbase); + auto ind = res.size(); + res.push_back({p, ind, live_at_ent, live_at_exit}); + } + } + }; + + add_all_from_vector(live_entries); + add_all_from_vector(live_entries); + return res; +} + + +std::vector BasicBlockContext::LiveBBParamsAtEntry() const { + auto alllive = this->LiveParamsAtEntryAndExit(); + std::vector res; + std::copy_if( + alllive.begin(), alllive.end(), std::back_inserter(res), + [](const BasicBlockVariable &bbvar) { return bbvar.live_at_entry; }); + return res; +} + +std::vector BasicBlockContext::LiveBBParamsAtExit() const { + auto alllive = this->LiveParamsAtEntryAndExit(); + std::vector res; + std::copy_if( + 
alllive.begin(), alllive.end(), std::back_inserter(res), + [](const BasicBlockVariable &bbvar) { return bbvar.live_at_exit; }); + return res; +} + llvm::StructType * BasicBlockContext::StructTypeFromVars(llvm::LLVMContext &llvm_context) const { - auto in_scope_locals = this->GetAvailableVariables(); + std::vector in_scope_locals = + this->LiveParamsAtEntryAndExit(); std::vector field_types; - std::transform(in_scope_locals.begin(), in_scope_locals.end(), - std::back_inserter(field_types), - [](const ParameterDecl ¶m) { return param.type; }); + std::transform( + in_scope_locals.begin(), in_scope_locals.end(), + std::back_inserter(field_types), + [](const BasicBlockVariable ¶m) { return param.param.type; }); return llvm::StructType::get(llvm_context, field_types, "sty_for_basic_block_function"); @@ -124,73 +225,21 @@ const std::vector &SpecBlockContext::ReturnValue() const { } -std::vector -BasicBlockContext::RegistersNotInVariables( - const std::vector &all) const { - - std::unordered_set covered_registers; - for (auto cov_var : this->GetAvailableVariables()) { - if (cov_var.reg) { - covered_registers.insert(cov_var.reg); - } - } - std::vector res; - std::copy_if(all.begin(), all.end(), std::back_inserter(res), - [&covered_registers](const remill::Register *reg) { - return covered_registers.find(reg) == covered_registers.end(); - }); - return res; -} - - -std::vector -BasicBlockContext::LiveRegistersNotInVariablesAtEntry() const { - return this->RegistersNotInVariables(this->LiveRegistersAtEntry()); -} -std::vector -BasicBlockContext::LiveRegistersNotInVariablesAtExit() const { - return this->RegistersNotInVariables(this->LiveRegistersAtExit()); -} - - -const std::vector & -SpecBlockContext::LiveRegistersAtEntry() const { - return this->live_regs_at_entry; -} - SpecBlockContext::SpecBlockContext( const FunctionDecl &decl, SpecStackOffsets offsets, - std::vector live_regs_at_entry, - std::vector live_regs_at_exit) + std::vector live_params_at_entry, + std::vector 
live_params_at_exit) : decl(decl), offsets(std::move(offsets)), - live_regs_at_entry(std::move(live_regs_at_entry)), - live_regs_at_exit(std::move(live_regs_at_exit)) {} + live_params_at_entry(std::move(live_params_at_entry)), + live_params_at_exit(std::move(live_params_at_exit)) {} -const std::vector & -SpecBlockContext::LiveRegistersAtExit() const { - return this->live_regs_at_exit; +const std::vector &SpecBlockContext::LiveParamsAtExit() const { + return this->live_params_at_exit; } - -std::vector SpecBlockContext::GetAvailableVariables() const { - std::vector decls; - for (auto p : this->decl.params) { - decls.push_back(p); - } - - for (auto [nm, l] : this->decl.locals) { - if (l.values.size() == 1) { - - ParameterDecl d = { - {l.values[0].reg, l.values[0].mem_reg, l.values[0].mem_offset, - l.values[0].spec_type, l.values[0].type}, - nm}; - decls.push_back(std::move(d)); - } - } - - return decls; +const std::vector &SpecBlockContext::LiveParamsAtEntry() const { + return this->live_params_at_entry; } const SpecStackOffsets &SpecBlockContext::GetStackOffsets() const { @@ -396,10 +445,8 @@ V GetWithDef(uint64_t addr, const std::unordered_map &map, V def) { SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { return SpecBlockContext( *this, GetWithDef(addr, this->stack_offsets, SpecStackOffsets()), - GetWithDef(addr, this->live_regs_at_entry, - std::vector()), - GetWithDef(addr, this->live_regs_at_exit, - std::vector())); + GetWithDef(addr, this->live_regs_at_entry, std::vector()), + GetWithDef(addr, this->live_regs_at_exit, std::vector())); } diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 0cce376ac..3c7d401cb 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -41,10 +41,7 @@ CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { this->RecursivelyInlineFunctionCallees(bbfunc.func); anvill::EntityLifter lifter(options); - auto avails = 
this->block_context->GetAvailableVariables(); - - return CallableBasicBlockFunction(bbfunc.func, std::move(avails), block_def, - std::move(*this)); + return CallableBasicBlockFunction(bbfunc.func, block_def, std::move(*this)); } @@ -354,32 +351,6 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { } -void BasicBlockLifter::InitializeLiveUncoveredRegs(llvm::Value *state_argument, - llvm::IRBuilder<> &ir) { - auto need_to_init = this->block_context->LiveRegistersNotInVariablesAtEntry(); - - for (auto init_reg : need_to_init) { - auto reg_src_ptr = init_reg->AddressOf(state_argument, ir); - auto reg_dest_ptr = init_reg->AddressOf(this->state_ptr, ir); - auto reg_type = - remill::RecontextualizeType(init_reg->type, this->llvm_context); - ir.CreateStore(ir.CreateLoad(reg_type, reg_src_ptr), reg_dest_ptr); - } -} - -void BasicBlockLifter::SaveLiveUncoveredRegs(llvm::Value *state_argument, - llvm::IRBuilder<> &ir) { - auto need_to_init = this->block_context->LiveRegistersNotInVariablesAtExit(); - - for (auto init_reg : need_to_init) { - auto reg_src_ptr = init_reg->AddressOf(this->state_ptr, ir); - auto reg_dest_ptr = init_reg->AddressOf(state_argument, ir); - auto reg_type = - remill::RecontextualizeType(init_reg->type, this->llvm_context); - ir.CreateStore(ir.CreateLoad(reg_type, reg_src_ptr), reg_dest_ptr); - } -} - llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const { return this->GetAddrAnnotation(addr, this->semantics_module->getContext()); } @@ -421,10 +392,10 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto start_ind = lifted_func_type->getNumParams() + 1; - for (auto v : this->block_context->GetAvailableVariables()) { - if (!v.name.empty()) { + for (auto v : this->block_context->LiveParamsAtEntryAndExit()) { + if (!v.param.name.empty()) { auto arg = remill::NthArgument(func, start_ind); - arg->setName(v.name); + arg->setName(v.param.name); } start_ind += 1; } @@ -470,7 +441,6 @@ BasicBlockFunction 
BasicBlockLifter::CreateBasicBlockFunction() { this->state_ptr = this->AllocateAndInitializeStateStructure(&blk, options.arch); - this->InitializeLiveUncoveredRegs(state, ir); // Put registers that are referencing the stack in terms of their displacement so that we // Can resolve these stack references later . @@ -482,21 +452,23 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto stack_offsets = this->block_context->GetStackOffsets(); for (auto ®_off : stack_offsets.affine_equalities) { - if (reg_off.base_register && reg_off.base_register == this->sp_reg) { - auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( - ir, this->sp_reg, this->block_def.addr, reg_off.offset); - StoreNativeValueToRegister(new_value, reg_off.target_register, - type_provider.Dictionary(), intrinsics, ir, - this->state_ptr); - } + auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( + ir, this->sp_reg, this->block_def.addr, reg_off.stack_offset); + auto nmem = StoreNativeValue( + new_value, reg_off.target_value, type_provider.Dictionary(), intrinsics, + ir, this->state_ptr, remill::LoadMemoryPointer(ir, intrinsics)); + ir.CreateStore(nmem, remill::LoadMemoryPointerRef(ir.GetInsertBlock())); } PointerProvider ptr_provider = [this, func](size_t index) -> llvm::Value * { return this->ProvidePointerFromFunctionArgs(func, index); }; - this->UnpackLocals(ir, ptr_provider, this->state_ptr, - this->block_context->GetAvailableVariables()); + + LOG(INFO) << "Live values at entry to function " + << this->block_context->LiveBBParamsAtEntry().size(); + this->UnpackLiveValues(ir, ptr_provider, this->state_ptr, + this->block_context->LiveBBParamsAtEntry()); auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); auto mem_arg = remill::NthArgument(func, remill::kMemoryPointerArgNum); @@ -516,10 +488,9 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto ret_mem = ir.CreateCall(this->lifted_func, args); - this->PackLocals(ir, 
this->state_ptr, ptr_provider, - this->block_context->GetAvailableVariables()); + this->PackLiveValues(ir, this->state_ptr, ptr_provider, + this->block_context->LiveBBParamsAtExit()); - this->SaveLiveUncoveredRegs(state, ir); CHECK(ir.GetInsertPoint() == func->getEntryBlock().end()); ir.CreateRet(ret_mem); @@ -535,42 +506,41 @@ llvm::StructType *BasicBlockLifter::StructTypeFromVars() const { } // Packs in scope variables into a struct -void BasicBlockLifter::PackLocals( +void BasicBlockLifter::PackLiveValues( llvm::IRBuilder<> &bldr, llvm::Value *from_state_ptr, - PointerProvider into_vars, const std::vector &decls) const { + PointerProvider into_vars, + const std::vector &decls) const { - uint64_t field_offset = 0; for (auto decl : decls) { - auto ptr = into_vars(field_offset); - field_offset += 1; + auto ptr = into_vars(decl.index); auto state_loaded_value = LoadLiftedValue( - decl, this->type_provider.Dictionary(), this->intrinsics, bldr, + decl.param, this->type_provider.Dictionary(), this->intrinsics, bldr, from_state_ptr, remill::LoadMemoryPointer(bldr, this->intrinsics)); bldr.CreateStore(state_loaded_value, ptr); } } -void BasicBlockLifter::UnpackLocals( +void BasicBlockLifter::UnpackLiveValues( llvm::IRBuilder<> &bldr, PointerProvider returned_value, llvm::Value *into_state_ptr, - const std::vector &decls) const { + const std::vector &decls) const { auto blk = bldr.GetInsertBlock(); - uint64_t field_offset = 0; + for (auto decl : decls) { - ; - auto ptr = returned_value(field_offset); + auto ptr = returned_value(decl.index); if (auto insn = llvm::dyn_cast(ptr)) { - insn->setMetadata("anvill.type", - this->type_specifier.EncodeToMetadata(decl.spec_type)); + insn->setMetadata("anvill.type", this->type_specifier.EncodeToMetadata( + decl.param.spec_type)); } - auto loaded_var_val = bldr.CreateLoad(decl.type, ptr, decl.name); - field_offset += 1; + auto loaded_var_val = + bldr.CreateLoad(decl.param.type, ptr, decl.param.name); + auto mem_ptr = 
remill::LoadMemoryPointer(bldr, this->intrinsics); - auto new_mem_ptr = - StoreNativeValue(loaded_var_val, decl, this->type_provider.Dictionary(), - this->intrinsics, bldr, into_state_ptr, mem_ptr); + auto new_mem_ptr = StoreNativeValue( + loaded_var_val, decl.param, this->type_provider.Dictionary(), + this->intrinsics, bldr, into_state_ptr, mem_ptr); bldr.SetInsertPoint(bldr.GetInsertBlock()); bldr.CreateStore(new_mem_ptr, @@ -605,12 +575,12 @@ void BasicBlockLifter::CallBasicBlockFunction( return this->ProvidePointerFromStruct(builder, out_param_locals, index); }; - this->PackLocals(builder, parent_state, ptr_provider, - cbfunc.GetInScopeVaraibles()); + this->PackLiveValues(builder, parent_state, ptr_provider, + this->block_context->LiveBBParamsAtEntry()); for (size_t ind = 0; - ind < this->block_context->GetAvailableVariables().size(); ind++) { + ind < this->block_context->LiveParamsAtEntryAndExit().size(); ind++) { auto ptr = ptr_provider(ind); CHECK(ptr != nullptr); args.push_back(ptr); @@ -622,8 +592,8 @@ void BasicBlockLifter::CallBasicBlockFunction( builder.CreateStore(new_mem_ptr, mem_ptr_ref); - this->UnpackLocals(builder, ptr_provider, parent_state, - cbfunc.GetInScopeVaraibles()); + this->UnpackLiveValues(builder, ptr_provider, parent_state, + this->block_context->LiveBBParamsAtExit()); } @@ -653,10 +623,8 @@ BasicBlockLifter::BasicBlockLifter( } CallableBasicBlockFunction::CallableBasicBlockFunction( - llvm::Function *func, std::vector in_scope_locals, - CodeBlock block, BasicBlockLifter bb_lifter) + llvm::Function *func, CodeBlock block, BasicBlockLifter bb_lifter) : func(func), - in_scope_locals(in_scope_locals), block(block), bb_lifter(std::move(bb_lifter)) {} @@ -669,11 +637,6 @@ llvm::Function *CallableBasicBlockFunction::GetFunction() const { return this->func; } -const std::vector & -CallableBasicBlockFunction::GetInScopeVaraibles() const { - return this->in_scope_locals; -} - llvm::Value 
*BasicBlockLifter::ProvidePointerFromStruct(llvm::IRBuilder<> &ir, llvm::Value *target_sty, size_t index) const { diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index ce0d2f9a5..da0939717 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -89,12 +89,6 @@ class BasicBlockLifter : public CodeLifter { llvm::MDNode *GetBasicBlockAnnotation(uint64_t addr) const; - - void InitializeLiveUncoveredRegs(llvm::Value *state_argument, - llvm::IRBuilder<> &); - - void SaveLiveUncoveredRegs(llvm::Value *state_argument, llvm::IRBuilder<> &); - public: BasicBlockLifter(std::unique_ptr block_context, const CodeBlock &block_def, const LifterOptions &options_, @@ -112,13 +106,13 @@ class BasicBlockLifter : public CodeLifter { using PointerProvider = std::function; // Packs in scope variables into a struct - void PackLocals(llvm::IRBuilder<> &bldr, llvm::Value *from_state_ptr, - PointerProvider into_vars, - const std::vector &decls) const; + void PackLiveValues(llvm::IRBuilder<> &bldr, llvm::Value *from_state_ptr, + PointerProvider into_vars, + const std::vector &decls) const; - void UnpackLocals(llvm::IRBuilder<> &, PointerProvider returned_value, - llvm::Value *into_state_ptr, - const std::vector &) const; + void UnpackLiveValues(llvm::IRBuilder<> &, PointerProvider returned_value, + llvm::Value *into_state_ptr, + const std::vector &) const; // Calls a basic block function and unpacks the result into the state @@ -133,17 +127,14 @@ class CallableBasicBlockFunction { private: llvm::Function *func; - std::vector in_scope_locals; CodeBlock block; BasicBlockLifter bb_lifter; public: - CallableBasicBlockFunction(llvm::Function *func, - std::vector in_scope_locals, - CodeBlock block, BasicBlockLifter bb_lifter); + CallableBasicBlockFunction(llvm::Function *func, CodeBlock block, + BasicBlockLifter bb_lifter); - const std::vector &GetInScopeVaraibles() const; llvm::Function *GetFunction() const; llvm::StructType *GetRetType() 
const; diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 9f52eeb78..d3c7e8031 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -80,9 +80,12 @@ class StackModel { StackModel(const BasicBlockContext &cont, const remill::Arch *arch) { this->arch = arch; size_t index = 0; - for (const auto &v : cont.GetAvailableVariables()) { - if (v.mem_reg && v.mem_reg->name == arch->StackPointerRegisterName()) { - this->InsertFrameVar(index, v); + // this feels weird maybe it should be all stack variables but then if the variable isnt live... + // we will have discovered something that should have been live. + for (const auto &v : cont.LiveParamsAtEntryAndExit()) { + if (v.param.mem_reg && + v.param.mem_reg->name == arch->StackPointerRegisterName()) { + this->InsertFrameVar(index, v.param); } index += 1; } diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 5a848e47b..b840d8e15 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -542,26 +542,21 @@ Result ProtobufTranslator::DecodeFunction( return decl; } - -namespace { -void AddRegistersToBB( - std::unordered_map> &map, - uint64_t bb_addr, const remill::Arch *arch, - const ::google::protobuf::RepeatedPtrField<::specification::Register> - ®s) { - auto &v = map.insert({bb_addr, std::vector()}) - .first->second; - - for (auto reg : regs) { - auto fill_reg = arch->RegisterByName(reg.register_name()); - if (fill_reg) { - v.push_back(fill_reg); - } else { - LOG(ERROR) << "No reg for: " << reg.register_name(); - } +void ProtobufTranslator::AddLiveValuesToBB( + std::unordered_map> &map, + uint64_t bb_addr, + const ::google::protobuf::RepeatedPtrField<::specification::Parameter> + &values) const { + auto &v = map.insert({bb_addr, std::vector()}).first->second; + + for (auto var : values) { + LOG_IF(FATAL, var.repr_var().values_size() != 1) + << "Symbols must be represented by a single valuedecl."; + auto param = DecodeParameter(var); + 
LOG_IF(FATAL, !param.Succeeded()) << "Unable to decode live parameter"; + v.push_back(param.TakeValue()); } } -} // namespace void ProtobufTranslator::ParseCFGIntoFunction( const ::specification::Function &obj, FunctionDecl &decl) const { @@ -583,35 +578,43 @@ void ProtobufTranslator::ParseCFGIntoFunction( auto blk = decl.cfg[blk_addr]; for (auto &symval : ctx.symvals()) { OffsetDomain reg_off; - reg_off.offset = symval.offset(); - reg_off.target_register = arch->RegisterByName(symval.target_reg()); - if (!reg_off.target_register) { - LOG(ERROR) << "Missing base register for affine relation: " - << symval.target_reg(); - continue; + + if (!symval.has_target_value()) { + LOG(FATAL) << "All equalities must have a target"; } - if (symval.has_base()) { - reg_off.base_register = arch->RegisterByName(symval.base()); - if (!reg_off.base_register) { - LOG(ERROR) << "Missing base register for affine relation: " - << symval.base(); - continue; - } - } else { - reg_off.base_register = std::nullopt; + + auto stackptr = arch->RegisterByName(arch->StackPointerRegisterName()); + if (!stackptr) { + LOG(FATAL) << "No stack ptr"; + } + + auto stackptr_type_spec = SizeToType(stackptr->size * 8); + + auto target_vdecl = + DecodeValue(symval.target_value().values()[0], stackptr_type_spec, + "Unable to get value decl for stack offset relation"); + LOG_IF(FATAL, !target_vdecl.Succeeded()) << "Failed to lift value"; + if (!symval.has_curr_val()) { + LOG(FATAL) << "Mapping should have current value"; } + LOG_IF(FATAL, !symval.curr_val().has_stack_disp()) + << "Only stack displacements supported for affine relations"; + + reg_off.stack_offset = symval.curr_val().stack_disp(); + reg_off.target_value = target_vdecl.TakeValue(); + affine_equalities.push_back(reg_off); } SpecStackOffsets off = {affine_equalities}; decl.stack_offsets.insert({blk_addr, off}); - AddRegistersToBB(decl.live_regs_at_entry, blk_addr, this->arch, - ctx.live_at_entries()); + 
this->AddLiveValuesToBB(decl.live_regs_at_entry, blk_addr, + ctx.live_at_entries()); - AddRegistersToBB(decl.live_regs_at_exit, blk_addr, this->arch, - ctx.live_at_exits()); + this->AddLiveValuesToBB(decl.live_regs_at_exit, blk_addr, + ctx.live_at_exits()); } } diff --git a/lib/Protobuf.h b/lib/Protobuf.h index 4cd794c43..7f93a85e3 100644 --- a/lib/Protobuf.h +++ b/lib/Protobuf.h @@ -77,6 +77,12 @@ class ProtobufTranslator { void ParseCFGIntoFunction(const ::specification::Function &obj, FunctionDecl &decl) const; + void AddLiveValuesToBB( + std::unordered_map> &map, + uint64_t bb_addr, + const ::google::protobuf::RepeatedPtrField<::specification::Parameter> + &values) const; + public: explicit ProtobufTranslator( diff --git a/lib/Utils.cpp b/lib/Utils.cpp index d343ac467..631633254 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -811,7 +811,7 @@ ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, const anvill::LifterOptions &options, const anvill::BasicBlockContext &context) { CHECK(options.arch->LiftedFunctionType()->getNumParams() + 1 + - context.GetAvailableVariables().size() == + context.LiveParamsAtEntryAndExit().size() == func->arg_size()); return func->getArg(index + options.arch->LiftedFunctionType()->getNumParams() + 1); From 9a59b072caf73edbc4339a1688b42250a4c5e46f Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 6 Jan 2023 12:55:28 -0500 Subject: [PATCH 090/163] formatting --- lib/Lifters/BasicBlockLifter.cpp | 44 ++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 3c7d401cb..4ad721bb3 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -512,13 +512,16 @@ void BasicBlockLifter::PackLiveValues( const std::vector &decls) const { for (auto decl : decls) { - auto ptr = into_vars(decl.index); - auto state_loaded_value = LoadLiftedValue( - decl.param, this->type_provider.Dictionary(), 
this->intrinsics, bldr, - from_state_ptr, remill::LoadMemoryPointer(bldr, this->intrinsics)); + if (!decl.param.mem_reg) { + auto ptr = into_vars(decl.index); - bldr.CreateStore(state_loaded_value, ptr); + auto state_loaded_value = LoadLiftedValue( + decl.param, this->type_provider.Dictionary(), this->intrinsics, bldr, + from_state_ptr, remill::LoadMemoryPointer(bldr, this->intrinsics)); + + bldr.CreateStore(state_loaded_value, ptr); + } } } @@ -529,22 +532,25 @@ void BasicBlockLifter::UnpackLiveValues( auto blk = bldr.GetInsertBlock(); for (auto decl : decls) { - auto ptr = returned_value(decl.index); - if (auto insn = llvm::dyn_cast(ptr)) { - insn->setMetadata("anvill.type", this->type_specifier.EncodeToMetadata( - decl.param.spec_type)); - } - auto loaded_var_val = - bldr.CreateLoad(decl.param.type, ptr, decl.param.name); + // is this how we want to do this.... now the value really doesnt live in memory anywhere but the frame. + if (!decl.param.mem_reg) { + auto ptr = returned_value(decl.index); + if (auto insn = llvm::dyn_cast(ptr)) { + insn->setMetadata("anvill.type", this->type_specifier.EncodeToMetadata( + decl.param.spec_type)); + } + auto loaded_var_val = + bldr.CreateLoad(decl.param.type, ptr, decl.param.name); - auto mem_ptr = remill::LoadMemoryPointer(bldr, this->intrinsics); - auto new_mem_ptr = StoreNativeValue( - loaded_var_val, decl.param, this->type_provider.Dictionary(), - this->intrinsics, bldr, into_state_ptr, mem_ptr); - bldr.SetInsertPoint(bldr.GetInsertBlock()); + auto mem_ptr = remill::LoadMemoryPointer(bldr, this->intrinsics); + auto new_mem_ptr = StoreNativeValue( + loaded_var_val, decl.param, this->type_provider.Dictionary(), + this->intrinsics, bldr, into_state_ptr, mem_ptr); + bldr.SetInsertPoint(bldr.GetInsertBlock()); - bldr.CreateStore(new_mem_ptr, - remill::LoadMemoryPointerRef(bldr.GetInsertBlock())); + bldr.CreateStore(new_mem_ptr, + remill::LoadMemoryPointerRef(bldr.GetInsertBlock())); + } } CHECK(bldr.GetInsertPoint() == 
blk->end()); } From d44084332697274b6c9c62abd7cc69639baee1d5 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 6 Jan 2023 20:03:26 -0500 Subject: [PATCH 091/163] add abstract fallback stack --- lib/Lifters/BasicBlockLifter.cpp | 60 +++++++++++++++++++++++++------- lib/Lifters/BasicBlockLifter.h | 25 +++++++++---- lib/Lifters/FunctionLifter.cpp | 20 +++++++---- lib/Lifters/FunctionLifter.h | 6 ++-- 4 files changed, 82 insertions(+), 29 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 4ad721bb3..9d2a24873 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include +#include #include #include #include @@ -408,7 +410,7 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { memory->setName("memory"); pc->setName("program_counter"); next_pc_out->setName("next_pc_out"); - state->setName("state"); + state->setName("stack"); auto liftedty = this->options.arch->LiftedFunctionType(); @@ -556,16 +558,35 @@ void BasicBlockLifter::UnpackLiveValues( } +size_t +BasicBlockLifter::StackOffsetFromStackPointer(std::int64_t stack_off) const { + // The offset is relative to the stack pointer but on entry to function so offset into the stack (negative if grows down postive otherwise... 
+ // unless we have parameters) + // TODO(Ian): this wont do the correct thing for stack parameters + + // welp lets crash if we are going to do the wrong thing + CHECK((options.stack_frame_recovery_options.stack_grows_down && + stack_off <= 0) || + (!options.stack_frame_recovery_options.stack_grows_down && + stack_off >= 0)); + return std::abs(stack_off); +} + + +llvm::Type *BasicBlockLifter::GetFrameType() const { + return llvm::ArrayType::get(llvm::IntegerType::getInt8Ty(llvm_context), + this->decl.stack_depth); +} + + void BasicBlockLifter::CallBasicBlockFunction( llvm::IRBuilder<> &builder, llvm::Value *parent_state, - const CallableBasicBlockFunction &cbfunc) const { + const CallableBasicBlockFunction &cbfunc, llvm::Value *parent_stack) const { std::vector args(remill::kNumBlockArgs + 1); - - auto out_param_locals = builder.CreateAlloca(this->var_struct_ty); - args[remill::kStatePointerArgNum] = parent_state; + args[0] = parent_stack; args[remill::kPCArgNum] = options.program_counter_init_procedure( builder, pc_reg, cbfunc.GetBlock().addr); @@ -575,9 +596,19 @@ void BasicBlockLifter::CallBasicBlockFunction( args[remill::kNumBlockArgs] = remill::LoadNextProgramCounterRef(builder.GetInsertBlock()); + auto bbvars = this->block_context->LiveParamsAtEntryAndExit(); - PointerProvider ptr_provider = - [&builder, this, out_param_locals](size_t index) -> llvm::Value * { + auto i32 = llvm::IntegerType::get(llvm_context, 32); + PointerProvider ptr_provider = [&builder, this, out_param_locals, &bbvars, + parent_stack, + i32](size_t index) -> llvm::Value * { + auto repr_var = bbvars[index]; + if (repr_var.param.mem_reg) { + auto off = this->StackOffsetFromStackPointer(repr_var.param.mem_offset); + return builder.CreateGEP( + this->GetFrameType(), parent_stack, + {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, off)}); + } return this->ProvidePointerFromStruct(builder, out_param_locals, index); }; @@ -604,27 +635,30 @@ void 
BasicBlockLifter::CallBasicBlockFunction( void CallableBasicBlockFunction::CallBasicBlockFunction( - llvm::IRBuilder<> &add_to_llvm, llvm::Value *parent_state) const { - this->bb_lifter.CallBasicBlockFunction(add_to_llvm, parent_state, *this); + llvm::IRBuilder<> &add_to_llvm, llvm::Value *parent_state, + llvm::Value *abstract_stack) const { + this->bb_lifter.CallBasicBlockFunction(add_to_llvm, parent_state, *this, + abstract_stack); } CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlock( - std::unique_ptr block_context, + std::unique_ptr block_context, const FunctionDecl &decl, const CodeBlock &block_def, const LifterOptions &options_, llvm::Module *semantics_module, const TypeTranslator &type_specifier) { - return BasicBlockLifter(std::move(block_context), block_def, options_, + return BasicBlockLifter(std::move(block_context), decl, block_def, options_, semantics_module, type_specifier) .LiftBasicBlockFunction(); } BasicBlockLifter::BasicBlockLifter( - std::unique_ptr block_context, + std::unique_ptr block_context, const FunctionDecl &decl, const CodeBlock &block_def, const LifterOptions &options_, llvm::Module *semantics_module, const TypeTranslator &type_specifier) : CodeLifter(options_, semantics_module, type_specifier), block_context(std::move(block_context)), - block_def(block_def) { + block_def(block_def), + decl(decl) { this->var_struct_ty = this->StructTypeFromVars(); } diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index da0939717..b0418c484 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -11,6 +11,8 @@ #include #include +#include +#include #include #include @@ -53,6 +55,8 @@ class BasicBlockLifter : public CodeLifter { llvm::Function *lifted_func{nullptr}; + const FunctionDecl &decl; + llvm::StructType *StructTypeFromVars() const; remill::DecodingContext ApplyContextAssignments( @@ -65,6 +69,9 @@ class BasicBlockLifter : public CodeLifter { BasicBlockFunction 
CreateBasicBlockFunction(); + + llvm::Type *GetFrameType() const; + bool ApplyInterProceduralControlFlowOverride(const remill::Instruction &insn, llvm::BasicBlock *&block); @@ -91,13 +98,14 @@ class BasicBlockLifter : public CodeLifter { public: BasicBlockLifter(std::unique_ptr block_context, - const CodeBlock &block_def, const LifterOptions &options_, + const FunctionDecl &decl, const CodeBlock &block_def, + const LifterOptions &options_, llvm::Module *semantics_module, const TypeTranslator &type_specifier); static CallableBasicBlockFunction LiftBasicBlock(std::unique_ptr block_context, - const CodeBlock &block_def, const LifterOptions &options_, - llvm::Module *semantics_module, + const FunctionDecl &decl, const CodeBlock &block_def, + const LifterOptions &options_, llvm::Module *semantics_module, const TypeTranslator &type_specifier); @@ -105,6 +113,8 @@ class BasicBlockLifter : public CodeLifter { using PointerProvider = std::function; + + // Packs in scope variables into a struct void PackLiveValues(llvm::IRBuilder<> &bldr, llvm::Value *from_state_ptr, PointerProvider into_vars, @@ -117,8 +127,9 @@ class BasicBlockLifter : public CodeLifter { // Calls a basic block function and unpacks the result into the state void CallBasicBlockFunction(llvm::IRBuilder<> &, llvm::Value *state_ptr, - const CallableBasicBlockFunction &) const; - + const CallableBasicBlockFunction &, + llvm::Value *parent_stack) const; + size_t StackOffsetFromStackPointer(std::int64_t stack_off) const; BasicBlockLifter(BasicBlockLifter &&) = default; }; @@ -142,8 +153,8 @@ class CallableBasicBlockFunction { const CodeBlock &GetBlock() const; // Calls a basic block function and unpacks the result into the state - void CallBasicBlockFunction(llvm::IRBuilder<> &, - llvm::Value *state_ptr) const; + void CallBasicBlockFunction(llvm::IRBuilder<> &, llvm::Value *state_ptr, + llvm::Value *stack_ptr) const; }; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 
6c655d7eb..d07e5145c 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -383,13 +383,14 @@ FunctionLifter::LiftBasicBlockFunction(const CodeBlock &blk) const { this->curr_decl->GetBlockContext(blk.addr)); return BasicBlockLifter::LiftBasicBlock( - std::move(context), blk, this->options, this->semantics_module.get(), - this->type_specifier); + std::move(context), *this->curr_decl, blk, this->options, + this->semantics_module.get(), this->type_specifier); } void FunctionLifter::VisitBlock(CodeBlock blk, - llvm::Value *lifted_function_state) { + llvm::Value *lifted_function_state, + llvm::Value *abstract_stack) { auto llvm_blk = this->GetOrCreateBlock(blk.addr); llvm::IRBuilder<> builder(llvm_blk); @@ -398,7 +399,7 @@ void FunctionLifter::VisitBlock(CodeBlock blk, CHECK(!llvm::verifyFunction(*bbfunc.GetFunction(), &llvm::errs())); - bbfunc.CallBasicBlockFunction(builder, lifted_function_state); + bbfunc.CallBasicBlockFunction(builder, lifted_function_state, abstract_stack); CHECK(anvill::GetBasicBlockAddr(bbfunc.GetFunction()).has_value()); auto pc = remill::LoadNextProgramCounter(llvm_blk, this->intrinsics); @@ -412,14 +413,15 @@ void FunctionLifter::VisitBlock(CodeBlock blk, } } -void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state) { +void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state, + llvm::Value *abstract_stack) { DLOG(INFO) << "Num blocks for func " << std::hex << this->curr_decl->address << ": " << this->curr_decl->cfg.size(); for (const auto &[addr, blk] : this->curr_decl->cfg) { DLOG(INFO) << "Visiting: " << std::hex << addr; - this->VisitBlock(blk, lifted_function_state); + this->VisitBlock(blk, lifted_function_state, abstract_stack); } CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); @@ -529,6 +531,10 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { ir.CreateStore(pc, next_pc_reg_ref); ir.CreateStore(pc, pc_reg_ref); + auto abstract_stack = 
ir.CreateAlloca( + llvm::ArrayType::get(llvm::Type::getInt8Ty(this->llvm_context), + decl.stack_depth), + nullptr, "abstract_stack"); // Add a branch between the first block of the lifted function, which sets // up some local variables, and the block that will contain the lifted // instruction. @@ -544,7 +550,7 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { DLOG(INFO) << "Visiting insns"; // Go lift all instructions! - VisitBlocks(lifted_func_st.state_ptr); + VisitBlocks(lifted_func_st.state_ptr, abstract_stack); // Fill up `native_func` with a basic block and make it call `lifted_func`. // This creates things like the stack-allocated `State` structure. diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 7d9c70e7f..03d649fc8 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -265,14 +265,16 @@ class FunctionLifter : public CodeLifter { LoadFunctionReturnAddress(const remill::Instruction &inst, llvm::BasicBlock *block, llvm::Value *state_ptr); - void VisitBlock(CodeBlock entry_context, llvm::Value *lifted_function_state); + void VisitBlock(CodeBlock entry_context, llvm::Value *lifted_function_state, + llvm::Value *abstract_stack); LiftedFunction CreateLiftedFunction(const std::string &name); remill::DecodingContext CreateDecodingContext(const CodeBlock &blk); - void VisitBlocks(llvm::Value *lifted_function_state); + void VisitBlocks(llvm::Value *lifted_function_state, + llvm::Value *abstract_stack); // Try to decode an instruction at address `addr` into `*inst_out`. Returns // a context map if sueccessful and std::nullopt otherwise. `is_delayed` tells the decoder From 6c279f10d805fb34081d872f5bac69366f38f940 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 6 Jan 2023 21:24:23 -0500 Subject: [PATCH 092/163] added falling back to abstract stack... 
some really odd pointers need to be debugged --- include/anvill/Declarations.h | 24 ++++++++++++++++ include/anvill/Utils.h | 1 + lib/Declarations.cpp | 41 +++++++++++++++++++++++++++ lib/Lifters/BasicBlockLifter.cpp | 19 ++++--------- lib/Lifters/BasicBlockLifter.h | 2 -- lib/Lifters/FunctionLifter.cpp | 6 ++-- lib/Passes/ReplaceStackReferences.cpp | 23 ++++++++++++--- lib/Utils.cpp | 6 ++++ 8 files changed, 100 insertions(+), 22 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 843f357c4..490c853a4 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -215,6 +215,8 @@ class BasicBlockContext { virtual const SpecStackOffsets &GetStackOffsets() const = 0; + virtual size_t GetStackSize() const = 0; + virtual const std::vector &ReturnValue() const = 0; // Deduplicates locations and ensures there are no overlapping decls @@ -233,6 +235,26 @@ class BasicBlockContext { virtual const std::vector &LiveParamsAtExit() const = 0; }; +class AbstractStack { + private: + bool stack_grows_down; + llvm::Type *stack_type; + llvm::Value *stack_ptr; + + public: + AbstractStack(size_t stack_size, llvm::Value *stack_ptr, + bool stack_grows_down); + + size_t StackOffsetFromStackPointer(std::int64_t stack_off) const; + + static llvm::Type *StackTypeFromSize(llvm::LLVMContext &context, size_t size); + + llvm::Type *StackType() const; + + llvm::Value *PointerToStackMemberFromOffset(llvm::IRBuilder<> &ir, + std::int64_t stack_off) const; +}; + struct FunctionDecl; class SpecBlockContext : public BasicBlockContext { private: @@ -250,6 +272,8 @@ class SpecBlockContext : public BasicBlockContext { virtual const std::vector &ReturnValue() const override; + virtual size_t GetStackSize() const override; + protected: virtual const std::vector &LiveParamsAtEntry() const override; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 4b26a9b6d..b011b72bb 100644 --- a/include/anvill/Utils.h +++ 
b/include/anvill/Utils.h @@ -134,4 +134,5 @@ std::optional GetBasicBlockAddr(llvm::Function *func); llvm::Value *ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, const anvill::LifterOptions &, const BasicBlockContext &); +llvm::Value *GetBasicBlockStackPtr(llvm::Function *func); } // namespace anvill diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 766c3ed83..092cdd93f 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -224,6 +224,10 @@ const std::vector &SpecBlockContext::ReturnValue() const { return this->decl.returns; } +size_t SpecBlockContext::GetStackSize() const { + return decl.stack_depth; +} + SpecBlockContext::SpecBlockContext( const FunctionDecl &decl, SpecStackOffsets offsets, @@ -449,5 +453,42 @@ SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { GetWithDef(addr, this->live_regs_at_exit, std::vector())); } +size_t +AbstractStack::StackOffsetFromStackPointer(std::int64_t stack_off) const { + // The offset is relative to the stack pointer but on entry to function so offset into the stack (negative if grows down postive otherwise... 
+ // unless we have parameters) + // TODO(Ian): this wont do the correct thing for stack parameters + + // welp lets crash if we are going to do the wrong thing + CHECK((this->stack_grows_down && stack_off <= 0) || + (!this->stack_grows_down && stack_off >= 0)); + return std::abs(stack_off); +} + +llvm::Type *AbstractStack::StackType() const { + return this->stack_type; +} + +llvm::Value * +AbstractStack::PointerToStackMemberFromOffset(llvm::IRBuilder<> &ir, + std::int64_t stack_off) const { + auto off = this->StackOffsetFromStackPointer(stack_off); + auto i32 = llvm::IntegerType::getInt32Ty(this->stack_ptr->getContext()); + return ir.CreateGEP( + this->StackType(), this->stack_ptr, + {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, off)}); +} +llvm::Type *AbstractStack::StackTypeFromSize(llvm::LLVMContext &context, + size_t size) { + return llvm::ArrayType::get(llvm::IntegerType::getInt8Ty(context), size); +} + + +AbstractStack::AbstractStack(size_t stack_size, llvm::Value *stack_ptr, + bool stack_grows_down) + : stack_grows_down(stack_grows_down), + stack_type(AbstractStack::StackTypeFromSize(stack_ptr->getContext(), + stack_size)), + stack_ptr(stack_ptr) {} } // namespace anvill diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 9d2a24873..484c2465e 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -573,12 +573,6 @@ BasicBlockLifter::StackOffsetFromStackPointer(std::int64_t stack_off) const { } -llvm::Type *BasicBlockLifter::GetFrameType() const { - return llvm::ArrayType::get(llvm::IntegerType::getInt8Ty(llvm_context), - this->decl.stack_depth); -} - - void BasicBlockLifter::CallBasicBlockFunction( llvm::IRBuilder<> &builder, llvm::Value *parent_state, const CallableBasicBlockFunction &cbfunc, llvm::Value *parent_stack) const { @@ -598,16 +592,15 @@ void BasicBlockLifter::CallBasicBlockFunction( auto bbvars = this->block_context->LiveParamsAtEntryAndExit(); - auto i32 = 
llvm::IntegerType::get(llvm_context, 32); + AbstractStack stack( + decl.stack_depth, parent_stack, + this->options.stack_frame_recovery_options.stack_grows_down); PointerProvider ptr_provider = [&builder, this, out_param_locals, &bbvars, - parent_stack, - i32](size_t index) -> llvm::Value * { + &stack](size_t index) -> llvm::Value * { auto repr_var = bbvars[index]; if (repr_var.param.mem_reg) { - auto off = this->StackOffsetFromStackPointer(repr_var.param.mem_offset); - return builder.CreateGEP( - this->GetFrameType(), parent_stack, - {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, off)}); + return stack.PointerToStackMemberFromOffset(builder, + repr_var.param.mem_offset); } return this->ProvidePointerFromStruct(builder, out_param_locals, index); }; diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index b0418c484..a5ad17559 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -70,8 +70,6 @@ class BasicBlockLifter : public CodeLifter { BasicBlockFunction CreateBasicBlockFunction(); - llvm::Type *GetFrameType() const; - bool ApplyInterProceduralControlFlowOverride(const remill::Instruction &insn, llvm::BasicBlock *&block); diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index d07e5145c..a1b5a2a9f 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -531,10 +531,10 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { ir.CreateStore(pc, next_pc_reg_ref); ir.CreateStore(pc, pc_reg_ref); + auto abstract_stack = ir.CreateAlloca( - llvm::ArrayType::get(llvm::Type::getInt8Ty(this->llvm_context), - decl.stack_depth), - nullptr, "abstract_stack"); + AbstractStack::StackTypeFromSize(llvm_context, decl.stack_depth), nullptr, + "abstract_stack"); // Add a branch between the first block of the lifted function, which sets // up some local variables, and the block that will contain the lifted // instruction. 
diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index d3c7e8031..c2ffb0442 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "anvill/Declarations.h" @@ -163,7 +164,8 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( CrossReferenceFolder folder(resolver, this->lifter.DataLayout()); StackModel smodel(cont, this->lifter.Options().arch); - std::vector> to_replace_vars; + std::vector>> + to_replace_vars; for (auto use : EnumerateStackPointerUsages(F)) { const auto reference = folder.TryResolveReferenceWithCaching(use->get()); @@ -179,18 +181,31 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( if (referenced_variable.has_value() && referenced_variable->offset == 0 && llvm::isa(use->get()->getType())) { to_replace_vars.push_back({use, referenced_variable->decl}); + } else { + // otherwise we are going to escape the abstract stack + to_replace_vars.push_back({use, stack_offset}); } } + + AbstractStack stk( + cont.GetStackSize(), anvill::GetBasicBlockStackPtr(&F), + lifter.Options().stack_frame_recovery_options.stack_grows_down); + for (auto [use, v] : to_replace_vars) { llvm::IRBuilder<> ir(&F.getEntryBlock(), F.getEntryBlock().begin()); if (auto *insn = llvm::dyn_cast(use->get())) { ir.SetInsertPoint(insn); } - auto g = anvill::ProvidePointerFromFunctionArgs( - &F, v.index, this->lifter.Options(), cont); - use->set(g); + if (std::holds_alternative(v)) { + auto g = anvill::ProvidePointerFromFunctionArgs( + &F, std::get(v).index, this->lifter.Options(), cont); + use->set(g); + } else { + auto offset = std::get(v); + use->set(stk.PointerToStackMemberFromOffset(ir, offset)); + } } CHECK(!llvm::verifyFunction(F, &llvm::errs())); diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 631633254..3292f2096 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -28,6 +28,7 @@ 
#include #include +#include #include #include @@ -806,6 +807,10 @@ std::optional GetBasicBlockAddr(llvm::Function *func) { return llvm::cast(v)->getLimitedValue(); } +llvm::Value *GetBasicBlockStackPtr(llvm::Function *func) { + return func->getArg(0); +} + llvm::Value * ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, const anvill::LifterOptions &options, @@ -817,4 +822,5 @@ ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, options.arch->LiftedFunctionType()->getNumParams() + 1); } + } // namespace anvill From 0921d06ab4d03482cea33fba34675e08f9442e50 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 9 Jan 2023 16:17:23 -0500 Subject: [PATCH 093/163] stack fixes --- include/anvill/Declarations.h | 25 ++++-- lib/Declarations.cpp | 56 ++++++++++---- lib/Lifters/BasicBlockLifter.cpp | 12 ++- lib/Optimize.cpp | 9 ++- lib/Passes/ReplaceStackReferences.cpp | 105 ++++++++++++++++++++------ 5 files changed, 157 insertions(+), 50 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 490c853a4..d23d70ea3 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -235,24 +235,35 @@ class BasicBlockContext { virtual const std::vector &LiveParamsAtExit() const = 0; }; + +/// An abstract stack is made up of components with a bytesize, these components allow us to split the stack at offsets +/// in particular this is helpful for splitting out stack space beyond the locals for things like return addresses +struct StackComponent { + size_t size; + llvm::Value *stackptr; +}; + class AbstractStack { private: + llvm::LLVMContext &context; bool stack_grows_down; - llvm::Type *stack_type; - llvm::Value *stack_ptr; + std::vector stack_types; + std::vector components; + size_t total_size; public: - AbstractStack(size_t stack_size, llvm::Value *stack_ptr, - bool stack_grows_down); + AbstractStack(llvm::LLVMContext &context, + std::vector components, bool stack_grows_down); size_t 
StackOffsetFromStackPointer(std::int64_t stack_off) const; static llvm::Type *StackTypeFromSize(llvm::LLVMContext &context, size_t size); - llvm::Type *StackType() const; + //llvm::Type *StackType() const; - llvm::Value *PointerToStackMemberFromOffset(llvm::IRBuilder<> &ir, - std::int64_t stack_off) const; + std::optional + PointerToStackMemberFromOffset(llvm::IRBuilder<> &ir, + std::int64_t stack_off) const; }; struct FunctionDecl; diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 092cdd93f..8e8022179 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -462,33 +463,60 @@ AbstractStack::StackOffsetFromStackPointer(std::int64_t stack_off) const { // welp lets crash if we are going to do the wrong thing CHECK((this->stack_grows_down && stack_off <= 0) || (!this->stack_grows_down && stack_off >= 0)); - return std::abs(stack_off); -} -llvm::Type *AbstractStack::StackType() const { - return this->stack_type; + if (this->stack_grows_down) { + LOG(INFO) << this->total_size; + return this->total_size + stack_off; + } else { + return stack_off; + } } -llvm::Value * +std::optional AbstractStack::PointerToStackMemberFromOffset(llvm::IRBuilder<> &ir, std::int64_t stack_off) const { auto off = this->StackOffsetFromStackPointer(stack_off); - auto i32 = llvm::IntegerType::getInt32Ty(this->stack_ptr->getContext()); - return ir.CreateGEP( - this->StackType(), this->stack_ptr, - {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, off)}); + auto i32 = llvm::IntegerType::getInt32Ty(this->context); + LOG(INFO) << "Looking for offset" << off; + auto curr_off = 0; + auto curr_ind = 0; + for (auto [sz, ptr] : this->components) { + if (off < curr_off + sz) { + LOG(INFO) << "Found for " << remill::LLVMThingToString(ptr); + LOG(INFO) << curr_off << " " << sz; + return ir.CreateGEP(this->stack_types[curr_ind], ptr, + {llvm::ConstantInt::get(i32, 0), + llvm::ConstantInt::get(i32, off - 
curr_off)}); + } + curr_off += sz; + curr_ind++; + } + + return std::nullopt; } + llvm::Type *AbstractStack::StackTypeFromSize(llvm::LLVMContext &context, size_t size) { return llvm::ArrayType::get(llvm::IntegerType::getInt8Ty(context), size); } -AbstractStack::AbstractStack(size_t stack_size, llvm::Value *stack_ptr, +AbstractStack::AbstractStack(llvm::LLVMContext &context, + std::vector components, bool stack_grows_down) - : stack_grows_down(stack_grows_down), - stack_type(AbstractStack::StackTypeFromSize(stack_ptr->getContext(), - stack_size)), - stack_ptr(stack_ptr) {} + : context(context), + stack_grows_down(stack_grows_down), + components(std::move(components)), + total_size(0) { + + if (stack_grows_down) { + std::reverse(this->components.begin(), this->components.end()); + } + + for (const auto &[k, v] : this->components) { + this->stack_types.push_back(this->StackTypeFromSize(context, k)); + total_size += k; + } +} } // namespace anvill diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 484c2465e..96c24f696 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -593,14 +593,20 @@ void BasicBlockLifter::CallBasicBlockFunction( auto bbvars = this->block_context->LiveParamsAtEntryAndExit(); AbstractStack stack( - decl.stack_depth, parent_stack, + builder.getContext(), {{decl.stack_depth, parent_stack}}, this->options.stack_frame_recovery_options.stack_grows_down); PointerProvider ptr_provider = [&builder, this, out_param_locals, &bbvars, &stack](size_t index) -> llvm::Value * { auto repr_var = bbvars[index]; if (repr_var.param.mem_reg) { - return stack.PointerToStackMemberFromOffset(builder, - repr_var.param.mem_offset); + auto stack_ptr = stack.PointerToStackMemberFromOffset( + builder, repr_var.param.mem_offset); + if (stack_ptr) { + return *stack_ptr; + } else { + LOG(FATAL) + << "Unable to create a ptr to the stack, the stack is too small to represent the param."; + } } return 
this->ProvidePointerFromStruct(builder, out_param_locals, index); }; diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index bb2f7231c..70e9100af 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -255,7 +255,8 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, fpm.addPass(llvm::VerifierPass()); //AddRecoverBasicStackFrame(fpm, options.stack_frame_recovery_options); //AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options); - fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); + fpm.addPass(llvm::SROAPass()); + // fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SROAPass()); fpm.addPass(llvm::VerifierPass()); @@ -268,12 +269,14 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, // makes code easier to read and analyze. This is a fairly narrow optimization // but it comes up often enough for lifted code. - AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); + fpm.addPass(llvm::VerifierPass()); fpm.addPass(anvill::RemoveCallIntrinsics(xr, spec, lifter)); fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SROAPass()); fpm.addPass(llvm::VerifierPass()); + AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); + AddBranchRecovery(fpm); fpm.addPass(llvm::VerifierPass()); @@ -297,6 +300,8 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, second_fpm.addPass(llvm::NewGVNPass()); second_fpm.addPass(llvm::VerifierPass()); AddSpreadPCMetadata(second_fpm, options); + + second_fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(CodeQualityStatCollector()); second_fpm.addPass(llvm::VerifierPass()); diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index c2ffb0442..4ade267ef 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -2,11 +2,16 @@ 
#include #include #include +#include #include +#include +#include #include #include #include #include +#include +#include #include #include #include @@ -22,6 +27,29 @@ #include "anvill/Declarations.h" namespace anvill { +namespace { +std::optional +GetPtrToOffsetInto(llvm::IRBuilder<> &ir, const llvm::DataLayout &dl, + llvm::Type *deref_type, llvm::Value *ptr, + size_t offset_into_type) { + if (offset_into_type == 0) { + return ptr; + } + + + llvm::APInt ap_off(64, offset_into_type, false); + auto elem_type = deref_type; + auto index = dl.getGEPIndexForOffset(elem_type, ap_off); + + if (!index) { + return std::nullopt; + } + auto i32 = llvm::IntegerType::getInt32Ty(deref_type->getContext()); + return ir.CreateGEP( + deref_type, ptr, + {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, *index)}); +} +} // namespace llvm::StringRef ReplaceStackReferences::name(void) { return "Replace stack references"; @@ -78,7 +106,8 @@ class StackModel { return arch->DataLayout().getTypeSizeInBits(decl.type) / 8; } - StackModel(const BasicBlockContext &cont, const remill::Arch *arch) { + StackModel(const BasicBlockContext &cont, const remill::Arch *arch, + const AbstractStack &abs_stack) { this->arch = arch; size_t index = 0; // this feels weird maybe it should be all stack variables but then if the variable isnt live... 
@@ -114,6 +143,7 @@ class StackModel { } std::optional GetOverlappingParam(std::int64_t off) { + auto vlte = GetParamLte(off); if (!vlte.has_value()) { @@ -121,11 +151,12 @@ class StackModel { } auto offset_into_var = off - vlte->decl.mem_offset; - if (offset_into_var < static_cast( - vlte->decl.type->getPrimitiveSizeInBits() / 8)) { + if (offset_into_var < + static_cast(GetParamDeclSize(vlte->decl))) { return {{offset_into_var, *vlte}}; } - + LOG(INFO) << "Looking for off " << off << " but not fitting " + << offset_into_var << " got off " << vlte->decl.mem_offset; return std::nullopt; } @@ -162,9 +193,24 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( const BasicBlockContext &cont) { NullCrossReferenceResolver resolver; CrossReferenceFolder folder(resolver, this->lifter.DataLayout()); - StackModel smodel(cont, this->lifter.Options().arch); - std::vector>> + + size_t overrunsz = 100; + llvm::IRBuilder<> ent_insert(&F.getEntryBlock(), F.getEntryBlock().begin()); + auto overrunptr = ent_insert.CreateAlloca( + AbstractStack::StackTypeFromSize(F.getContext(), overrunsz)); + + AbstractStack stk( + F.getContext(), + {{cont.GetStackSize(), anvill::GetBasicBlockStackPtr(&F)}, + {overrunsz, overrunptr}}, + lifter.Options().stack_frame_recovery_options.stack_grows_down); + + StackModel smodel(cont, this->lifter.Options().arch, stk); + + + // TODO(Ian): do a fixed size here + std::vector>> to_replace_vars; for (auto use : EnumerateStackPointerUsages(F)) { @@ -177,34 +223,45 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( int64_t stack_offset = reference.Displacement(this->lifter.DataLayout()); auto referenced_variable = smodel.GetOverlappingParam(stack_offset); + //TODO(Ian) handle nonzero offset - if (referenced_variable.has_value() && referenced_variable->offset == 0 && + if (referenced_variable.has_value() && llvm::isa(use->get()->getType())) { - to_replace_vars.push_back({use, referenced_variable->decl}); - } else { 
- // otherwise we are going to escape the abstract stack - to_replace_vars.push_back({use, stack_offset}); + + auto g = anvill::ProvidePointerFromFunctionArgs( + &F, referenced_variable->decl.index, this->lifter.Options(), cont); + auto ptr = GetPtrToOffsetInto(ent_insert, this->lifter.DataLayout(), + referenced_variable->decl.decl.type, g, + referenced_variable->offset); + if (ptr) { + to_replace_vars.push_back({use, *ptr}); + continue; + } } + // otherwise we are going to escape the abstract stack + to_replace_vars.push_back({use, stack_offset}); } - - AbstractStack stk( - cont.GetStackSize(), anvill::GetBasicBlockStackPtr(&F), - lifter.Options().stack_frame_recovery_options.stack_grows_down); + if (to_replace_vars.empty()) { + return llvm::PreservedAnalyses::all(); + } for (auto [use, v] : to_replace_vars) { - llvm::IRBuilder<> ir(&F.getEntryBlock(), F.getEntryBlock().begin()); - if (auto *insn = llvm::dyn_cast(use->get())) { - ir.SetInsertPoint(insn); - } + use->get()->dump(); - if (std::holds_alternative(v)) { - auto g = anvill::ProvidePointerFromFunctionArgs( - &F, std::get(v).index, this->lifter.Options(), cont); - use->set(g); + if (std::holds_alternative(v)) { + + use->set(std::get(v)); } else { auto offset = std::get(v); - use->set(stk.PointerToStackMemberFromOffset(ir, offset)); + auto ptr = stk.PointerToStackMemberFromOffset(ent_insert, offset); + if (ptr) { + use->set(*ptr); + } else { + LOG(ERROR) << "No pointer for offset " << offset + << " was supposed to use " + << stk.StackOffsetFromStackPointer(offset); + } } } CHECK(!llvm::verifyFunction(F, &llvm::errs())); From d3aa66a7ade3211fd3db9663020e8c0dfd6c11f4 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 10 Jan 2023 10:41:25 -0500 Subject: [PATCH 094/163] add better logging --- lib/Lifters/BasicBlockLifter.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 96c24f696..ecc2dae7b 100644 --- 
a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -292,7 +292,6 @@ bool BasicBlockLifter::DecodeInstructionInto(const uint64_t addr, return options.arch->DecodeDelayedInstruction( addr, inst_out->bytes, *inst_out, std::move(context)); } else { - DLOG(INFO) << "Ops emplace: " << inst_out->operands.size(); return options.arch->DecodeInstruction(addr, inst_out->bytes, *inst_out, std::move(context)); } @@ -315,11 +314,13 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { auto init_context = this->CreateDecodingContext(this->block_def); - + LOG(INFO) << "Decoding block at addr: " << std::hex << this->block_def.addr + << " with size " << this->block_def.size; bool ended_on_terminal = false; while (reached_addr < this->block_def.addr + this->block_def.size && !ended_on_terminal) { auto addr = reached_addr; + LOG(INFO) << "Decoding at addr " << std::hex << addr; auto res = this->DecodeInstructionInto(addr, false, &inst, init_context); if (!res) { LOG(FATAL) << "Failed to decode insn in block " << std::hex << addr; @@ -338,6 +339,8 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { ended_on_terminal = !this->ApplyInterProceduralControlFlowOverride(inst, bb); + LOG_IF(INFO, ended_on_terminal) + << "On terminal at addr: " << std::hex << addr; } if (!ended_on_terminal) { From adb25be1757c37c7999e33396dfc5c2e6ee8fb86 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 10 Jan 2023 12:58:36 -0500 Subject: [PATCH 095/163] add more logging --- lib/Declarations.cpp | 3 ++- lib/Passes/RemoveCallIntrinsics.cpp | 5 ++++- lib/Passes/ReplaceStackReferences.cpp | 1 - 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 8e8022179..c20603ab7 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -275,7 +275,8 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( // the function, which will be based off of the register state // on entry to the function. 
auto new_sp_base = return_stack_pointer->AddressOf(state_ptr, ir); - + LOG(INFO) << "Modifying ret stack pointer by: " + << return_stack_pointer_offset; const auto sp_val_on_exit = ir.CreateAdd( ir.CreateLoad(return_stack_pointer->type, new_sp_base), llvm::ConstantInt::get(return_stack_pointer->type, diff --git a/lib/Passes/RemoveCallIntrinsics.cpp b/lib/Passes/RemoveCallIntrinsics.cpp index 57341a4ce..e0320441c 100644 --- a/lib/Passes/RemoveCallIntrinsics.cpp +++ b/lib/Passes/RemoveCallIntrinsics.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace anvill { llvm::StringRef RemoveCallIntrinsics::name(void) { @@ -44,7 +45,9 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, const remill::IntrinsicTable table( remillFunctionCall->getFunction()->getParent()); - + LOG(INFO) << "Replacing call from: " + << remill::LLVMThingToString(remillFunctionCall) + << " with call to " << std::hex << ra.u.address; auto new_mem = fdecl->CallFromLiftedBlock(entity, lifter.Options().TypeDictionary(), table, ir, state_ptr, mem_ptr); diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 4ade267ef..dde358463 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -194,7 +194,6 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( NullCrossReferenceResolver resolver; CrossReferenceFolder folder(resolver, this->lifter.DataLayout()); - size_t overrunsz = 100; llvm::IRBuilder<> ent_insert(&F.getEntryBlock(), F.getEntryBlock().begin()); auto overrunptr = ent_insert.CreateAlloca( From 460e7c0dc697b265cdafbddb1a7007a1a3ba6932 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Wed, 11 Jan 2023 17:42:32 +1100 Subject: [PATCH 096/163] Use live exits vector instead of live entries again --- lib/Declarations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index c20603ab7..fe855123d 100644 --- 
a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -143,7 +143,7 @@ BasicBlockContext::LiveParamsAtEntryAndExit() const { }; add_all_from_vector(live_entries); - add_all_from_vector(live_entries); + add_all_from_vector(live_exits); return res; } From 637ccfd408dfa3d9f6c14b76e2c8df909ab4b246 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 11 Jan 2023 10:33:24 -0500 Subject: [PATCH 097/163] temporary no alias --- include/anvill/Utils.h | 10 ++++++---- lib/Lifters/BasicBlockLifter.cpp | 12 +++++++++++- lib/Optimize.cpp | 4 +++- lib/Passes/ReplaceStackReferences.cpp | 21 +++++++++++++++++++++ lib/Utils.cpp | 5 +++-- 5 files changed, 44 insertions(+), 8 deletions(-) diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index b011b72bb..98ba852b1 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -131,8 +132,9 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, std::optional GetBasicBlockAddr(llvm::Function *func); -llvm::Value *ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, - const anvill::LifterOptions &, - const BasicBlockContext &); -llvm::Value *GetBasicBlockStackPtr(llvm::Function *func); +llvm::Argument *ProvidePointerFromFunctionArgs(llvm::Function *func, + size_t index, + const anvill::LifterOptions &, + const BasicBlockContext &); +llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func); } // namespace anvill diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index ecc2dae7b..b92e5d2b3 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -398,10 +399,19 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto start_ind = lifted_func_type->getNumParams() + 1; for (auto v : this->block_context->LiveParamsAtEntryAndExit()) { + auto arg = 
remill::NthArgument(func, start_ind); if (!v.param.name.empty()) { - auto arg = remill::NthArgument(func, start_ind); arg->setName(v.param.name); } + + if (v.param.reg) { + // Registers should not have aliases + arg->addAttr(llvm::Attribute::get(llvm_context, + llvm::Attribute::AttrKind::NoAlias)); + } + // TODO(Ian): If we can eliminate the stack then we also are able to declare more no aliases here, not sure the + // best way to handle this + start_ind += 1; } diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 70e9100af..7d3eaa08d 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -291,6 +291,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, AddTransformRemillJumpIntrinsics(second_fpm, xr); second_fpm.addPass(llvm::VerifierPass()); + second_fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); //AddRemoveRemillFunctionReturns(second_fpm, xr); //AddConvertSymbolicReturnAddressToConcreteReturnAddress(second_fpm); AddLowerRemillUndefinedIntrinsics(second_fpm); @@ -299,9 +300,10 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(llvm::NewGVNPass()); second_fpm.addPass(llvm::VerifierPass()); + second_fpm.addPass(llvm::InstCombinePass()); AddSpreadPCMetadata(second_fpm, options); - second_fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); + second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(CodeQualityStatCollector()); second_fpm.addPass(llvm::VerifierPass()); diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index dde358463..77a3c7f25 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include "anvill/Declarations.h" +#include "anvill/Utils.h" namespace anvill { namespace { @@ -212,6 +214,7 @@ llvm::PreservedAnalyses 
ReplaceStackReferences::runOnBasicBlockFunction( std::vector>> to_replace_vars; + auto collision = false; for (auto use : EnumerateStackPointerUsages(F)) { const auto reference = folder.TryResolveReferenceWithCaching(use->get()); if (!reference.is_valid || !reference.references_stack_pointer) { @@ -236,6 +239,7 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( to_replace_vars.push_back({use, *ptr}); continue; } + collision = true; } // otherwise we are going to escape the abstract stack to_replace_vars.push_back({use, stack_offset}); @@ -265,6 +269,23 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( } CHECK(!llvm::verifyFunction(F, &llvm::errs())); + + if (EnumerateStackPointerUsages(F).empty() && !collision) { + auto noalias = + llvm::Attribute::get(F.getContext(), llvm::Attribute::NoAlias); + + // Note(Ian): the theory here is if all stack references are resolved, then any + // pointer use of the stack only derives from unresolved offsets + // TODO(Ian): this isnt sound if the resolved stack pointer then has further manipulation causing it to land inside a variable + anvill::GetBasicBlockStackPtr(&F)->addAttr(noalias); + + for (auto lives : cont.LiveParamsAtEntryAndExit()) { + ProvidePointerFromFunctionArgs(&F, lives.index, this->lifter.Options(), + cont) + ->addAttr(noalias); + } + } + return to_replace_vars.empty() ? 
llvm::PreservedAnalyses::all() : llvm::PreservedAnalyses::none(); } diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 3292f2096..23c333fb6 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -807,11 +808,11 @@ std::optional GetBasicBlockAddr(llvm::Function *func) { return llvm::cast(v)->getLimitedValue(); } -llvm::Value *GetBasicBlockStackPtr(llvm::Function *func) { +llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func) { return func->getArg(0); } -llvm::Value * +llvm::Argument * ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, const anvill::LifterOptions &options, const anvill::BasicBlockContext &context) { From 45d919e2e5c01b3e6f9061ec5dd32d5afa9b3ea2 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 12 Jan 2023 10:05:44 -0500 Subject: [PATCH 098/163] allow stack references to be resolved wrt to a base --- include/anvill/CrossReferenceFolder.h | 4 ++ include/anvill/Declarations.h | 7 +++ include/anvill/Utils.h | 6 +- lib/CrossReferenceFolder.cpp | 30 ++++++++-- lib/Declarations.cpp | 24 ++++++++ lib/Optimize.cpp | 2 +- lib/Passes/RemoveCallIntrinsics.cpp | 8 +++ lib/Passes/RemoveRemillFunctionReturns.cpp | 35 ++++++------ lib/Passes/RemoveStackPointerCExprs.cpp | 19 +++---- lib/Passes/ReplaceStackReferences.cpp | 66 +++++++++++++++++++--- lib/Utils.cpp | 31 ++++++++-- 11 files changed, 183 insertions(+), 49 deletions(-) diff --git a/include/anvill/CrossReferenceFolder.h b/include/anvill/CrossReferenceFolder.h index b3f834095..d536f6c11 100644 --- a/include/anvill/CrossReferenceFolder.h +++ b/include/anvill/CrossReferenceFolder.h @@ -120,6 +120,10 @@ class CrossReferenceFolder { CrossReferenceFolder &operator=(const CrossReferenceFolder &) = default; CrossReferenceFolder &operator=(CrossReferenceFolder &&) noexcept = default; + protected: + virtual std::optional + ResolveValueCallback(llvm::Value *) const; + private: CrossReferenceFolder(void) = delete; diff 
--git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index d23d70ea3..69a1f8010 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -257,6 +257,13 @@ class AbstractStack { size_t StackOffsetFromStackPointer(std::int64_t stack_off) const; + + std::int64_t StackPointerFromStackOffset(size_t offset) const; + + + std::optional + StackPointerFromStackCompreference(llvm::Value *) const; + static llvm::Type *StackTypeFromSize(llvm::LLVMContext &context, size_t size); //llvm::Type *StackType() const; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 98ba852b1..3c48f8e26 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -70,10 +70,12 @@ class StackPointerResolver { public: ~StackPointerResolver(void); - explicit StackPointerResolver(llvm::Module *module); + explicit StackPointerResolver( + llvm::Module *module, + llvm::ArrayRef additional_base_stack_ptrst); // Returns `true` if it looks like `val` is derived from a symbolic stack - // pointer representation. + // pointer representation, a basic block variable that is stack derived, or the abstract stack itself. 
bool IsRelatedToStackPointer(llvm::Value *) const; }; diff --git a/lib/CrossReferenceFolder.cpp b/lib/CrossReferenceFolder.cpp index 2da91e739..ed886bcdf 100644 --- a/lib/CrossReferenceFolder.cpp +++ b/lib/CrossReferenceFolder.cpp @@ -25,6 +25,8 @@ #include #include +#include +#include #include namespace anvill { @@ -56,10 +58,13 @@ using ResolvedCrossReferenceCache = class CrossReferenceFolderImpl { public: - CrossReferenceFolderImpl(const CrossReferenceResolver &xref_resolver_, - const llvm::DataLayout &dl_) + CrossReferenceFolderImpl( + const CrossReferenceResolver &xref_resolver_, const llvm::DataLayout &dl_, + std::function(llvm::Value *)> + value_cb) : xref_resolver(xref_resolver_), - dl(dl_) {} + dl(dl_), + callback_resolve_value(std::move(value_cb)) {} ResolvedCrossReference ResolveInstruction(llvm::Instruction *inst_val); ResolvedCrossReference ResolveConstant(llvm::Constant *const_val); @@ -171,6 +176,10 @@ class CrossReferenceFolderImpl { // Discovered entities. std::vector entities; + + // Callback + std::function(llvm::Value *)> + callback_resolve_value; }; @@ -640,9 +649,13 @@ CrossReferenceFolderImpl::ResolveCall(llvm::CallInst *call) { // Try to resolve `val` as a cross-reference. ResolvedCrossReference CrossReferenceFolderImpl::ResolveValue(llvm::Value *val) { + auto cb_res = this->callback_resolve_value(val); + if (cb_res) { + return *cb_res; + } + if (auto const_val = llvm::dyn_cast(val)) { return ResolveConstant(const_val); - } else if (auto inst_val = llvm::dyn_cast(val)) { return ResolveInstruction(inst_val); } else { @@ -669,7 +682,9 @@ CrossReferenceFolder::~CrossReferenceFolder(void) {} // lifter that can resolve global references on our behalf. 
CrossReferenceFolder::CrossReferenceFolder( const CrossReferenceResolver &resolver, const llvm::DataLayout &dl) - : impl(std::make_shared(resolver, dl)) {} + : impl(std::make_shared( + resolver, dl, + [this](llvm::Value *v) { return this->ResolveValueCallback(v); })) {} // Return a reference to the data layout used by the cross-reference folder. const llvm::DataLayout &CrossReferenceFolder::DataLayout(void) const { @@ -721,4 +736,9 @@ ResolvedCrossReference::Displacement(const llvm::DataLayout &dl) const { return displacement; } +std::optional +CrossReferenceFolder::ResolveValueCallback(llvm::Value *) const { + return std::nullopt; +} + } // namespace anvill diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index fe855123d..44198c443 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -277,6 +277,8 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( auto new_sp_base = return_stack_pointer->AddressOf(state_ptr, ir); LOG(INFO) << "Modifying ret stack pointer by: " << return_stack_pointer_offset; + + // TODO(Ian): this could go in the wrong direction if stack option is set to go up const auto sp_val_on_exit = ir.CreateAdd( ir.CreateLoad(return_stack_pointer->type, new_sp_base), llvm::ConstantInt::get(return_stack_pointer->type, @@ -473,6 +475,28 @@ AbstractStack::StackOffsetFromStackPointer(std::int64_t stack_off) const { } } +std::int64_t AbstractStack::StackPointerFromStackOffset(size_t offset) const { + if (stack_grows_down) { + return static_cast(offset) - this->total_size; + } else { + return offset; + } +} + + +std::optional +AbstractStack::StackPointerFromStackCompreference(llvm::Value *tgt) const { + size_t curr_off = 0; + for (auto comp : this->components) { + if (comp.stackptr == tgt) { + return this->StackPointerFromStackOffset(curr_off); + } + curr_off += comp.size; + } + + return std::nullopt; +} + std::optional AbstractStack::PointerToStackMemberFromOffset(llvm::IRBuilder<> &ir, std::int64_t stack_off) const { diff --git 
a/lib/Optimize.cpp b/lib/Optimize.cpp index 7d3eaa08d..00b34c35e 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -256,7 +256,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, //AddRecoverBasicStackFrame(fpm, options.stack_frame_recovery_options); //AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options); fpm.addPass(llvm::SROAPass()); - // fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); + //fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SROAPass()); fpm.addPass(llvm::VerifierPass()); diff --git a/lib/Passes/RemoveCallIntrinsics.cpp b/lib/Passes/RemoveCallIntrinsics.cpp index e0320441c..bec8aeebc 100644 --- a/lib/Passes/RemoveCallIntrinsics.cpp +++ b/lib/Passes/RemoveCallIntrinsics.cpp @@ -8,6 +8,8 @@ #include #include +#include "anvill/Utils.h" + namespace anvill { llvm::StringRef RemoveCallIntrinsics::name(void) { return "Remove call intrinsics."; @@ -20,6 +22,11 @@ llvm::PreservedAnalyses RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, llvm::FunctionAnalysisManager &am, llvm::PreservedAnalyses prev) { + // remillFunctionCall->getFunction()->dump(); + // if (remillFunctionCall->getFunction()->getName().endswith( + // "basic_block_func4201200")) { + // LOG(FATAL) << "done"; + // } CHECK(remillFunctionCall->getNumOperands() == 4); auto target_func = remillFunctionCall->getArgOperand(1); auto state_ptr = remillFunctionCall->getArgOperand(0); @@ -43,6 +50,7 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, llvm::IRBuilder<> ir(remillFunctionCall->getParent()); ir.SetInsertPoint(remillFunctionCall); + const remill::IntrinsicTable table( remillFunctionCall->getFunction()->getParent()); LOG(INFO) << "Replacing call from: " diff --git a/lib/Passes/RemoveRemillFunctionReturns.cpp b/lib/Passes/RemoveRemillFunctionReturns.cpp index 154031998..0607a37d2 100644 --- 
a/lib/Passes/RemoveRemillFunctionReturns.cpp +++ b/lib/Passes/RemoveRemillFunctionReturns.cpp @@ -6,9 +6,8 @@ * the LICENSE file found in the root directory of this source tree. */ -#include - #include +#include #include #include #include @@ -27,6 +26,7 @@ #include #include + #include "Utils.h" namespace anvill { @@ -54,12 +54,12 @@ static void FoldReturnAddressMatch(llvm::CallBase *call) { ret_addr->eraseFromParent(); ret_addr = next_ret_addr; - // Call to `llvm.returnaddress`. + // Call to `llvm.returnaddress`. } else if (IsReturnAddress(module, ret_addr)) { ret_addr->eraseFromParent(); break; - // Who knows?! + // Who knows?! } else { LOG(ERROR) << "Encountered unexpected instruction when removing return address: " @@ -83,8 +83,8 @@ static void OverwriteReturnAddress( for (auto &[call, ret_addr] : fixups) { // Store the return address. llvm::IRBuilder<> ir(call); - auto *bit_cast = ir.CreateBitCast(addr_of_ret_addr, - llvm::PointerType::get(ir.getContext(), 0)); + auto *bit_cast = ir.CreateBitCast( + addr_of_ret_addr, llvm::PointerType::get(ir.getContext(), 0)); CopyMetadataTo(call, bit_cast); auto *store = ir.CreateStore(ret_addr, bit_cast); CopyMetadataTo(call, store); @@ -110,7 +110,7 @@ RemoveRemillFunctionReturns::run(llvm::Function &func, llvm::FunctionAnalysisManager &AM) { const auto module = func.getParent(); CrossReferenceFolder xref_folder(xref_resolver, module->getDataLayout()); - StackPointerResolver sp_resolver(module); + StackPointerResolver sp_resolver(module, {}); std::vector matches_pattern; std::vector> fixups; @@ -121,7 +121,8 @@ RemoveRemillFunctionReturns::run(llvm::Function &func, func && func->getName() == "__remill_function_return") { auto ret_addr = call->getArgOperand(remill::kPCArgNum) ->stripPointerCastsAndAliases(); - switch (QueryReturnAddress(xref_folder, sp_resolver, module, ret_addr)) { + switch ( + QueryReturnAddress(xref_folder, sp_resolver, module, ret_addr)) { case kFoundReturnAddress: matches_pattern.push_back(call); break; 
// Do nothing if it's a symbolic stack pointer load; we're probably @@ -158,11 +159,9 @@ RemoveRemillFunctionReturns::run(llvm::Function &func, } // Returns `true` if `val` is a return address. -ReturnAddressResult -RemoveRemillFunctionReturns::QueryReturnAddress( +ReturnAddressResult RemoveRemillFunctionReturns::QueryReturnAddress( const CrossReferenceFolder &xref_folder, - const StackPointerResolver &sp_resolver, - llvm::Module *module, + const StackPointerResolver &sp_resolver, llvm::Module *module, llvm::Value *val) const { if (IsReturnAddress(module, val)) { @@ -200,12 +199,12 @@ RemoveRemillFunctionReturns::QueryReturnAddress( } else if (IsRelatedToStackPointer(module, val)) { return kFoundSymbolicStackPointerLoad; - // Sometimes optimizations result in really crazy looking constant expressions - // related to `__anvill_ra`, full of shifts, zexts, etc. We try to detect - // this situation by initializing a "magic" address associated with - // `__anvill_ra`, and then if we find this magic value on something that - // references `__anvill_ra`, then we conclude that all those manipulations - // in the constant expression are actually not important. + // Sometimes optimizations result in really crazy looking constant expressions + // related to `__anvill_ra`, full of shifts, zexts, etc. We try to detect + // this situation by initializing a "magic" address associated with + // `__anvill_ra`, and then if we find this magic value on something that + // references `__anvill_ra`, then we conclude that all those manipulations + // in the constant expression are actually not important. 
} else if (auto xr = xref_folder.TryResolveReferenceWithClearedCache(val); xr.is_valid && xr.references_return_address && xr.u.address == xref_folder.MagicReturnAddressValue()) { diff --git a/lib/Passes/RemoveStackPointerCExprs.cpp b/lib/Passes/RemoveStackPointerCExprs.cpp index 416c45a60..35c50f511 100644 --- a/lib/Passes/RemoveStackPointerCExprs.cpp +++ b/lib/Passes/RemoveStackPointerCExprs.cpp @@ -6,11 +6,10 @@ * the LICENSE file found in the root directory of this source tree. */ -#include - #include #include #include +#include #include #include #include @@ -26,19 +25,19 @@ namespace { class ConcreteStackPointerResolver final : public NullCrossReferenceResolver { private: - llvm::Module * const module; + llvm::Module *const module; const StackFrameRecoveryOptions &options; public: virtual ~ConcreteStackPointerResolver(void) = default; inline explicit ConcreteStackPointerResolver( - llvm::Module *module_, - const StackFrameRecoveryOptions &options_) - : module(module_), options(options_) {} + llvm::Module *module_, const StackFrameRecoveryOptions &options_) + : module(module_), + options(options_) {} - std::optional AddressOfEntity( - llvm::Constant *ent) const final { + std::optional + AddressOfEntity(llvm::Constant *ent) const final { if (!IsStackPointer(module, ent)) { return std::nullopt; } @@ -88,13 +87,13 @@ RemoveStackPointerCExprs::run(llvm::Function &func, return llvm::PreservedAnalyses::all(); } - llvm::Module * const module = func.getParent(); + llvm::Module *const module = func.getParent(); const llvm::DataLayout &dl = module->getDataLayout(); const auto addr_size = dl.getPointerSizeInBits(0); ConcreteStackPointerResolver resolver(module, options); CrossReferenceFolder folder(resolver, dl); - StackPointerResolver stack_resolver(module); + StackPointerResolver stack_resolver(module, {}); std::vector worklist; diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 77a3c7f25..2dc66ac2b 100644 --- 
a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include #include #include @@ -30,6 +32,43 @@ namespace anvill { namespace { + +class StackCrossReferenceResolver : public CrossReferenceFolder { + private: + const llvm::DataLayout &dl; + const AbstractStack &abs_stack; + + ResolvedCrossReference StackPtrToXref(std::int64_t off) const { + ResolvedCrossReference rxref; + rxref.is_valid = true; + rxref.references_stack_pointer = true; + rxref.size = dl.getPointerSizeInBits(0); + rxref.u.displacement = off; + return rxref; + } + + public: + StackCrossReferenceResolver(const CrossReferenceResolver &resolver, + const llvm::DataLayout &dl, + const AbstractStack &abs_stack) + : CrossReferenceFolder(resolver, dl), + dl(dl), + abs_stack(abs_stack) {} + + protected: + virtual std::optional + ResolveValueCallback(llvm::Value *v) const override { + LOG(INFO) << "Looking at: " << remill::LLVMThingToString(v); + auto stack_ref = abs_stack.StackPointerFromStackCompreference(v); + if (stack_ref) { + return this->StackPtrToXref(*stack_ref); + } + + return std::nullopt; + } +}; + + std::optional GetPtrToOffsetInto(llvm::IRBuilder<> &ir, const llvm::DataLayout &dl, llvm::Type *deref_type, llvm::Value *ptr, @@ -49,7 +88,10 @@ GetPtrToOffsetInto(llvm::IRBuilder<> &ir, const llvm::DataLayout &dl, auto i32 = llvm::IntegerType::getInt32Ty(deref_type->getContext()); return ir.CreateGEP( deref_type, ptr, - {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, *index)}); + {llvm::ConstantInt::get(i32, 0), + llvm::ConstantInt::get(llvm::IntegerType::get(deref_type->getContext(), + index->getBitWidth()), + *index)}); } } // namespace @@ -65,9 +107,10 @@ using StackPointerRegisterUsages = std::vector; // Enumerates all the store and load instructions that reference // the stack static StackPointerRegisterUsages 
-EnumerateStackPointerUsages(llvm::Function &function) { +EnumerateStackPointerUsages(llvm::Function &function, + llvm::ArrayRef additional_sps) { StackPointerRegisterUsages output; - StackPointerResolver sp_resolver(function.getParent()); + StackPointerResolver sp_resolver(function.getParent(), additional_sps); for (auto &basic_block : function) { for (auto &instr : basic_block) { @@ -84,7 +127,6 @@ EnumerateStackPointerUsages(llvm::Function &function) { return output; } - struct BasicBlockVar { size_t index; ParameterDecl decl; @@ -193,8 +235,8 @@ class StackModel { llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( llvm::Function &F, llvm::FunctionAnalysisManager &AM, const BasicBlockContext &cont) { - NullCrossReferenceResolver resolver; - CrossReferenceFolder folder(resolver, this->lifter.DataLayout()); + F.dump(); + size_t overrunsz = 100; llvm::IRBuilder<> ent_insert(&F.getEntryBlock(), F.getEntryBlock().begin()); @@ -207,15 +249,21 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( {overrunsz, overrunptr}}, lifter.Options().stack_frame_recovery_options.stack_grows_down); + StackModel smodel(cont, this->lifter.Options().arch, stk); + NullCrossReferenceResolver resolver; + StackCrossReferenceResolver folder(resolver, this->lifter.DataLayout(), stk); + // TODO(Ian): do a fixed size here std::vector>> to_replace_vars; auto collision = false; - for (auto use : EnumerateStackPointerUsages(F)) { + // TODO(Ian): also handle resolving from references where the base is inside a bb var + for (auto use : + EnumerateStackPointerUsages(F, {anvill::GetBasicBlockStackPtr(&F)})) { const auto reference = folder.TryResolveReferenceWithCaching(use->get()); if (!reference.is_valid || !reference.references_stack_pointer) { continue; @@ -270,7 +318,9 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( CHECK(!llvm::verifyFunction(F, &llvm::errs())); - if (EnumerateStackPointerUsages(F).empty() && !collision) { 
+ // This isnt a sound check at all we could still derive a pointer to a variable from another variable. Essentially need to check that all + // derivations are in bounds... + if (EnumerateStackPointerUsages(F, {}).empty() && !collision) { auto noalias = llvm::Attribute::get(F.getContext(), llvm::Attribute::NoAlias); diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 23c333fb6..f875e62e2 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -13,6 +13,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -24,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +36,7 @@ #include #include #include +#include namespace anvill { @@ -564,13 +569,27 @@ class StackPointerResolverImpl { public: bool ResolveFromValue(llvm::Value *val); bool ResolveFromConstantExpr(llvm::ConstantExpr *ce); + bool IsStackPointerBase(llvm::Value *canidate); - inline explicit StackPointerResolverImpl(llvm::Module *m) : module(m) {} + inline explicit StackPointerResolverImpl( + llvm::Module *m, llvm::ArrayRef additional_base_stack_ptrs) + : module(m) { + this->stack_related_args.insert(additional_base_stack_ptrs.begin(), + additional_base_stack_ptrs.end()); + } llvm::Module *const module; std::unordered_map cache; + + llvm::SmallSet stack_related_args; }; +bool StackPointerResolverImpl::IsStackPointerBase(llvm::Value *canidate) { + return IsStackPointer(module, canidate) || + (this->stack_related_args.find(canidate) != + this->stack_related_args.end()); +} + bool StackPointerResolverImpl::ResolveFromValue(llvm::Value *val) { // Lookup the cache and return the value if it exist @@ -602,7 +621,7 @@ bool StackPointerResolverImpl::ResolveFromValue(llvm::Value *val) { val3 && val3 != val) { result = ResolveFromValue(val3); } else { - result = IsStackPointer(module, val); + result = this->IsStackPointerBase(val); } } @@ -649,8 +668,10 @@ bool StackPointerResolverImpl::ResolveFromConstantExpr(llvm::ConstantExpr *ce) { } 
StackPointerResolver::~StackPointerResolver(void) {} -StackPointerResolver::StackPointerResolver(llvm::Module *module) - : impl(new StackPointerResolverImpl(module)) {} +StackPointerResolver::StackPointerResolver( + llvm::Module *module, + llvm::ArrayRef additional_base_stack_ptrs) + : impl(new StackPointerResolverImpl(module, additional_base_stack_ptrs)) {} // Returns `true` if it looks like `val` is derived from a symbolic stack // pointer representation. @@ -659,7 +680,7 @@ bool StackPointerResolver::IsRelatedToStackPointer(llvm::Value *val) const { } bool IsRelatedToStackPointer(llvm::Module *module, llvm::Value *val) { - StackPointerResolverImpl impl(module); + StackPointerResolverImpl impl(module, {}); return impl.ResolveFromValue(val); } From 0648c6fb644d1db5882c0a032e0e2009d1d2d7e1 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Fri, 13 Jan 2023 02:26:48 +1100 Subject: [PATCH 099/163] Don't run the first set of function passes again --- lib/Optimize.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 00b34c35e..3f5f046b5 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -287,6 +287,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(fpm))); mpm.run(module, mam); + llvm::ModulePassManager second_mpm; llvm::FunctionPassManager second_fpm; AddTransformRemillJumpIntrinsics(second_fpm, xr); @@ -315,8 +316,9 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, second_fpm.addPass(llvm::VerifierPass()); - mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(second_fpm))); - mpm.run(module, mam); + second_mpm.addPass( + llvm::createModuleToFunctionPassAdaptor(std::move(second_fpm))); + second_mpm.run(module, mam); // Get rid of all final uses of `__anvill_pc`. 
if (lifter.Options().should_remove_anvill_pc) { From c1bda738ec73a7b5522eb216942fc1c9b56e8c84 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 12 Jan 2023 11:27:34 -0500 Subject: [PATCH 100/163] fix lte for case where there is no variable greater than the current offset --- lib/Passes/ReplaceStackReferences.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 2dc66ac2b..907e88432 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -169,6 +169,10 @@ class StackModel { std::optional GetParamLte(std::int64_t off) { auto prec = this->frame.lower_bound(off); if (prec == this->frame.end()) { + if (this->frame.begin() != this->frame.end() && + this->frame.begin()->first <= off) { + return this->frame.begin()->second; + } return std::nullopt; } @@ -194,6 +198,9 @@ class StackModel { return std::nullopt; } + LOG(INFO) << "value found lte offset: " << vlte->decl.mem_offset << " " + << off; + auto offset_into_var = off - vlte->decl.mem_offset; if (offset_into_var < static_cast(GetParamDeclSize(vlte->decl))) { @@ -289,6 +296,10 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( } collision = true; } + + LOG(INFO) << "Escaping stack access " << stack_offset << " " + << remill::LLVMThingToString(use->get()); + // otherwise we are going to escape the abstract stack to_replace_vars.push_back({use, stack_offset}); } From ca29ee667f04571bb6a12d7dbcc0aaa3eb05bb87 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 12 Jan 2023 11:32:43 -0500 Subject: [PATCH 101/163] Revert "Don't run the first set of function passes again" This reverts commit 0648c6fb644d1db5882c0a032e0e2009d1d2d7e1. 
--- lib/Optimize.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 3f5f046b5..00b34c35e 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -287,7 +287,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(fpm))); mpm.run(module, mam); - llvm::ModulePassManager second_mpm; llvm::FunctionPassManager second_fpm; AddTransformRemillJumpIntrinsics(second_fpm, xr); @@ -316,9 +315,8 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, second_fpm.addPass(llvm::VerifierPass()); - second_mpm.addPass( - llvm::createModuleToFunctionPassAdaptor(std::move(second_fpm))); - second_mpm.run(module, mam); + mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(second_fpm))); + mpm.run(module, mam); // Get rid of all final uses of `__anvill_pc`. if (lifter.Options().should_remove_anvill_pc) { From 06eaf86b726d6afc840aed3f0a68bbf0594c75f6 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 12 Jan 2023 11:52:26 -0500 Subject: [PATCH 102/163] adapt pointer to integer when needed --- lib/Passes/ReplaceStackReferences.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 907e88432..56e612ac5 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -309,16 +310,26 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( } for (auto [use, v] : to_replace_vars) { - use->get()->dump(); - + auto use_of_variable = use; + auto replace_use = [use_of_variable](llvm::Value *with_ptr) { + if (llvm::isa(use_of_variable->get()->getType())) { + use_of_variable->set(with_ptr); + } else if (llvm::isa( + use_of_variable->get()->getType())) { + if (auto insn = + 
llvm::dyn_cast(use_of_variable->getUser())) { + llvm::CastInst::Create(llvm::Instruction::CastOps::PtrToInt, with_ptr, + use_of_variable->get()->getType(), "", insn); + } + } + }; if (std::holds_alternative(v)) { - - use->set(std::get(v)); + replace_use(std::get(v)); } else { auto offset = std::get(v); auto ptr = stk.PointerToStackMemberFromOffset(ent_insert, offset); if (ptr) { - use->set(*ptr); + replace_use(*ptr); } else { LOG(ERROR) << "No pointer for offset " << offset << " was supposed to use " From 58224b70c9ac5c5748297ebc02c23fbeb1b6a797 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 30 Jan 2023 13:31:46 -0500 Subject: [PATCH 103/163] Fix CI Build (#343) * switch to rustup * add cargo path * fix typo * switch remill submod * fix pass tests * use gh action * only warn when missing reg * allow for missing registers in affine eqs * log as error * Fix errors when decoding opaque pointer specs * Fix macos build * more robustness * add pointer displacements * hacky solution for dynamic use of stack * actually do the replacement when cast to int * get max stack depth from ghidra * fix assert to allow for max depth * fix insert point for casts * bump to remill version that allows additional parameters by default * escape anvill_sp if a stack reference isnt recoverable --------- Co-authored-by: Francesco Bertolaccini --- .github/workflows/build.yml | 4 +- data_specifications/specification.proto | 10 +++ include/anvill/Declarations.h | 31 ++++++++- lib/Declarations.cpp | 52 ++++++++++----- lib/Lifters/BasicBlockLifter.cpp | 25 ++------ lib/Lifters/BasicBlockLifter.h | 1 - lib/Lifters/FunctionLifter.cpp | 4 +- lib/Passes/ReplaceStackReferences.cpp | 42 ++++++++----- lib/Protobuf.cpp | 36 +++++++++-- lib/Specification.cpp | 4 +- remill | 2 +- tests/anvill_passes/src/RecoverEntityUses.cpp | 63 +++++++++---------- .../anvill_passes/src/TransformRemillJump.cpp | 54 ++++++++-------- 13 files changed, 205 insertions(+), 123 deletions(-) diff --git 
a/.github/workflows/build.yml b/.github/workflows/build.yml index 8796624d8..a8c7ca1ee 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -141,9 +141,7 @@ jobs: git config --global user.name "github-actions[bot]" - name: Install stable rust - shell: bash - run: | - apt-get install -y cargo + uses: dtolnay/rust-toolchain@stable - name: "Install Just" shell: bash diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 0ef8d0822..99f6a4f7b 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -281,7 +281,17 @@ message StackEffects { message StackFrame { + // The size of the "static frame": locals + params + return_addr uint64 frame_size = 1; + // Stack frame pointer depths are relative to the return address offset + int64 return_address_offset = 2; + // parameter size allows + // us to compute the + // stack depth of the lowest address when the stack grows down + uint64 parameter_size = 3; + + // the maximum depth the stack reaches beyond the return_addr + uint64 max_frame_depth = 4; } message Function { diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 69a1f8010..b53a9fdd9 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -217,6 +218,10 @@ class BasicBlockContext { virtual size_t GetStackSize() const = 0; + virtual size_t GetMaxStackSize() const = 0; + + virtual size_t GetPointerDisplacement() const = 0; + virtual const std::vector &ReturnValue() const = 0; // Deduplicates locations and ensures there are no overlapping decls @@ -250,12 +255,22 @@ class AbstractStack { std::vector stack_types; std::vector components; size_t total_size; + size_t pointer_displacement; public: + // The displacement required to make all offset accesses positive + size_t GetPointerDisplacement() const { + return pointer_displacement; + }; 
+ + + // The pointer displacement is the size above the zero point of the stack, typically return pointer offset + parameter size AbstractStack(llvm::LLVMContext &context, - std::vector components, bool stack_grows_down); + std::vector components, bool stack_grows_down, + size_t pointer_displacement); - size_t StackOffsetFromStackPointer(std::int64_t stack_off) const; + std::optional + StackOffsetFromStackPointer(std::int64_t stack_off) const; std::int64_t StackPointerFromStackOffset(size_t offset) const; @@ -292,6 +307,10 @@ class SpecBlockContext : public BasicBlockContext { virtual size_t GetStackSize() const override; + virtual size_t GetMaxStackSize() const override; + + virtual size_t GetPointerDisplacement() const override; + protected: virtual const std::vector &LiveParamsAtEntry() const override; @@ -342,6 +361,12 @@ struct FunctionDecl : public CallableDecl { std::uint64_t stack_depth; + std::uint64_t maximum_depth; + + std::int64_t ret_ptr_offset{0}; + + std::size_t parameter_size{0}; + // Declare this function in an LLVM module. llvm::Function *DeclareInModule(std::string_view name, llvm::Module &) const; @@ -352,6 +377,8 @@ struct FunctionDecl : public CallableDecl { return Create(func, arch.get()); } + size_t GetPointerDisplacement() const; + // Create a function declaration from an LLVM function. 
static Result Create(llvm::Function &func, const remill::Arch *arch); diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 44198c443..33e8734ff 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -229,6 +229,10 @@ size_t SpecBlockContext::GetStackSize() const { return decl.stack_depth; } +size_t SpecBlockContext::GetMaxStackSize() const { + return decl.maximum_depth; +} + SpecBlockContext::SpecBlockContext( const FunctionDecl &decl, SpecStackOffsets offsets, @@ -239,6 +243,10 @@ SpecBlockContext::SpecBlockContext( live_params_at_entry(std::move(live_params_at_entry)), live_params_at_exit(std::move(live_params_at_exit)) {} +size_t SpecBlockContext::GetPointerDisplacement() const { + return this->decl.GetPointerDisplacement(); +} + const std::vector &SpecBlockContext::LiveParamsAtExit() const { return this->live_params_at_exit; } @@ -450,6 +458,10 @@ V GetWithDef(uint64_t addr, const std::unordered_map &map, V def) { } } // namespace +size_t FunctionDecl::GetPointerDisplacement() const { + return this->parameter_size + this->return_stack_pointer_offset; +} + SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { return SpecBlockContext( *this, GetWithDef(addr, this->stack_offsets, SpecStackOffsets()), @@ -457,29 +469,30 @@ SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { GetWithDef(addr, this->live_regs_at_exit, std::vector())); } -size_t +std::optional AbstractStack::StackOffsetFromStackPointer(std::int64_t stack_off) const { - // The offset is relative to the stack pointer but on entry to function so offset into the stack (negative if grows down postive otherwise... 
- // unless we have parameters) - // TODO(Ian): this wont do the correct thing for stack parameters - - // welp lets crash if we are going to do the wrong thing - CHECK((this->stack_grows_down && stack_off <= 0) || - (!this->stack_grows_down && stack_off >= 0)); - if (this->stack_grows_down) { + auto displaced_offset = + stack_off - static_cast(this->pointer_displacement); LOG(INFO) << this->total_size; - return this->total_size + stack_off; + LOG(INFO) << "disp: " << this->pointer_displacement; + LOG(INFO) << "Displaced offset: " << displaced_offset; + if (!(static_cast(this->total_size) >= + llabs(displaced_offset))) { + return std::nullopt; + } + return this->total_size + displaced_offset; } else { - return stack_off; + return this->pointer_displacement + stack_off; } } std::int64_t AbstractStack::StackPointerFromStackOffset(size_t offset) const { if (stack_grows_down) { - return static_cast(offset) - this->total_size; + return (static_cast(offset) - this->total_size) + + this->pointer_displacement; } else { - return offset; + return offset - this->pointer_displacement; } } @@ -501,8 +514,12 @@ std::optional AbstractStack::PointerToStackMemberFromOffset(llvm::IRBuilder<> &ir, std::int64_t stack_off) const { auto off = this->StackOffsetFromStackPointer(stack_off); + if (!off) { + return std::nullopt; + } + auto i32 = llvm::IntegerType::getInt32Ty(this->context); - LOG(INFO) << "Looking for offset" << off; + LOG(INFO) << "Looking for offset" << *off; auto curr_off = 0; auto curr_ind = 0; for (auto [sz, ptr] : this->components) { @@ -511,7 +528,7 @@ AbstractStack::PointerToStackMemberFromOffset(llvm::IRBuilder<> &ir, LOG(INFO) << curr_off << " " << sz; return ir.CreateGEP(this->stack_types[curr_ind], ptr, {llvm::ConstantInt::get(i32, 0), - llvm::ConstantInt::get(i32, off - curr_off)}); + llvm::ConstantInt::get(i32, *off - curr_off)}); } curr_off += sz; curr_ind++; @@ -528,11 +545,12 @@ llvm::Type *AbstractStack::StackTypeFromSize(llvm::LLVMContext &context, 
AbstractStack::AbstractStack(llvm::LLVMContext &context, std::vector components, - bool stack_grows_down) + bool stack_grows_down, size_t pointer_displacement) : context(context), stack_grows_down(stack_grows_down), components(std::move(components)), - total_size(0) { + total_size(0), + pointer_displacement(pointer_displacement) { if (stack_grows_down) { std::reverse(this->components.begin(), this->components.end()); diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index b92e5d2b3..2cd2c90a6 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -195,12 +195,12 @@ bool BasicBlockLifter::DoInterProceduralControlFlow( builder.CreateStore(raddr, npc); builder.CreateStore(raddr, pc); } else { - remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics, true); + remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics); } return !cc.stop; } else if (std::holds_alternative(override)) { remill::AddTerminatingTailCall(block, intrinsics.function_return, - intrinsics, true); + intrinsics); return false; } @@ -571,21 +571,6 @@ void BasicBlockLifter::UnpackLiveValues( } -size_t -BasicBlockLifter::StackOffsetFromStackPointer(std::int64_t stack_off) const { - // The offset is relative to the stack pointer but on entry to function so offset into the stack (negative if grows down postive otherwise... 
- // unless we have parameters) - // TODO(Ian): this wont do the correct thing for stack parameters - - // welp lets crash if we are going to do the wrong thing - CHECK((options.stack_frame_recovery_options.stack_grows_down && - stack_off <= 0) || - (!options.stack_frame_recovery_options.stack_grows_down && - stack_off >= 0)); - return std::abs(stack_off); -} - - void BasicBlockLifter::CallBasicBlockFunction( llvm::IRBuilder<> &builder, llvm::Value *parent_state, const CallableBasicBlockFunction &cbfunc, llvm::Value *parent_stack) const { @@ -606,11 +591,13 @@ void BasicBlockLifter::CallBasicBlockFunction( auto bbvars = this->block_context->LiveParamsAtEntryAndExit(); AbstractStack stack( - builder.getContext(), {{decl.stack_depth, parent_stack}}, - this->options.stack_frame_recovery_options.stack_grows_down); + builder.getContext(), {{decl.maximum_depth, parent_stack}}, + this->options.stack_frame_recovery_options.stack_grows_down, + decl.GetPointerDisplacement()); PointerProvider ptr_provider = [&builder, this, out_param_locals, &bbvars, &stack](size_t index) -> llvm::Value * { auto repr_var = bbvars[index]; + LOG(INFO) << "Lifting: " << repr_var.param.name << " for call"; if (repr_var.param.mem_reg) { auto stack_ptr = stack.PointerToStackMemberFromOffset( builder, repr_var.param.mem_offset); diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index a5ad17559..61f40fb92 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -127,7 +127,6 @@ class BasicBlockLifter : public CodeLifter { void CallBasicBlockFunction(llvm::IRBuilder<> &, llvm::Value *state_ptr, const CallableBasicBlockFunction &, llvm::Value *parent_stack) const; - size_t StackOffsetFromStackPointer(std::int64_t stack_off) const; BasicBlockLifter(BasicBlockLifter &&) = default; }; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index a1b5a2a9f..53f3b0d40 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ 
b/lib/Lifters/FunctionLifter.cpp @@ -533,8 +533,8 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { auto abstract_stack = ir.CreateAlloca( - AbstractStack::StackTypeFromSize(llvm_context, decl.stack_depth), nullptr, - "abstract_stack"); + AbstractStack::StackTypeFromSize(llvm_context, decl.maximum_depth), + nullptr, "abstract_stack"); // Add a branch between the first block of the lifted function, which sets // up some local variables, and the block that will contain the lifted // instruction. diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 56e612ac5..3c922986d 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include "anvill/Declarations.h" #include "anvill/Utils.h" + namespace anvill { namespace { @@ -243,19 +245,19 @@ class StackModel { llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( llvm::Function &F, llvm::FunctionAnalysisManager &AM, const BasicBlockContext &cont) { - F.dump(); - - - size_t overrunsz = 100; + size_t overrunsz = cont.GetMaxStackSize() - cont.GetStackSize(); llvm::IRBuilder<> ent_insert(&F.getEntryBlock(), F.getEntryBlock().begin()); auto overrunptr = ent_insert.CreateAlloca( AbstractStack::StackTypeFromSize(F.getContext(), overrunsz)); + LOG(INFO) << "Replacing stack vars in bb: " << std::hex + << *anvill::GetBasicBlockAddr(&F); AbstractStack stk( F.getContext(), {{cont.GetStackSize(), anvill::GetBasicBlockStackPtr(&F)}, {overrunsz, overrunptr}}, - lifter.Options().stack_frame_recovery_options.stack_grows_down); + lifter.Options().stack_frame_recovery_options.stack_grows_down, + cont.GetPointerDisplacement()); StackModel smodel(cont, this->lifter.Options().arch, stk); @@ -283,8 +285,7 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( auto referenced_variable = 
smodel.GetOverlappingParam(stack_offset); //TODO(Ian) handle nonzero offset - if (referenced_variable.has_value() && - llvm::isa(use->get()->getType())) { + if (referenced_variable.has_value()) { auto g = anvill::ProvidePointerFromFunctionArgs( &F, referenced_variable->decl.index, this->lifter.Options(), cont); @@ -295,6 +296,10 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( to_replace_vars.push_back({use, *ptr}); continue; } + LOG(ERROR) << "Couldnt create a pointer for offset " + << referenced_variable->offset << " into a " + << remill::LLVMThingToString( + referenced_variable->decl.decl.type); collision = true; } @@ -311,16 +316,20 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( for (auto [use, v] : to_replace_vars) { auto use_of_variable = use; - auto replace_use = [use_of_variable](llvm::Value *with_ptr) { + auto replace_use = [use_of_variable, overrunptr](llvm::Value *with_ptr) { if (llvm::isa(use_of_variable->get()->getType())) { use_of_variable->set(with_ptr); } else if (llvm::isa( use_of_variable->get()->getType())) { - if (auto insn = - llvm::dyn_cast(use_of_variable->getUser())) { - llvm::CastInst::Create(llvm::Instruction::CastOps::PtrToInt, with_ptr, - use_of_variable->get()->getType(), "", insn); + + llvm::IRBuilder<> ir(overrunptr); + + if (auto ptr = llvm::dyn_cast(with_ptr)) { + ir.SetInsertPoint(ptr->getNextNode()); } + + use_of_variable->set( + ir.CreatePointerCast(with_ptr, use_of_variable->get()->getType())); } }; if (std::holds_alternative(v)) { @@ -331,12 +340,15 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( if (ptr) { replace_use(*ptr); } else { - LOG(ERROR) << "No pointer for offset " << offset - << " was supposed to use " - << stk.StackOffsetFromStackPointer(offset); + LOG(ERROR) << "No pointer for offset " << offset; + auto off = stk.StackOffsetFromStackPointer(offset); + if (off) { + LOG(ERROR) << "Was supposed to use offset " << *off; + } } } } + 
CHECK(!llvm::verifyFunction(F, &llvm::errs())); diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index b840d8e15..4ffe6a919 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -442,7 +443,12 @@ ProtobufTranslator::DecodeType(const ::specification::TypeSpec &obj) const { } } if (obj.has_alias()) { - return type_map.at(obj.alias()); + if (type_map.count(obj.alias())) { + return type_map.at(obj.alias()); + } else { + LOG(ERROR) << "Unknown alias id " << obj.alias(); + return {BaseType::Void}; + } } return {"Unknown/invalid data type" + obj.DebugString()}; @@ -506,8 +512,13 @@ Result ProtobufTranslator::DecodeFunction( if (!function.has_frame()) { return std::string("All functions should have a frame"); } + auto frame = function.frame(); + + decl.stack_depth = frame.frame_size(); + decl.ret_ptr_offset = frame.return_address_offset(); + decl.parameter_size = frame.parameter_size(); - decl.stack_depth = function.frame().frame_size(); + decl.maximum_depth = decl.GetPointerDisplacement() + frame.max_frame_depth(); this->ParseCFGIntoFunction(function, decl); @@ -553,8 +564,11 @@ void ProtobufTranslator::AddLiveValuesToBB( LOG_IF(FATAL, var.repr_var().values_size() != 1) << "Symbols must be represented by a single valuedecl."; auto param = DecodeParameter(var); - LOG_IF(FATAL, !param.Succeeded()) << "Unable to decode live parameter"; - v.push_back(param.TakeValue()); + if (!param.Succeeded()) { + LOG(ERROR) << "Unable to decode live parameter " << param.TakeError(); + } else { + v.push_back(param.TakeValue()); + } } } @@ -593,7 +607,12 @@ void ProtobufTranslator::ParseCFGIntoFunction( auto target_vdecl = DecodeValue(symval.target_value().values()[0], stackptr_type_spec, "Unable to get value decl for stack offset relation"); - LOG_IF(FATAL, !target_vdecl.Succeeded()) << "Failed to lift value"; + + if (!target_vdecl.Succeeded()) { + LOG(ERROR) << "Failed to lift value " << 
target_vdecl.TakeError(); + continue; + } + if (!symval.has_curr_val()) { LOG(FATAL) << "Mapping should have current value"; } @@ -681,6 +700,13 @@ anvill::Result ProtobufTranslator::DecodeType( return type_map[alias]; } auto &type = type_map[alias]; + + // The alias may not be present in the map in case of opaque pointers + if (!map.count(alias)) { + LOG(ERROR) << "No alias definition for " << obj.alias(); + return {BaseType::Void}; + } + auto res = DecodeType(map.at(alias), map); if (!res.Succeeded()) { return res.TakeError(); diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 04ba6d6fc..2330c15aa 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -111,6 +111,7 @@ SpecificationImpl::ParseSpecification( if (!maybe_var.Succeeded()) { auto err = maybe_var.Error(); dec_err.push_back(err); + continue; } auto var_obj = maybe_var.Value(); auto var_address = var_obj.address; @@ -428,7 +429,8 @@ SpecBlockContexts::GetBasicBlockContextForAddr(uint64_t addr) const { return std::nullopt; } - return std::cref(cont->second); + return std::optional>{ + std::cref(cont->second)}; } // Call `cb` on each function in the spec, until `cb` returns `false`. 
diff --git a/remill b/remill index a8ead7b58..2a517ba3e 160000 --- a/remill +++ b/remill @@ -1 +1 @@ -Subproject commit a8ead7b58496f0fc90100eee67de1eee74cdc8c3 +Subproject commit 2a517ba3e38f29da08ac42ae8add90b254e7cf04 diff --git a/tests/anvill_passes/src/RecoverEntityUses.cpp b/tests/anvill_passes/src/RecoverEntityUses.cpp index e770ff7c0..fccad3124 100644 --- a/tests/anvill_passes/src/RecoverEntityUses.cpp +++ b/tests/anvill_passes/src/RecoverEntityUses.cpp @@ -1,58 +1,57 @@ +#include #include +#include +#include #include #include #include #include #include #include -#include -#include + #include -#include -#include #include "Utils.h" namespace anvill { - TEST_SUITE("RecoverEntityUses") { - TEST_CASE("Regression test for unresolved anvill_pc") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); - auto module = LoadTestData(*llvm_context, "TestingUnresolvedEntity.ll"); +TEST_SUITE("RecoverEntityUses") { + TEST_CASE("Regression test for unresolved anvill_pc") { + auto llvm_context = anvill::CreateContextWithOpaquePointers(); + auto module = LoadTestData(*llvm_context, "TestingUnresolvedEntity.ll"); - auto arch = - remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), - remill::GetArchName("amd64")); - REQUIRE(arch != nullptr); + auto arch = + remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), + remill::GetArchName("x86")); + REQUIRE(arch != nullptr); - auto ctrl_flow_provider = anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*llvm_context); + auto ctrl_flow_provider = anvill::NullControlFlowProvider(); + TypeDictionary tyDict(*llvm_context); - NullTypeProvider ty_prov(tyDict); - NullMemoryProvider mem_prov; - anvill::LifterOptions lift_options(arch.get(), *module, ty_prov, - std::move(ctrl_flow_provider), - mem_prov); + NullTypeProvider ty_prov(tyDict); + NullMemoryProvider mem_prov; + anvill::LifterOptions lift_options(arch.get(), *module, ty_prov, + std::move(ctrl_flow_provider), mem_prov); - 
anvill::LifterOptions options(arch.get(), *module, ty_prov, - std::move(ctrl_flow_provider), mem_prov); + anvill::LifterOptions options(arch.get(), *module, ty_prov, + std::move(ctrl_flow_provider), mem_prov); - // memory and types will not get used and create lifter with null - anvill::EntityLifter lifter(options); + // memory and types will not get used and create lifter with null + anvill::EntityLifter lifter(options); - EntityCrossReferenceResolver xref(lifter); + EntityCrossReferenceResolver xref(lifter); - ConvertAddressesToEntityUses conv(xref); + ConvertAddressesToEntityUses conv(xref); - auto func = module->getFunction("sub_12b30__A_SBI_B_0.6"); + auto func = module->getFunction("sub_12b30__A_SBI_B_0.6"); - REQUIRE(func != nullptr); + REQUIRE(func != nullptr); - llvm::FunctionAnalysisManager fam; + llvm::FunctionAnalysisManager fam; - conv.run(*func, fam); - func->dump(); - } - } + conv.run(*func, fam); + func->dump(); + } } +} // namespace anvill diff --git a/tests/anvill_passes/src/TransformRemillJump.cpp b/tests/anvill_passes/src/TransformRemillJump.cpp index 860e66844..c721a18de 100644 --- a/tests/anvill_passes/src/TransformRemillJump.cpp +++ b/tests/anvill_passes/src/TransformRemillJump.cpp @@ -6,19 +6,19 @@ * the LICENSE file found in the root directory of this source tree. 
*/ +#include #include +#include +#include #include #include #include #include #include #include -#include -#include + #include -#include -#include #include "Utils.h" namespace anvill { @@ -33,22 +33,24 @@ TEST_SUITE("TransformRemillJump_Test0") { remill::GetArchName("amd64")); REQUIRE(arch != nullptr); - auto ctrl_flow_provider = - anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*llvm_context); + auto ctrl_flow_provider = anvill::NullControlFlowProvider(); + TypeDictionary tyDict(*llvm_context); - NullTypeProvider ty_prov(tyDict); - NullMemoryProvider mem_prov; - anvill::LifterOptions lift_options( - arch.get(), *module,ty_prov,std::move(ctrl_flow_provider),mem_prov); + NullTypeProvider ty_prov(tyDict); + NullMemoryProvider mem_prov; + anvill::LifterOptions lift_options(arch.get(), *module, ty_prov, + std::move(ctrl_flow_provider), mem_prov); - anvill::LifterOptions options(arch.get(), *module,ty_prov,std::move(ctrl_flow_provider),mem_prov); + anvill::LifterOptions options(arch.get(), *module, ty_prov, + std::move(ctrl_flow_provider), mem_prov); // memory and types will not get used and create lifter with null anvill::EntityLifter lifter(options); EntityCrossReferenceResolver xref(lifter); + module->getFunction("__remill_intrinsics")->eraseFromParent(); + CHECK(RunFunctionPass(module.get(), TransformRemillJumpIntrinsics(xref))); const auto ret_func = module->getFunction("__remill_function_return"); @@ -68,22 +70,22 @@ TEST_SUITE("TransformRemillJump_Test1") { remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), remill::GetArchName("amd64")); REQUIRE(arch != nullptr); - auto ctrl_flow_provider = - anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*llvm_context); + auto ctrl_flow_provider = anvill::NullControlFlowProvider(); + TypeDictionary tyDict(*llvm_context); - NullTypeProvider ty_prov(tyDict); - NullMemoryProvider mem_prov; - anvill::LifterOptions lift_options( - arch.get(), 
*module,ty_prov,std::move(ctrl_flow_provider),mem_prov); + NullTypeProvider ty_prov(tyDict); + NullMemoryProvider mem_prov; + anvill::LifterOptions lift_options(arch.get(), *module, ty_prov, + std::move(ctrl_flow_provider), mem_prov); - anvill::LifterOptions options(arch.get(), *module,ty_prov,std::move(ctrl_flow_provider),mem_prov); + anvill::LifterOptions options(arch.get(), *module, ty_prov, + std::move(ctrl_flow_provider), mem_prov); // memory and types will not get used and create lifter with null anvill::EntityLifter lifter(options); EntityCrossReferenceResolver xref(lifter); - + module->getFunction("__remill_intrinsics")->eraseFromParent(); CHECK(RunFunctionPass(module.get(), TransformRemillJumpIntrinsics(xref))); const auto ret_func = module->getFunction("__remill_function_return"); @@ -114,13 +116,14 @@ TEST_SUITE("TransformRemillJump_ARM32_0") { anvill::LifterOptions lift_options(arch.get(), *module, ty_prov, std::move(ctrl_flow_provider), mem_prov); - anvill::LifterOptions options(arch.get(), *module,ty_prov,std::move(ctrl_flow_provider),mem_prov); + anvill::LifterOptions options(arch.get(), *module, ty_prov, + std::move(ctrl_flow_provider), mem_prov); // memory and types will not get used and create lifter with null anvill::EntityLifter lifter(options); EntityCrossReferenceResolver xref(lifter); - + module->getFunction("__remill_intrinsics")->eraseFromParent(); CHECK(RunFunctionPass(module.get(), TransformRemillJumpIntrinsics(xref))); const auto ret_func = module->getFunction("__remill_function_return"); @@ -151,13 +154,14 @@ TEST_SUITE("TransformRemillJump_ARM32_1") { anvill::LifterOptions lift_options(arch.get(), *module, ty_prov, std::move(ctrl_flow_provider), mem_prov); - anvill::LifterOptions options(arch.get(), *module,ty_prov,std::move(ctrl_flow_provider),mem_prov); + anvill::LifterOptions options(arch.get(), *module, ty_prov, + std::move(ctrl_flow_provider), mem_prov); // memory and types will not get used and create lifter with null 
anvill::EntityLifter lifter(options); EntityCrossReferenceResolver xref(lifter); - + module->getFunction("__remill_intrinsics")->eraseFromParent(); CHECK(RunFunctionPass(module.get(), TransformRemillJumpIntrinsics(xref))); const auto ret_func = module->getFunction("__remill_function_return"); From ab8c130a196e93264986fcbd3440971929291950 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 2 Feb 2023 11:24:01 -0500 Subject: [PATCH 104/163] Reference fixes (#344) * add block method to get func addr * pass initialized program counter to lifted function * fix long standing bug * use arrayref instead * skip unsized types --- include/anvill/Declarations.h | 4 ++++ include/anvill/Lifters.h | 5 ++++- lib/Declarations.cpp | 4 ++++ lib/Lifters/BasicBlockLifter.cpp | 8 ++++--- lib/Lifters/DataLifter.cpp | 6 ++--- lib/Lifters/ValueLifter.cpp | 23 ++++++++++---------- lib/Lifters/ValueLifter.h | 14 ++++++------ lib/Optimize.cpp | 1 + lib/Passes/ConvertPointerArithmeticToGEP.cpp | 4 ++++ 9 files changed, 43 insertions(+), 26 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index b53a9fdd9..b0ff3b601 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -222,6 +222,8 @@ class BasicBlockContext { virtual size_t GetPointerDisplacement() const = 0; + virtual uint64_t GetParentFunctionAddress() const = 0; + virtual const std::vector &ReturnValue() const = 0; // Deduplicates locations and ensures there are no overlapping decls @@ -305,6 +307,8 @@ class SpecBlockContext : public BasicBlockContext { virtual const std::vector &ReturnValue() const override; + virtual uint64_t GetParentFunctionAddress() const override; + virtual size_t GetStackSize() const override; virtual size_t GetMaxStackSize() const override; diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index 8334b7e5a..2adc54968 100644 --- a/include/anvill/Lifters.h +++ b/include/anvill/Lifters.h @@ -8,6 +8,8 @@ #pragma once +#include + #include 
#include #include @@ -357,7 +359,8 @@ class ValueLifter { // Interpret `data` as the backing bytes to initialize an `llvm::Constant` // of type `type_of_data`. `loc_ea`, if non-null, is the address at which // `data` appears. - llvm::Constant *Lift(std::string_view data, llvm::Type *type_of_data) const; + llvm::Constant *Lift(llvm::ArrayRef data, + llvm::Type *type_of_data) const; // Interpret `ea` as being a pointer to a value of type `value_type` in the // address space `address_space`. diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 33e8734ff..2fb7823d6 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -225,6 +225,10 @@ const std::vector &SpecBlockContext::ReturnValue() const { return this->decl.returns; } +uint64_t SpecBlockContext::GetParentFunctionAddress() const { + return this->decl.address; +} + size_t SpecBlockContext::GetStackSize() const { return decl.stack_depth; } diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 2cd2c90a6..027031dea 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -495,11 +495,13 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { // Initialize the program counter auto pc_ptr = pc_reg->AddressOf(this->state_ptr, ir); - ir.CreateStore(this->options.program_counter_init_procedure(ir, pc_reg, 0), - pc_ptr); + auto pc_val = this->options.program_counter_init_procedure( + ir, pc_reg, this->block_def.addr); + ir.CreateStore(pc_val, pc_ptr); + std::array args = { - this->state_ptr, pc, mem_res, next_pc_out}; + this->state_ptr, pc_val, mem_res, next_pc_out}; auto ret_mem = ir.CreateCall(this->lifted_func, args); diff --git a/lib/Lifters/DataLifter.cpp b/lib/Lifters/DataLifter.cpp index a45962d7a..4ddc849f5 100644 --- a/lib/Lifters/DataLifter.cpp +++ b/lib/Lifters/DataLifter.cpp @@ -180,15 +180,13 @@ llvm::Constant *DataLifter::LiftData(const VariableDecl &decl, << std::dec; break; } - bytes.push_back(byte); } } if 
(bytes_accessable) { - value = lifter_context.value_lifter.Lift( - std::string_view(reinterpret_cast(bytes.data()), bytes.size()), - type, lifter_context, decl.address); + value = lifter_context.value_lifter.Lift(bytes, type, lifter_context, + decl.address); } diff --git a/lib/Lifters/ValueLifter.cpp b/lib/Lifters/ValueLifter.cpp index be980f57a..abb550223 100644 --- a/lib/Lifters/ValueLifter.cpp +++ b/lib/Lifters/ValueLifter.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -27,14 +28,14 @@ namespace anvill { // Consume `num_bytes` of bytes from `data`, interpreting them as an integer, // and update `data` in place, bumping out the first `num_bytes` of consumed // data. -llvm::APInt ValueLifterImpl::ConsumeBytesAsInt(std::string_view &data, +llvm::APInt ValueLifterImpl::ConsumeBytesAsInt(llvm::ArrayRef &data, unsigned num_bytes) const { llvm::APInt result(num_bytes * 8u, 0u); for (auto i = 0u; i < num_bytes; ++i) { result <<= 8u; - result |= data[i]; + result |= static_cast(data[i]); } - data = data.substr(num_bytes); + data = data.drop_front(num_bytes); if (dl.isLittleEndian() && 1u < num_bytes) { return result.byteSwap(); @@ -234,9 +235,9 @@ llvm::Constant *ValueLifterImpl::GetPointer(uint64_t ea, llvm::Type *value_type, // Interpret `data` as the backing bytes to initialize an `llvm::Constant` // of type `type_of_data`. This requires access to `ent_lifter` to be able // to lift pointer types that will reference declared data/functions. 
-llvm::Constant *ValueLifterImpl::Lift(std::string_view data, llvm::Type *type, - EntityLifterImpl &ent_lifter, - uint64_t loc_ea) const { +llvm::Constant * +ValueLifterImpl::Lift(llvm::ArrayRef data, llvm::Type *type, + EntityLifterImpl &ent_lifter, uint64_t loc_ea) const { switch (type->getTypeID()) { @@ -295,8 +296,8 @@ llvm::Constant *ValueLifterImpl::Lift(std::string_view data, llvm::Type *type, const auto elm_type = struct_type->getStructElementType(i); const auto offset = layout->getElementOffset(i); CHECK_LE(prev_offset, offset); - auto const_elm = - Lift(data.substr(offset), elm_type, ent_lifter, loc_ea + offset); + auto const_elm = Lift(data.drop_front(offset), elm_type, ent_lifter, + loc_ea + offset); initializer_list.push_back(const_elm); prev_offset = offset; } @@ -314,7 +315,7 @@ llvm::Constant *ValueLifterImpl::Lift(std::string_view data, llvm::Type *type, for (auto i = 0u; i < num_elms; ++i) { const auto elm_offset = i * elm_size; - auto const_elm = Lift(data.substr(elm_offset), elm_type, ent_lifter, + auto const_elm = Lift(data.drop_front(elm_offset), elm_type, ent_lifter, loc_ea + elm_offset); initializer_list.push_back(const_elm); } @@ -332,7 +333,7 @@ llvm::Constant *ValueLifterImpl::Lift(std::string_view data, llvm::Type *type, for (auto i = 0u; i < num_elms; ++i) { const auto elm_offset = i * elm_size; - auto const_elm = Lift(data.substr(elm_offset), elm_type, ent_lifter, + auto const_elm = Lift(data.drop_front(elm_offset), elm_type, ent_lifter, loc_ea + elm_offset); initializer_list.push_back(const_elm); } @@ -374,7 +375,7 @@ ValueLifter::ValueLifter(const EntityLifter &entity_lifter_) // Interpret `data` as the backing bytes to initialize an `llvm::Constant` // of type `type_of_data`. `loc_ea`, if non-null, is the address at which // `data` appears. 
-llvm::Constant *ValueLifter::Lift(std::string_view data, +llvm::Constant *ValueLifter::Lift(llvm::ArrayRef data, llvm::Type *type_of_data) const { return impl->value_lifter.Lift(data, type_of_data, *impl, 0); } diff --git a/lib/Lifters/ValueLifter.h b/lib/Lifters/ValueLifter.h index b4acbd850..472fcfcb6 100644 --- a/lib/Lifters/ValueLifter.h +++ b/lib/Lifters/ValueLifter.h @@ -11,6 +11,7 @@ #include #include #include +#include #include namespace llvm { @@ -36,11 +37,11 @@ class ValueLifterImpl { // Consume `num_bytes` of bytes from `data`, interpreting them as an integer, // and update `data` in place, bumping out the first `num_bytes` of consumed // data. - llvm::APInt ConsumeBytesAsInt(std::string_view &data, + llvm::APInt ConsumeBytesAsInt(llvm::ArrayRef &data, unsigned num_bytes) const; // Consume `size` bytes of data from `data`, and update `data` in place. - inline llvm::APInt ConsumeBytesAsInt(std::string_view &data, + inline llvm::APInt ConsumeBytesAsInt(llvm::ArrayRef &data, llvm::TypeSize size) const { return ConsumeBytesAsInt( data, static_cast(static_cast(size))); @@ -49,7 +50,7 @@ class ValueLifterImpl { // Interpret `data` as the backing bytes to initialize an `llvm::Constant` // of type `type_of_data`. This requires access to `ent_lifter` to be able // to lift pointer types that will reference declared data/functions. - llvm::Constant *Lift(std::string_view data, llvm::Type *type_of_data, + llvm::Constant *Lift(llvm::ArrayRef data, llvm::Type *type_of_data, EntityLifterImpl &ent_lifter, uint64_t loc_ea) const; // Lift pointers at `ea`. @@ -66,10 +67,9 @@ class ValueLifterImpl { // // Returns an `llvm::GlobalValue *` if the pointer is associated with a // known or plausible entity, and an `llvm::Constant *` otherwise. 
- llvm::Constant *GetPointer( - uint64_t ea, llvm::Type *value_type, - EntityLifterImpl &ent_lifter, - uint64_t loc_ea, unsigned address_space=0) const; + llvm::Constant *GetPointer(uint64_t ea, llvm::Type *value_type, + EntityLifterImpl &ent_lifter, uint64_t loc_ea, + unsigned address_space = 0) const; private: llvm::Constant *GetFunctionPointer(const FunctionDecl &decl, diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 00b34c35e..43c4cbdf1 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -305,6 +305,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, second_fpm.addPass(llvm::VerifierPass()); + AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); second_fpm.addPass(CodeQualityStatCollector()); second_fpm.addPass(llvm::VerifierPass()); AddConvertXorsToCmps(second_fpm); diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index 6a50820b2..daaa65012 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -425,6 +425,10 @@ bool ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { auto cur_spec = pointee_spec; auto cur_type = pointee_type; + if (!cur_type->isSized()) { + continue; + } + { auto cur_size = dl.getTypeSizeInBits(cur_type) / 8; auto index = offset / cur_size; From 02c8766152e6c0e10dc30c3dd8d8219fff6927e5 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Tue, 7 Feb 2023 08:24:48 +1100 Subject: [PATCH 105/163] PowerPC Support (#340) * PowerPC Support * Add PowerPC architecture to Protobuf specification * Pin Remill submodule to PPC branch * Fix inconsistency with mismatching address and PC reg size * Add PPC case to util functions * More address size fixes * Another address type fix * Log decoding error message * bump remill * bump cxx-common, drop llvm 14, bump mac runner * Bump Remill * Use `auto` where appropriate Co-authored-by: lkorenc * Remove unused `injected_sret` * Use type aliases for 
function types * update deprecated CI * remove binja from ci * update path in script * lifting tools updates * update ignore list * run on large runner * update gh actions * Revert "update path in script" This reverts commit b51e67664e1a5bd427819bf5230c5cbdc155c35a. * Revert "lifting tools updates" This reverts commit 420546089b73ee5a58b27ef0d56a8183596a9c8b. * ABI fixes and comments * Bump Remill * Rename since EABI is based off SysV * Pin Remill to master * ignore go tests * upload error logs on CI failure * tar + compress logs first * enable globbing * fix shell syntax * reduce cores so build is less likely to race * decouple requirements from matrix name * bump lifting-tools-ci * remove ignore for ppc * fix missing comma * add language overrides * ignore hidden files --------- Co-authored-by: William Tan <1284324+Ninja3047@users.noreply.github.com> Co-authored-by: lkorenc Co-authored-by: 2over12 --- .github/workflows/build.yml | 123 +++++---- ci/challenge_bins_test_settings.json | 52 ++-- data_specifications/specification.proto | 1 + include/anvill/Lifters.h | 23 +- lib/Arch/Arch.h | 13 +- lib/Arch/PPC_SysV.cpp | 324 ++++++++++++++++++++++++ lib/CMakeLists.txt | 1 + lib/Lifters/BasicBlockLifter.cpp | 11 +- lib/Lifters/FunctionLifter.cpp | 5 +- lib/Lifters/Options.cpp | 9 +- lib/Specification.cpp | 1 + lib/Utils.cpp | 4 +- libraries/lifting-tools-ci | 2 +- remill | 2 +- 14 files changed, 470 insertions(+), 101 deletions(-) create mode 100644 lib/Arch/PPC_SysV.cpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a8c7ca1ee..b54ffd8c2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,7 @@ jobs: steps: - name: Clone the anvill repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Python dependencies run: | @@ -52,10 +52,8 @@ jobs: matrix: image: - { name: "ubuntu", tag: "20.04" } - binja: - - { channel: "headless", version: "3.1.3479" } - llvm: ["14", "15"] - 
cxxcommon_version: ["v0.2.16"] + llvm: ["15"] + cxxcommon_version: ["v0.2.22"] runs-on: ubuntu-20.04 container: @@ -83,14 +81,14 @@ jobs: ${rel_ccache_path} \ ${rel_workspace_path} - echo ::set-output name=SOURCE::$(pwd)/${rel_source_path} - echo ::set-output name=REL_SOURCE::${rel_source_path} - echo ::set-output name=BUILD::$(pwd)/${rel_build_path} - echo ::set-output name=REL_BUILD::${rel_build_path} - echo ::set-output name=INSTALL::$(pwd)/${rel_install_path} - echo ::set-output name=DOWNLOADS::$(pwd)/${rel_downloads_path} - echo ::set-output name=CCACHE::$(pwd)/${rel_ccache_path} - echo ::set-output name=WORKSPACE::$(pwd)/${rel_workspace_path} + echo "SOURCE=$(pwd)/${rel_source_path}" >> ${GITHUB_OUTPUT} + echo "REL_SOURCE=${rel_source_path}" >> ${GITHUB_OUTPUT} + echo "BUILD=$(pwd)/${rel_build_path}" >> ${GITHUB_OUTPUT} + echo "REL_BUILD=${rel_build_path}" >> ${GITHUB_OUTPUT} + echo "INSTALL=$(pwd)/${rel_install_path}" >> ${GITHUB_OUTPUT} + echo "DOWNLOADS=$(pwd)/${rel_downloads_path}" >> ${GITHUB_OUTPUT} + echo "CCACHE=$(pwd)/${rel_ccache_path}" >> ${GITHUB_OUTPUT} + echo "WORKSPACE=$(pwd)/${rel_workspace_path}" >> ${GITHUB_OUTPUT} - name: Update the cache (downloads) uses: actions/cache@v3 @@ -148,12 +146,12 @@ jobs: run: | cargo install just - - uses: actions/setup-java@v2 + - uses: actions/setup-java@v3 with: distribution: "temurin" java-version: "11" - name: Clone Ghidra Spec Generation - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: path: ${{ steps.build_paths.outputs.REL_SOURCE }}/irene3 repository: "trailofbits/irene3" @@ -208,13 +206,13 @@ jobs: echo "${destination_path}/installed/x64-linux-rel/tools/llvm-${{matrix.llvm}}" >> $GITHUB_PATH - echo ::set-output name=PATH::${destination_path} + echo "PATH=${destination_path}" >> ${GITHUB_OUTPUT} - name: Select the build job count shell: bash id: build_job_count run: | - echo ::set-output name=VALUE::$(($(nproc) + 1)) + echo "VALUE=$(($(nproc) + 1))" >> ${GITHUB_OUTPUT} - name: Configure 
remill working-directory: ${{ steps.build_paths.outputs.BUILD }} @@ -255,7 +253,7 @@ jobs: cmake --build remill_build \ --target install - echo ::set-output name=PATH::${DESTDIR} + echo "PATH=${DESTDIR}" >> ${GITHUB_OUTPUT} - name: Build, configure and install remill (Presets) working-directory: ${{ steps.build_paths.outputs.SOURCE }}/anvill/remill @@ -311,7 +309,7 @@ jobs: cmake --build anvill_build \ --target install - echo ::set-output name=PATH::${DESTDIR} + echo "PATH=${DESTDIR}" >> ${GITHUB_OUTPUT} - name: Build, configure and install anvill (Presets) working-directory: ${{ steps.build_paths.outputs.SOURCE }}/anvill @@ -325,7 +323,6 @@ jobs: scripts/build-preset.sh debug - name: Run the tests env: - BINJA_DECODE_KEY: ${{ secrets.BINJA_DECODE_KEY }} CTEST_OUTPUT_ON_FAILURE: 1 shell: bash @@ -353,9 +350,9 @@ jobs: shell: bash working-directory: ${{ steps.build_paths.outputs.BUILD }} run: | - echo ::set-output name=DEB_PACKAGE_PATH::${{ steps.build_paths.outputs.REL_BUILD }}/$(ls *.deb) - echo ::set-output name=RPM_PACKAGE_PATH::${{ steps.build_paths.outputs.REL_BUILD }}/$(ls *.rpm) - echo ::set-output name=TGZ_PACKAGE_PATH::${{ steps.build_paths.outputs.REL_BUILD }}/$(ls *.tar.gz) + echo "DEB_PACKAGE_PATH=${{ steps.build_paths.outputs.REL_BUILD }}/$(ls *.deb)" >> ${GITHUB_OUTPUT} + echo "RPM_PACKAGE_PATH=${{ steps.build_paths.outputs.REL_BUILD }}/$(ls *.rpm)" >> ${GITHUB_OUTPUT} + echo "TGZ_PACKAGE_PATH=${{ steps.build_paths.outputs.REL_BUILD }}/$(ls *.tar.gz)" >> ${GITHUB_OUTPUT} - name: Install the DEB package run: | @@ -371,6 +368,18 @@ jobs: --decompile-cmd "anvill-decompile-spec" env: TOB_AMP_PASSPHRASE: ${{secrets.TOB_AMP_PASSPHRASE}} + - name: Tar and Compress logs + if: failure() + run: | + shopt -s globstar + tar -cf test-errs.tar.xz ${{ steps.build_paths.outputs.REL_SOURCE }}/anvill/amp-challenge-bins/**/std* + shell: bash + - name: Upload stderr/stdout logs on error + if: failure() + uses: actions/upload-artifact@v3 + with: + name: AMP Challenge 
Binaries logs + path: test-errs.tar.xz # - name: Run Integration Tests (AnghaBench 1K) # shell: bash @@ -382,19 +391,19 @@ jobs: # --decompile-cmd "anvill-decompile-json" - name: Store the DEB package - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v3 with: name: ${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm${{ matrix.llvm }}_deb_package path: ${{ steps.package_names.outputs.DEB_PACKAGE_PATH }} - name: Store the RPM package - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v3 with: name: ${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm${{ matrix.llvm }}_rpm_package path: ${{ steps.package_names.outputs.RPM_PACKAGE_PATH }} - name: Store the TGZ package - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v3 with: name: ${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm${{ matrix.llvm }}_tgz_package path: ${{ steps.package_names.outputs.TGZ_PACKAGE_PATH }} @@ -409,10 +418,10 @@ jobs: fail-fast: false matrix: os: ["macos-11"] - llvm: ["14", "15"] - cxxcommon_version: ["v0.2.16"] + llvm: ["15"] + cxxcommon_version: ["v0.2.22"] - runs-on: ${{ matrix.os }} + runs-on: macos-12 steps: - name: Setup the build paths @@ -432,17 +441,17 @@ jobs: ${rel_ccache_path} \ ${rel_workspace_path} - echo ::set-output name=SOURCE::$(pwd)/${rel_source_path} - echo ::set-output name=REL_SOURCE::${rel_source_path} - echo ::set-output name=BUILD::$(pwd)/${rel_build_path} - echo ::set-output name=REL_BUILD::${rel_build_path} - echo ::set-output name=INSTALL::$(pwd)/${rel_install_path} - echo ::set-output name=DOWNLOADS::$(pwd)/${rel_downloads_path} - echo ::set-output name=CCACHE::$(pwd)/${rel_ccache_path} - echo ::set-output name=WORKSPACE::$(pwd)/${rel_workspace_path} + echo "SOURCE=$(pwd)/${rel_source_path}" >> ${GITHUB_OUTPUT} + echo "REL_SOURCE=${rel_source_path}" >> ${GITHUB_OUTPUT} + echo "BUILD=$(pwd)/${rel_build_path}" >> ${GITHUB_OUTPUT} + echo "REL_BUILD=${rel_build_path}" >> ${GITHUB_OUTPUT} + echo 
"INSTALL=$(pwd)/${rel_install_path}" >> ${GITHUB_OUTPUT} + echo "DOWNLOADS=$(pwd)/${rel_downloads_path}" >> ${GITHUB_OUTPUT} + echo "CCACHE=$(pwd)/${rel_ccache_path}" >> ${GITHUB_OUTPUT} + echo "WORKSPACE=$(pwd)/${rel_workspace_path}" >> ${GITHUB_OUTPUT} - name: Update the cache (downloads) - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ${{ steps.build_paths.outputs.DOWNLOADS }} @@ -453,7 +462,7 @@ jobs: gitmodules_${{ matrix.os }}_${{ matrix.llvm }}_${{ matrix.cxxcommon_version }} - name: Update the cache (ccache) - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ${{ steps.build_paths.outputs.CCACHE }} @@ -470,7 +479,7 @@ jobs: ninja - name: Clone the anvill repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: path: ${{ steps.build_paths.outputs.REL_SOURCE }}/anvill fetch-depth: 0 @@ -512,13 +521,13 @@ jobs: echo "${destination_path}/installed/x64-osx-rel/bin" >> $GITHUB_PATH - echo ::set-output name=PATH::${destination_path} + echo "PATH=${destination_path}" >> ${GITHUB_OUTPUT} - name: Select the build job count shell: bash id: build_job_count run: | - echo ::set-output name=VALUE::$(($(sysctl -n hw.logicalcpu) + 1)) + echo "VALUE=$(($(sysctl -n hw.logicalcpu) + 1))" >> ${GITHUB_OUTPUT} - name: Configure remill working-directory: ${{ steps.build_paths.outputs.BUILD }} @@ -559,7 +568,7 @@ jobs: cmake --build remill_build \ --target install - echo ::set-output name=PATH::${DESTDIR} + echo "PATH=${DESTDIR}" >> ${GITHUB_OUTPUT} - name: Build, configure and install remill (Presets) working-directory: ${{ steps.build_paths.outputs.SOURCE }}/anvill/remill @@ -615,7 +624,7 @@ jobs: cmake --build anvill_build \ --target install - echo ::set-output name=PATH::${DESTDIR} + echo "PATH=${DESTDIR}" >> ${GITHUB_OUTPUT} - name: Build, configure and install anvill (Presets) working-directory: ${{ steps.build_paths.outputs.SOURCE }}/anvill @@ -655,14 +664,26 @@ jobs: shell: bash working-directory: ${{ steps.build_paths.outputs.BUILD 
}} run: | - echo ::set-output name=TGZ_PACKAGE_PATH::${{ steps.build_paths.outputs.REL_BUILD }}/$(ls *.tar.gz) + echo "TGZ_PACKAGE_PATH=${{ steps.build_paths.outputs.REL_BUILD }}/$(ls *.tar.gz)" >> ${GITHUB_OUTPUT} - name: Store the TGZ package - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v3 with: name: ${{ matrix.os }}_llvm${{ matrix.llvm }}_tgz_package path: ${{ steps.package_names.outputs.TGZ_PACKAGE_PATH }} + passes_ci: + needs: build_linux + runs-on: ubuntu-20.04 + if: always() + steps: + - name: Successful linux build + if: ${{ contains(needs.*.result, 'success') }} + run: exit 0 + - name: Failing linux build + if: ${{ !(contains(needs.*.result, 'success')) }} + run: exit 1 + release_packages: # Do not run the release procedure if any of the builds has failed needs: [build_linux, build_macos] @@ -671,7 +692,7 @@ jobs: steps: - name: Clone the anvill repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: path: anvill fetch-depth: 0 @@ -683,7 +704,7 @@ jobs: ./scripts/generate_changelog.sh changelog.md - name: Download all artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 - name: Draft the new release id: create_release @@ -738,12 +759,10 @@ jobs: strategy: matrix: - llvm: ["14", "15"] + llvm: ["15"] ubuntu: ["20.04"] - binja: - - { channel: "headless", version: "3.1.3479" } steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: submodules: true - name: Build LLVM ${{ matrix.llvm }} on ${{ matrix.ubuntu }} diff --git a/ci/challenge_bins_test_settings.json b/ci/challenge_bins_test_settings.json index c4313ae90..ac4f83205 100644 --- a/ci/challenge_bins_test_settings.json +++ b/ci/challenge_bins_test_settings.json @@ -1,21 +1,39 @@ { "timeout.seconds": "800", "tests.ignore": [ - "challenge-3_amd64_program_go_patched.elf/output.json", - "challenge-3_amd64_program_go.elf/output.json", - "challenge-3_x86_program_go_patched.elf/output.json", - 
"challenge-3_x86_program_go.elf/output.json", - "challenge-3_arm64_program_go_patched.elf/output.json", - "challenge-3_arm64_program_go.elf/output.json", - "challenge-3_armv7_program_go_patched.elf/output.json", - "challenge-3_armv7_program_go.elf/output.json", - "challenge-3_arm64_program_go_patched.elf", - "challenge-3_arm64_program_go.elf", - "challenge-3_armv7_program_go_patched.elf", + "challenge-3_arm64_program_go_patched", + "challenge-3_arm64_program_go", + "challenge-3_armv7_program_go_patched", + "challenge-3_armv7_program_go", + "challenge-3_amd64_program_go_patched", + "challenge-3_amd64_program_go", + "challenge-3_x86_program_go_patched", + "challenge-3_x86_program_go", "challenge-3_armv7_program_go.elf", - "challenge-3_amd64_program_go_patched.elf", - "challenge-3_amd64_program_go.elf", - "challenge-3_x86_program_go_patched.elf", - "challenge-3_x86_program_go.elf" - ] -} \ No newline at end of file + "challenge-3_armv7_program_go_patched.elf" + ], + "language_id_overrides": { + "ppc-adc_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-edma_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-etimer_freq_measurement_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-etimer_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-fccu_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-flexcan_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-hello_world.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-hello_world_mpc5777c.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-hello_world_pll.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-linflexd_lin_master_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-linflexd_lin_slave_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-linflexd_uart_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-lp_stop_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-sgen_flexpwm_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-siul_registerprotection_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-spi_dma_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + 
"ppc-spi_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-tsens_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc-xbic_dma_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", + "ppc_vle_booke_example.elf": "PowerPC:BE:64:VLE-32addr", + "challenge-10-ppc-vle-program_c.elf": "PowerPC:BE:64:VLE-32addr", + "challenge-10-ppc-program_c.elf": "PowerPC:BE:64:VLE-32addr" + } +} diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 99f6a4f7b..f79dbf0b3 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -14,6 +14,7 @@ enum Arch { ARCH_AARCH32 = 8; ARCH_SPARC32 = 9; ARCH_SPARC64 = 10; + ARCH_PPC = 11; } enum OS { diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index 2adc54968..caa839efa 100644 --- a/include/anvill/Lifters.h +++ b/include/anvill/Lifters.h @@ -150,6 +150,15 @@ class StackFrameRecoveryOptions { bool stack_pointer_is_negative{false}; }; +using ProgramCounterInitProcedure = + std::function; + +using StackPointerInitProcedure = std::function; + +using ReturnAddressInitProcedure = std::function; + // Options that direct the behavior of the code and data lifters. class LifterOptions { public: @@ -170,7 +179,7 @@ class LifterOptions { // (add (ptrtoint __anvill_pc), ) // static llvm::Value *SymbolicProgramCounterInit(llvm::IRBuilderBase &ir, - const remill::Register *pc_reg, + llvm::Type *address_type, uint64_t func_address); // Initialize the return address with a constant expression of the form: @@ -250,23 +259,17 @@ class LifterOptions { // (add (ptrtoint __anvill_pc)
) // // Otherwise, a concrete integer is used, i.e. `
`. - std::function - program_counter_init_procedure; + ProgramCounterInitProcedure program_counter_init_procedure; // Procedure for producing an initial value of the stack pointer on entry // to a function. An `IRBuilderBase` is provided for building values within // the entry block of the function at the given address. - std::function - stack_pointer_init_procedure; + StackPointerInitProcedure stack_pointer_init_procedure; // Procedure for producing an initial value of the return address on entry // to a function. An `IRBuilderBase` is provided for building values within // the entry block of the function at the given address. - std::function - return_address_init_procedure; + ReturnAddressInitProcedure return_address_init_procedure; StackFrameRecoveryOptions stack_frame_recovery_options; diff --git a/lib/Arch/Arch.h b/lib/Arch/Arch.h index d832e6bb8..3a01b497e 100644 --- a/lib/Arch/Arch.h +++ b/lib/Arch/Arch.h @@ -8,13 +8,12 @@ #pragma once +#include #include #include #include -#include - namespace llvm { class Function; namespace CallingConv { @@ -132,11 +131,10 @@ class CallingConvention { static Result CreateCCFromArch(const remill::Arch *arch); - static Result CreateCCFromArchAndID( - const remill::Arch *arch, llvm::CallingConv::ID cc_id); + static Result + CreateCCFromArchAndID(const remill::Arch *arch, llvm::CallingConv::ID cc_id); - Result - AllocateSignature(llvm::Function &func); + Result AllocateSignature(llvm::Function &func); virtual llvm::Error AllocateSignature(FunctionDecl &fdecl, llvm::Function &func) = 0; @@ -175,6 +173,9 @@ class CallingConvention { static std::unique_ptr CreateSPARC64_C(const remill::Arch *arch); + static std::unique_ptr + CreatePPC_SysV(const remill::Arch *arch); + private: const llvm::CallingConv::ID identity; }; diff --git a/lib/Arch/PPC_SysV.cpp b/lib/Arch/PPC_SysV.cpp new file mode 100644 index 000000000..85f453ccf --- /dev/null +++ b/lib/Arch/PPC_SysV.cpp @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2022-present Trail of Bits, 
Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include "AllocationState.h" +#include "Arch.h" + +namespace anvill { +namespace { + +// The PowerPC SystemV ABI documentation (which Freescale's EABI is based off) describes the +// parameter and return registers: +// https://math-atlas.sourceforge.net/devel/assembly/elfspec_ppc.pdf +// +// Despite having 64-bit GPRs, the e200 series toolchain conforms to Freescale's 32-bit PowerPC EABI +// meaning that 32-bit values should be stored in the parameter and return registers: +// https://www.nxp.com/files-static/32bit/doc/ref_manual/e200z759CRM.pdf +static const std::vector kParamRegConstraints = { + // GPRs + RegisterConstraint({VariantConstraint("R3", kTypeIntegral, kMaxBit32)}), + RegisterConstraint({VariantConstraint("R4", kTypeIntegral, kMaxBit32)}), + RegisterConstraint({VariantConstraint("R5", kTypeIntegral, kMaxBit32)}), + RegisterConstraint({VariantConstraint("R6", kTypeIntegral, kMaxBit32)}), + RegisterConstraint({VariantConstraint("R7", kTypeIntegral, kMaxBit32)}), + RegisterConstraint({VariantConstraint("R8", kTypeIntegral, kMaxBit32)}), + RegisterConstraint({VariantConstraint("R9", kTypeIntegral, kMaxBit32)}), + RegisterConstraint({VariantConstraint("R10", kTypeIntegral, kMaxBit32)}), + // FPRs + RegisterConstraint({VariantConstraint("F1", kTypeFloat, kMaxBit64)}), + RegisterConstraint({VariantConstraint("F2", kTypeFloat, kMaxBit64)}), + 
RegisterConstraint({VariantConstraint("F3", kTypeFloat, kMaxBit64)}), + RegisterConstraint({VariantConstraint("F4", kTypeFloat, kMaxBit64)}), + RegisterConstraint({VariantConstraint("F5", kTypeFloat, kMaxBit64)}), + RegisterConstraint({VariantConstraint("F6", kTypeFloat, kMaxBit64)}), + RegisterConstraint({VariantConstraint("F7", kTypeFloat, kMaxBit64)}), + RegisterConstraint({VariantConstraint("F8", kTypeFloat, kMaxBit64)}), +}; + +static const std::vector kReturnRegConstraints = { + // GPRs + RegisterConstraint({VariantConstraint("R3", kTypeIntegral, kMaxBit32)}), + RegisterConstraint({VariantConstraint("R4", kTypeIntegral, kMaxBit32)}), + // FPRs + RegisterConstraint({VariantConstraint("F1", kTypeFloat, kMaxBit64)}), +}; + +// Used to split things like `i64`s into multiple `i32`s. +static llvm::Type *IntegerTypeSplitter(llvm::Type *type) { + auto int_ty = llvm::dyn_cast(type); + if (!int_ty) { + return nullptr; + } + + auto width = int_ty->getPrimitiveSizeInBits(); + if (width <= 32) { + return nullptr; + } + + auto num_elements = (width + 31) / 32; + auto i32_ty = llvm::Type::getInt32Ty(type->getContext()); + return llvm::ArrayType::get(i32_ty, num_elements); +} + +} // namespace + +class PPC_SysV : public CallingConvention { + public: + explicit PPC_SysV(const remill::Arch *arch); + virtual ~PPC_SysV() = default; + + llvm::Error AllocateSignature(FunctionDecl &fdecl, + llvm::Function &func) override; + + private: + llvm::Error BindParameters(llvm::Function &function, + std::vector ¶m_decls); + + llvm::Error BindReturnValues(llvm::Function &function, + std::vector &ret_decls); + + const std::vector ¶meter_register_constraints; + const std::vector &return_register_constraints; +}; + +std::unique_ptr +CallingConvention::CreatePPC_SysV(const remill::Arch *arch) { + return std::make_unique(arch); +} + +PPC_SysV::PPC_SysV(const remill::Arch *arch) + : CallingConvention(llvm::CallingConv::C, arch), + parameter_register_constraints(kParamRegConstraints), + 
return_register_constraints(kReturnRegConstraints) {} + +llvm::Error PPC_SysV::AllocateSignature(FunctionDecl &fdecl, + llvm::Function &func) { + + auto err = BindReturnValues(func, fdecl.returns); + if (remill::IsError(err)) { + return err; + } + + err = BindParameters(func, fdecl.params); + if (remill::IsError(err)) { + return err; + } + + fdecl.return_stack_pointer_offset = 0; + fdecl.return_stack_pointer = arch->RegisterByName("R1"); + + fdecl.return_address.reg = arch->RegisterByName("LR"); + fdecl.return_address.type = fdecl.return_address.reg->type; + + return llvm::Error::success(); +} + +llvm::Error +PPC_SysV::BindReturnValues(llvm::Function &function, + std::vector &ret_values) { + + auto ret_type = function.getReturnType(); + LOG(INFO) << "Binding on return " << remill::LLVMThingToString(ret_type); + + // If there is an sret parameter then it is a special case. + if (function.hasStructRetAttr()) { + auto &value_declaration = ret_values.emplace_back(); + + value_declaration.type = llvm::PointerType::get(function.getContext(), 0); + + if (!ret_type->isVoidTy()) { + return llvm::createStringError( + std::errc::invalid_argument, + "Function '%s' with sret-attributed parameter has non-void return type '%s'", + function.getName().str().c_str(), + remill::LLVMThingToString(ret_type).c_str()); + } + + // Indirect return values are passed by pointer through `R3`. 
+ value_declaration.reg = arch->RegisterByName("R3"); + return llvm::Error::success(); + } + + switch (ret_type->getTypeID()) { + case llvm::Type::VoidTyID: return llvm::Error::success(); + + case llvm::Type::IntegerTyID: { + const auto *int_ty = llvm::dyn_cast(ret_type); + const auto int64_ty = llvm::Type::getInt64Ty(int_ty->getContext()); + const auto bit_width = int_ty->getBitWidth(); + if (bit_width <= 64) { + auto &value_declaration = ret_values.emplace_back(); + value_declaration.reg = arch->RegisterByName("R3"); + value_declaration.type = ret_type; + return llvm::Error::success(); + + // Split the integer across `R3` and `R4`. + } else if (bit_width <= 128) { + const char *ret_names[] = {"R3", "R4"}; + for (auto i = 0u; i < 2 && (64 * i) < bit_width; ++i) { + auto &value_declaration = ret_values.emplace_back(); + value_declaration.reg = arch->RegisterByName(ret_names[i]); + value_declaration.type = int64_ty; + } + return llvm::Error::success(); + + // The integer is too big to be split across registers, fall back to + // return-value optimization. + } else { + auto &value_declaration = ret_values.emplace_back(); + value_declaration.type = + llvm::PointerType::get(function.getContext(), 0); + value_declaration.reg = arch->RegisterByName("R3"); + return llvm::Error::success(); + } + } + + // Pointers always fit into `R3`. 
+ case llvm::Type::PointerTyID: { + auto &value_declaration = ret_values.emplace_back(); + value_declaration.reg = arch->RegisterByName("R3"); + value_declaration.type = ret_type; + return llvm::Error::success(); + } + + case llvm::Type::HalfTyID: + case llvm::Type::FloatTyID: + case llvm::Type::DoubleTyID: { + auto &value_declaration = ret_values.emplace_back(); + value_declaration.reg = arch->RegisterByName("F1"); + value_declaration.type = ret_type; + return llvm::Error::success(); + } + + case llvm::Type::FP128TyID: { + + // double types gets split into two integer registers + const auto fp128_ty = llvm::Type::getDoubleTy(ret_type->getContext()); + + // get the primitive type size to split them to registers + const auto bit_width = fp128_ty->getScalarSizeInBits(); + const char *reg_names[] = {"F1", "F2"}; + for (auto i = 0u; i < 2 && (64 * i) < bit_width; ++i) { + auto &value_declaration = ret_values.emplace_back(); + value_declaration.reg = arch->RegisterByName(reg_names[i]); + value_declaration.type = fp128_ty; + } + return llvm::Error::success(); + } + + // Try to split the composite type over registers, and fall back on RVO + // if it's not possible. + case llvm::Type::FixedVectorTyID: + case llvm::Type::ArrayTyID: + case llvm::Type::StructTyID: { + AllocationState alloc_ret(return_register_constraints, arch, this); + alloc_ret.config.type_splitter = IntegerTypeSplitter; + auto mapping = alloc_ret.TryRegisterAllocate(*ret_type); + + // There is a valid split over registers, so add the mapping + if (mapping) { + return alloc_ret.CoalescePacking(mapping.getValue(), ret_values); + + // Composite type splitting; Unlike with x86, LLVM doesn't naturally + // perform RVO on large structures returned by value from bitcode. 
+ } else { + break; + } + } + + default: break; + } + + return llvm::createStringError( + std::errc::invalid_argument, + "Could not allocate unsupported type '%s' to return register in function '%s'", + remill::LLVMThingToString(ret_type).c_str(), + function.getName().str().c_str()); +} + +llvm::Error +PPC_SysV::BindParameters(llvm::Function &function, + std::vector ¶meter_declarations) { + + const auto param_names = TryRecoverParamNames(function); + llvm::DataLayout dl(function.getParent()); + + // Used to keep track of which registers have been allocated + AllocationState alloc_param(parameter_register_constraints, arch, this); + alloc_param.config.type_splitter = IntegerTypeSplitter; + + unsigned stack_offset = 0; + const auto sp_reg = arch->RegisterByName("R1"); + + for (auto &argument : function.args()) { + const auto ¶m_name = param_names[argument.getArgNo()]; + const auto param_type = argument.getType(); + + auto allocation = alloc_param.TryRegisterAllocate(*param_type); + + // Try to allocate from a register. If a register is not available then + // allocate from the stack. + if (allocation) { + auto prev_size = parameter_declarations.size(); + + for (const auto ¶m_decl : allocation.getValue()) { + auto &declaration = parameter_declarations.emplace_back(); + declaration.type = param_decl.type; + if (param_decl.reg) { + declaration.reg = param_decl.reg; + } else { + declaration.mem_offset = param_decl.mem_offset; + declaration.mem_reg = param_decl.mem_reg; + } + } + + // The parameter fit in one register / stack slot. + if ((prev_size + 1u) == parameter_declarations.size()) { + if (!param_name.empty()) { + parameter_declarations[prev_size].name = param_name; + } + + // The parameter was spread across multiple registers. 
+ } else if (!param_name.empty()) { + for (auto i = 0u; i < (parameter_declarations.size() - prev_size); + ++i) { + parameter_declarations[prev_size + i].name = + param_name + std::to_string(i); + } + } + + } else { + auto &declaration = parameter_declarations.emplace_back(); + declaration.type = param_type; + declaration.mem_offset = static_cast(stack_offset); + declaration.mem_reg = sp_reg; + stack_offset += dl.getTypeAllocSize(argument.getType()); + + if (!param_name.empty()) { + declaration.name = param_name; + } + } + } + + return llvm::Error::success(); +} + +} // namespace anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 2a23fbe09..0c5e6049d 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -73,6 +73,7 @@ set(anvill_arch_SOURCES "Arch/AArch64_C.cpp" "Arch/AllocationState.cpp" "Arch/Arch.cpp" + "Arch/PPC_SysV.cpp" "Arch/SPARC32_C.cpp" "Arch/SPARC64_C.cpp" "Arch/X86_64_SysV.cpp" diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 027031dea..68bf8aa01 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -182,8 +182,8 @@ bool BasicBlockLifter::DoInterProceduralControlFlow( if (cc.target_address.has_value()) { this->AddCallFromBasicBlockFunctionToLifted( block, this->intrinsics.function_call, this->intrinsics, - this->options.program_counter_init_procedure(builder, this->pc_reg, - *cc.target_address)); + this->options.program_counter_init_procedure( + builder, this->address_type, *cc.target_address)); } else { this->AddCallFromBasicBlockFunctionToLifted( block, this->intrinsics.function_call, this->intrinsics); @@ -496,10 +496,9 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { // Initialize the program counter auto pc_ptr = pc_reg->AddressOf(this->state_ptr, ir); auto pc_val = this->options.program_counter_init_procedure( - ir, pc_reg, this->block_def.addr); + ir, this->address_type, this->block_def.addr); ir.CreateStore(pc_val, pc_ptr); - 
std::array args = { this->state_ptr, pc_val, mem_res, next_pc_out}; @@ -583,7 +582,7 @@ void BasicBlockLifter::CallBasicBlockFunction( args[0] = parent_stack; args[remill::kPCArgNum] = options.program_counter_init_procedure( - builder, pc_reg, cbfunc.GetBlock().addr); + builder, this->address_type, cbfunc.GetBlock().addr); args[remill::kMemoryPointerArgNum] = remill::LoadMemoryPointer(builder, this->intrinsics); @@ -696,4 +695,4 @@ BasicBlockLifter::ProvidePointerFromFunctionArgs(llvm::Function *func, } -} // namespace anvill \ No newline at end of file +} // namespace anvill diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 53f3b0d40..7fe0d2b10 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -273,7 +273,8 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( llvm::IRBuilder<> ir(block); // Initialize the program counter. - auto pc = options.program_counter_init_procedure(ir, pc_reg, func_address); + auto pc = + options.program_counter_init_procedure(ir, address_type, func_address); ir.CreateStore(pc, pc_ptr); // Initialize the stack pointer. 
@@ -408,7 +409,7 @@ void FunctionLifter::VisitBlock(CodeBlock blk, for (uint64_t succ : blk.outgoing_edges) { sw->addCase(llvm::ConstantInt::get( - llvm::cast(this->pc_reg_type), succ), + llvm::cast(this->address_type), succ), this->GetOrCreateBlock(succ)); } } diff --git a/lib/Lifters/Options.cpp b/lib/Lifters/Options.cpp index 6d60edb47..d50fd3b79 100644 --- a/lib/Lifters/Options.cpp +++ b/lib/Lifters/Options.cpp @@ -78,15 +78,14 @@ LifterOptions::SymbolicStackPointerInit(llvm::IRBuilderBase &ir, // Initialize the program counter with a constant expression of the form: // // (ptrtoint __anvill_pc) -llvm::Value * -LifterOptions::SymbolicProgramCounterInit(llvm::IRBuilderBase &ir, - const remill::Register *pc_reg, - uint64_t func_address) { +llvm::Value *LifterOptions::SymbolicProgramCounterInit(llvm::IRBuilderBase &ir, + llvm::Type *address_type, + uint64_t func_address) { auto &context = ir.getContext(); auto block = ir.GetInsertBlock(); auto module = block->getModule(); - auto type = remill::RecontextualizeType(pc_reg->type, context); + auto type = remill::RecontextualizeType(address_type, context); auto base_pc = module->getGlobalVariable(kSymbolicPCName); if (!base_pc) { diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 2330c15aa..a6db53794 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -290,6 +290,7 @@ GetArch(llvm::LLVMContext &context, break; case ::specification::ARCH_SPARC32: arch_name = remill::kArchSparc32; break; case ::specification::ARCH_SPARC64: arch_name = remill::kArchSparc64; break; + case ::specification::ARCH_PPC: arch_name = remill::kArchPPC; break; } switch (spec.operating_system()) { diff --git a/lib/Utils.cpp b/lib/Utils.cpp index f875e62e2..9c036e4c3 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -494,6 +494,7 @@ static bool IsStackPointerRegName(llvm::Module *module, case llvm::Triple::ArchType::sparcel: case llvm::Triple::ArchType::sparcv9: return reg_name == "o6" || reg_name == "sp"; + case 
llvm::Triple::ArchType::ppc: return reg_name == "r1"; default: return false; } } @@ -511,7 +512,8 @@ static bool IsProgramCounterRegName(llvm::Module *module, return reg_name == "pc" || reg_name == "wpc"; case llvm::Triple::ArchType::aarch64_32: case llvm::Triple::ArchType::arm: - case llvm::Triple::ArchType::armeb: return reg_name == "pc"; + case llvm::Triple::ArchType::armeb: + case llvm::Triple::ArchType::ppc: return reg_name == "pc"; case llvm::Triple::ArchType::sparc: case llvm::Triple::ArchType::sparcel: case llvm::Triple::ArchType::sparcv9: diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 4a42f5517..c114ae6f4 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 4a42f5517731a5710f2e27aceacd89b837b7440b +Subproject commit c114ae6f41d89120b80575b6246a5d016ac518bf diff --git a/remill b/remill index 2a517ba3e..fb018c96e 160000 --- a/remill +++ b/remill @@ -1 +1 @@ -Subproject commit 2a517ba3e38f29da08ac42ae8add90b254e7cf04 +Subproject commit fb018c96e96dcfc933517d017a9b12f01b8a9708 From 69e79b93445332b6492b1a732ee21193153efc65 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 9 Feb 2023 00:29:03 +1100 Subject: [PATCH 106/163] Stop unnecessarily parsing the semantic module for each basic block (#347) --- lib/Lifters/BasicBlockLifter.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 68bf8aa01..562058848 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -42,7 +42,6 @@ CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { this->RecursivelyInlineFunctionCallees(bbfunc.func); - anvill::EntityLifter lifter(options); return CallableBasicBlockFunction(bbfunc.func, block_def, std::move(*this)); } From 491541b51b25e424c28964199ce2cc533986cf77 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Mon, 13 Feb 2023 09:17:49 
-0500 Subject: [PATCH 107/163] Update CI (#345) * update path in script * update ignore list * run on large runner * Remove `LLVM_VERSION` compatibility checks for LLVM < 15 * Remove references to LLVM 14 in README and scripts * update macos version in matrix * Revert "update macos version in matrix" This reverts commit a19dd6713539fcbe75f9e15bf4cddd8d12964020. * bump anvill ci tools, add languageid overrides * handle created block with no terminator * add error for undecodeable instructions * bump java version * upload error logs on CI failure * tar + compress logs first * enable all ppc binaries except for challenge 10 * bump remill to fix infinite loop * more aggressively fix up terminators * prevent underflow and add parameter offset * handle cdi bugs by assuming the max depth is the stack depth * bump testing tool --------- Co-authored-by: Alex Cameron Co-authored-by: 2over12 --- .github/workflows/build.yml | 5 +- README.md | 4 +- bin/Decompile/Main.cpp | 3 - ci/challenge_bins_test_settings.json | 4 +- data_specifications/specification.proto | 2 + include/anvill/Declarations.h | 2 + lib/Declarations.cpp | 2 +- lib/Lifters/BasicBlockLifter.cpp | 5 +- lib/Lifters/FunctionLifter.cpp | 14 ++- lib/Passes/ReplaceStackReferences.cpp | 2 + lib/Protobuf.cpp | 7 ++ libraries/lifting-tools-ci | 2 +- remill | 2 +- scripts/build.sh | 4 - scripts/run-on-anghabench.sh | 2 +- scripts/test-amp-challenge-bins.sh | 2 +- .../anvill_passes/src/BranchRecoveryPass.cpp | 14 +-- tests/anvill_passes/src/BrightenPointers.cpp | 90 +++++++++---------- .../src/InstructionFolderPass.cpp | 7 +- tests/anvill_passes/src/RecoverEntityUses.cpp | 11 ++- .../src/RemoveStackPointerCExprs.cpp | 8 +- .../src/SinkSelectionsIntoBranchTargets.cpp | 35 ++++---- .../src/SplitStackFrameAtReturnAddress.cpp | 10 +-- .../anvill_passes/src/TestAbstractStackBB.cpp | 8 +- .../anvill_passes/src/TransformRemillJump.cpp | 44 +++++---- tests/anvill_passes/src/Utils.cpp | 8 -- tests/anvill_passes/src/Utils.h | 2 - 
tests/anvill_passes/src/XorConversionPass.cpp | 9 +- tests/tools/src/TypeSpecification.cpp | 10 --- 29 files changed, 155 insertions(+), 163 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b54ffd8c2..294a08495 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,7 +55,8 @@ jobs: llvm: ["15"] cxxcommon_version: ["v0.2.22"] - runs-on: ubuntu-20.04 + runs-on: + labels: gha-ubuntu-32 container: image: docker.pkg.github.com/lifting-bits/cxx-common/vcpkg-builder-${{ matrix.image.name }}:${{ matrix.image.tag }} @@ -149,7 +150,7 @@ jobs: - uses: actions/setup-java@v3 with: distribution: "temurin" - java-version: "11" + java-version: "17" - name: Clone Ghidra Spec Generation uses: actions/checkout@v3 with: diff --git a/README.md b/README.md index e0091baaf..dd097a7d8 100644 --- a/README.md +++ b/README.md @@ -103,10 +103,10 @@ Or you can tell CMake where to find the remill installation prefix by passing `- ### Docker image -To build via Docker run, specify the architecture, base Ubuntu image and LLVM version. For example, to build Anvill linking against LLVM 14 on Ubuntu 20.04 on AMD64 do: +To build via Docker run, specify the architecture, base Ubuntu image and LLVM version. For example, to build Anvill linking against LLVM 15 on Ubuntu 20.04 on AMD64 do: ```shell -ARCH=amd64; UBUNTU_VERSION=20.04; LLVM=14; \ +ARCH=amd64; UBUNTU_VERSION=20.04; LLVM=15; \ docker build . 
\ -t anvill-llvm${LLVM}-ubuntu${UBUNTU_VERSION}-${ARCH} \ -f Dockerfile \ diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index b51239e20..2b10ca92d 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -108,9 +108,6 @@ int main(int argc, char *argv[]) { remill::GetReference(maybe_buff); llvm::LLVMContext context; -#if LLVM_VERSION_NUMBER < LLVM_VERSION(15, 0) - context.enableOpaquePointers(); -#endif llvm::Module module("lifted_code", context); auto maybe_spec = diff --git a/ci/challenge_bins_test_settings.json b/ci/challenge_bins_test_settings.json index ac4f83205..be82bc69b 100644 --- a/ci/challenge_bins_test_settings.json +++ b/ci/challenge_bins_test_settings.json @@ -9,8 +9,8 @@ "challenge-3_amd64_program_go", "challenge-3_x86_program_go_patched", "challenge-3_x86_program_go", - "challenge-3_armv7_program_go.elf", - "challenge-3_armv7_program_go_patched.elf" + "challenge-10-ppc-program_c.elf", + "challenge-10-ppc-vle-program_c.elf" ], "language_id_overrides": { "ppc-adc_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index f79dbf0b3..99e729d33 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -293,6 +293,8 @@ message StackFrame { // the maximum depth the stack reaches beyond the return_addr uint64 max_frame_depth = 4; + + int64 parameter_offset = 5; } message Function { diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index b0ff3b601..53b7bc67c 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -369,6 +369,8 @@ struct FunctionDecl : public CallableDecl { std::int64_t ret_ptr_offset{0}; + std::int64_t parameter_offset{0}; + std::size_t parameter_size{0}; // Declare this function in an LLVM module. 
diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 2fb7823d6..515706505 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -463,7 +463,7 @@ V GetWithDef(uint64_t addr, const std::unordered_map &map, V def) { } // namespace size_t FunctionDecl::GetPointerDisplacement() const { - return this->parameter_size + this->return_stack_pointer_offset; + return this->parameter_size + this->parameter_offset; } SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 562058848..e8fc1fb2d 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -323,7 +323,10 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { LOG(INFO) << "Decoding at addr " << std::hex << addr; auto res = this->DecodeInstructionInto(addr, false, &inst, init_context); if (!res) { - LOG(FATAL) << "Failed to decode insn in block " << std::hex << addr; + remill::AddTerminatingTailCall(bb, this->intrinsics.error, + this->intrinsics); + LOG(ERROR) << "Failed to decode insn in block " << std::hex << addr; + return; } reached_addr += inst.bytes.size(); diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 7fe0d2b10..a71763eaf 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -425,7 +425,14 @@ void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state, this->VisitBlock(blk, lifted_function_state, abstract_stack); } - CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); + // NOTE(Ian): some blocks may be empty ie. if the CFG communicates a possible transition to some undecodeable + // bytes so here we check for block transfers that got added that we havent initialized and add an error + // if we end up transferring there. 
+ for (auto &blks : this->lifted_func->getBasicBlockList()) { + if (!blks.getTerminator()) { + llvm::BranchInst::Create(this->invalid_successor_block, &blks); + } + } } @@ -544,7 +551,8 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // // TODO: This could be a thunk, that we are maybe lifting on purpose. // How should control flow redirection behave in this case? - ir.CreateBr(this->GetOrCreateBlock(this->func_address)); + auto entry_insn = this->GetOrCreateBlock(this->func_address); + ir.CreateBr(entry_insn); AnnotateInstructions(entry_block, pc_annotation_id, GetPCAnnotation(func_address)); @@ -553,6 +561,8 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // Go lift all instructions! VisitBlocks(lifted_func_st.state_ptr, abstract_stack); + CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); + // Fill up `native_func` with a basic block and make it call `lifted_func`. // This creates things like the stack-allocated `State` structure. 
CallLiftedFunctionFromNativeFunction(decl); diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 3c922986d..bcad821b6 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -252,6 +252,8 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( LOG(INFO) << "Replacing stack vars in bb: " << std::hex << *anvill::GetBasicBlockAddr(&F); + LOG(INFO) << "Stack size " << cont.GetStackSize(); + LOG(INFO) << "Max stack size " << cont.GetMaxStackSize(); AbstractStack stk( F.getContext(), {{cont.GetStackSize(), anvill::GetBasicBlockStackPtr(&F)}, diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 4ffe6a919..5449744ff 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -517,9 +517,16 @@ Result ProtobufTranslator::DecodeFunction( decl.stack_depth = frame.frame_size(); decl.ret_ptr_offset = frame.return_address_offset(); decl.parameter_size = frame.parameter_size(); + decl.parameter_offset = frame.parameter_offset(); decl.maximum_depth = decl.GetPointerDisplacement() + frame.max_frame_depth(); + if (decl.maximum_depth < decl.stack_depth) { + LOG(ERROR) + << "Analyzed max depth is smaller than the initial depth overriding"; + decl.maximum_depth = decl.stack_depth; + } + this->ParseCFGIntoFunction(function, decl); auto link = function.func_linkage(); diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index c114ae6f4..2f5ae380e 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit c114ae6f41d89120b80575b6246a5d016ac518bf +Subproject commit 2f5ae380e78f2288ead5f4c1c6aef30c68c9d721 diff --git a/remill b/remill index fb018c96e..63406400b 160000 --- a/remill +++ b/remill @@ -1 +1 @@ -Subproject commit fb018c96e96dcfc933517d017a9b12f01b8a9708 +Subproject commit 63406400b489c92190e81beb31593fe54e58eaac diff --git a/scripts/build.sh b/scripts/build.sh index 438a74ad2..d7c37904e 100755 --- a/scripts/build.sh 
+++ b/scripts/build.sh @@ -338,10 +338,6 @@ function Package function GetLLVMVersion { case ${1} in - 14) - LLVM_VERSION=llvm-14 - return 0 - ;; 15) LLVM_VERSION=llvm-15 return 0 diff --git a/scripts/run-on-anghabench.sh b/scripts/run-on-anghabench.sh index a8be0000f..375c10150 100644 --- a/scripts/run-on-anghabench.sh +++ b/scripts/run-on-anghabench.sh @@ -7,7 +7,7 @@ export BINJA_DECODE_KEY=__BINJA_DECODE_KEY__ export BINJA_CHANNEL=__BINJA_CHANNEL__ export BINJA_VERSION=__BINJA_VERSION__ -export LLVM_VERSION=14 +export LLVM_VERSION=15 export CC=clang-13 CXX=clang++-13 dpkg --add-architecture i386 diff --git a/scripts/test-amp-challenge-bins.sh b/scripts/test-amp-challenge-bins.sh index 87a9c61f2..5611ec921 100755 --- a/scripts/test-amp-challenge-bins.sh +++ b/scripts/test-amp-challenge-bins.sh @@ -109,7 +109,7 @@ do done FAILED="no" -for dir in challenge-binaries +for dir in binaries do echo "[+] Testing ${dir}" ${SRC_DIR}/libraries/lifting-tools-ci/tool_run_scripts/anvill.py \ diff --git a/tests/anvill_passes/src/BranchRecoveryPass.cpp b/tests/anvill_passes/src/BranchRecoveryPass.cpp index 5f9c31751..6da3ccc40 100644 --- a/tests/anvill_passes/src/BranchRecoveryPass.cpp +++ b/tests/anvill_passes/src/BranchRecoveryPass.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -10,7 +11,6 @@ #include -#include #include "Utils.h" namespace anvill { @@ -28,8 +28,8 @@ static llvm::Function *FindFunction(llvm::Module *module, std::string name) { TEST_SUITE("BranchRecoveryPass") { TEST_CASE("Run analysis on aarch64") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "BranchRecoveryAarch64.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "BranchRecoveryAarch64.ll"); auto target_function = FindFunction(mod.get(), "slice"); CHECK(target_function != nullptr); llvm::FunctionPassManager fpm; @@ -109,8 +109,8 @@ TEST_SUITE("BranchRecoveryPass") { } TEST_CASE("Run analysis sliced function sub") { - 
auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "RecoverSubBranch.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "RecoverSubBranch.ll"); auto target_function = FindFunction(mod.get(), "slice"); CHECK(target_function != nullptr); llvm::FunctionPassManager fpm; @@ -191,8 +191,8 @@ TEST_SUITE("BranchRecoveryPass") { TEST_CASE("Run on sliced function sub") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "UnrecoverableBranch.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "UnrecoverableBranch.ll"); auto target_function = FindFunction(mod.get(), "slice"); CHECK(target_function != nullptr); llvm::FunctionPassManager fpm; diff --git a/tests/anvill_passes/src/BrightenPointers.cpp b/tests/anvill_passes/src/BrightenPointers.cpp index 185ab0949..4378c3e65 100644 --- a/tests/anvill_passes/src/BrightenPointers.cpp +++ b/tests/anvill_passes/src/BrightenPointers.cpp @@ -8,8 +8,6 @@ #include #include -#include -#include #include #include #include @@ -94,32 +92,32 @@ TEST_SUITE("BrightenPointers") { TEST_CASE("Run the whole pass on a well-formed function") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "gep_add.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "gep_add.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("multiple_bitcast") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "multiple_bitcast.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "multiple_bitcast.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("don't crash on loops") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "loop_test.ll"); + llvm::LLVMContext context; + auto mod = 
LoadTestData(context, "loop_test.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("challenge 1") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "rx_message.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "rx_message.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); @@ -130,8 +128,8 @@ TEST_SUITE("BrightenPointers") { TEST_CASE("challenge 2") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "chall2.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "chall2.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); @@ -141,121 +139,121 @@ TEST_SUITE("BrightenPointers") { } TEST_CASE("ret0") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "ret0.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "ret0.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("jmp0") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "jmp0.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "jmp0.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_array_swap") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_array_swap_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_array_swap_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_binja_var_none_type") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_binja_var_none_type_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_binja_var_none_type_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); 
mod->print(llvm::errs(), nullptr); CHECK(checkMod(*mod)); } TEST_CASE("test_bitops") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_bitops_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_bitops_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_binops") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_binops_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_binops_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_cast") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_cast_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_cast_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_init_list_rt.ll") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_init_list_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_init_list_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_inttoptr_rt.ll") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_inttoptr_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_inttoptr_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_nullptr_rt.ll") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_nullptr_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_nullptr_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_ret0_rt.ll") { - auto 
context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_ret0_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_ret0_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_struct_rt.ll") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_struct_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_struct_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_struct_swap_rt.ll") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_struct_swap_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_struct_swap_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_trunc_rt.ll") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_trunc_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_trunc_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_zeroinit.ll") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_zeroinit_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_zeroinit_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_zext_rt.ll") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, "test_zext_rt.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_zext_rt.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); } TEST_CASE("test_rx.ll") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*context, 
"test_rx.ll"); + llvm::LLVMContext context; + auto mod = LoadTestData(context, "test_rx.ll"); REQUIRE(mod != nullptr); CHECK(RunFunctionPass(*mod)); CHECK(checkMod(*mod)); diff --git a/tests/anvill_passes/src/InstructionFolderPass.cpp b/tests/anvill_passes/src/InstructionFolderPass.cpp index d25610c2e..0d7b6f5e7 100644 --- a/tests/anvill_passes/src/InstructionFolderPass.cpp +++ b/tests/anvill_passes/src/InstructionFolderPass.cpp @@ -7,7 +7,6 @@ */ #include - #include #include #include @@ -22,12 +21,12 @@ namespace anvill { TEST_SUITE("InstructionFolderPass") { TEST_CASE("Run the whole pass on a well-formed function") { - auto context = anvill::CreateContextWithOpaquePointers(); - auto module = LoadTestData(*context, "InstructionFolderPass.ll"); + llvm::LLVMContext context; + auto module = LoadTestData(context, "InstructionFolderPass.ll"); REQUIRE(module != nullptr); - auto arch = remill::Arch::Build(context.get(), remill::GetOSName("linux"), + auto arch = remill::Arch::Build(&context, remill::GetOSName("linux"), remill::GetArchName("amd64")); REQUIRE(arch != nullptr); diff --git a/tests/anvill_passes/src/RecoverEntityUses.cpp b/tests/anvill_passes/src/RecoverEntityUses.cpp index fccad3124..3980fc167 100644 --- a/tests/anvill_passes/src/RecoverEntityUses.cpp +++ b/tests/anvill_passes/src/RecoverEntityUses.cpp @@ -17,17 +17,16 @@ namespace anvill { TEST_SUITE("RecoverEntityUses") { TEST_CASE("Regression test for unresolved anvill_pc") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); - auto module = LoadTestData(*llvm_context, "TestingUnresolvedEntity.ll"); + llvm::LLVMContext llvm_context; + auto module = LoadTestData(llvm_context, "TestingUnresolvedEntity.ll"); - auto arch = - remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), - remill::GetArchName("x86")); + auto arch = remill::Arch::Build(&llvm_context, remill::GetOSName("linux"), + remill::GetArchName("x86")); REQUIRE(arch != nullptr); auto ctrl_flow_provider = 
anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*llvm_context); + TypeDictionary tyDict(llvm_context); NullTypeProvider ty_prov(tyDict); NullMemoryProvider mem_prov; diff --git a/tests/anvill_passes/src/RemoveStackPointerCExprs.cpp b/tests/anvill_passes/src/RemoveStackPointerCExprs.cpp index 60ed2f350..1fd958285 100644 --- a/tests/anvill_passes/src/RemoveStackPointerCExprs.cpp +++ b/tests/anvill_passes/src/RemoveStackPointerCExprs.cpp @@ -1,5 +1,5 @@ +#include #include - #include #include #include @@ -10,7 +10,7 @@ #include #include #include -#include + #include #include "Utils.h" @@ -27,8 +27,8 @@ static llvm::Function *FindFunction(llvm::Module *module, std::string name) { TEST_SUITE("RemoveStackPointerCExprs") { TEST_CASE("RegressionRecoverStack.ll") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); - auto mod = LoadTestData(*llvm_context, "RegressionRecoverStack.ll"); + llvm::LLVMContext llvm_context; + auto mod = LoadTestData(llvm_context, "RegressionRecoverStack.ll"); auto target_function = FindFunction(mod.get(), "slice"); CHECK(target_function != nullptr); llvm::FunctionPassManager fpm; diff --git a/tests/anvill_passes/src/SinkSelectionsIntoBranchTargets.cpp b/tests/anvill_passes/src/SinkSelectionsIntoBranchTargets.cpp index 6352dd5f6..b027a8a87 100644 --- a/tests/anvill_passes/src/SinkSelectionsIntoBranchTargets.cpp +++ b/tests/anvill_passes/src/SinkSelectionsIntoBranchTargets.cpp @@ -7,32 +7,32 @@ */ #include -#include #include #include +#include #include -#include "Utils.h" + #include +#include "Utils.h" + namespace anvill { TEST_SUITE("SinkSelectionsIntoBranchTargets") { TEST_CASE("Run the whole pass on a well-formed function") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); + llvm::LLVMContext llvm_context; auto module = - LoadTestData(*llvm_context, "SinkSelectionsIntoBranchTargets.ll"); + LoadTestData(llvm_context, "SinkSelectionsIntoBranchTargets.ll"); REQUIRE(module.get() != nullptr); - 
CHECK(RunFunctionPass( - module.get(), SinkSelectionsIntoBranchTargets())); - + CHECK(RunFunctionPass(module.get(), SinkSelectionsIntoBranchTargets())); } TEST_CASE("SimpleCase") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); + llvm::LLVMContext llvm_context; auto module = - LoadTestData(*llvm_context, "SinkSelectionsIntoBranchTargets.ll"); + LoadTestData(llvm_context, "SinkSelectionsIntoBranchTargets.ll"); REQUIRE(module.get() != nullptr); @@ -44,16 +44,17 @@ TEST_SUITE("SinkSelectionsIntoBranchTargets") { auto dt_res = dt.run(*function, fam); - auto analysis = SinkSelectionsIntoBranchTargets::AnalyzeFunction(dt_res, *function); + auto analysis = + SinkSelectionsIntoBranchTargets::AnalyzeFunction(dt_res, *function); CHECK(analysis.replacement_list.size() == 2U); CHECK(analysis.disposable_instruction_list.size() == 1U); } TEST_CASE("MultipleSelects") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); + llvm::LLVMContext llvm_context; auto module = - LoadTestData(*llvm_context, "SinkSelectionsIntoBranchTargets.ll"); + LoadTestData(llvm_context, "SinkSelectionsIntoBranchTargets.ll"); REQUIRE(module.get() != nullptr); @@ -65,16 +66,17 @@ TEST_SUITE("SinkSelectionsIntoBranchTargets") { auto dt_res = dt.run(*function, fam); - auto analysis = SinkSelectionsIntoBranchTargets::AnalyzeFunction(dt_res, *function); + auto analysis = + SinkSelectionsIntoBranchTargets::AnalyzeFunction(dt_res, *function); CHECK(analysis.replacement_list.size() == 6U); CHECK(analysis.disposable_instruction_list.size() == 3U); } TEST_CASE("MultipleSelectUsages") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); + llvm::LLVMContext llvm_context; auto module = - LoadTestData(*llvm_context, "SinkSelectionsIntoBranchTargets.ll"); + LoadTestData(llvm_context, "SinkSelectionsIntoBranchTargets.ll"); REQUIRE(module.get() != nullptr); @@ -86,7 +88,8 @@ TEST_SUITE("SinkSelectionsIntoBranchTargets") { auto dt_res = dt.run(*function, fam); - auto analysis = 
SinkSelectionsIntoBranchTargets::AnalyzeFunction(dt_res, *function); + auto analysis = + SinkSelectionsIntoBranchTargets::AnalyzeFunction(dt_res, *function); CHECK(analysis.replacement_list.size() == 6U); CHECK(analysis.disposable_instruction_list.size() == 1U); diff --git a/tests/anvill_passes/src/SplitStackFrameAtReturnAddress.cpp b/tests/anvill_passes/src/SplitStackFrameAtReturnAddress.cpp index f8779d108..56242cafd 100644 --- a/tests/anvill_passes/src/SplitStackFrameAtReturnAddress.cpp +++ b/tests/anvill_passes/src/SplitStackFrameAtReturnAddress.cpp @@ -6,8 +6,8 @@ * the LICENSE file found in the root directory of this source tree. */ -#include #include +#include #include #include #include @@ -21,15 +21,13 @@ namespace anvill { TEST_SUITE("SplitStackFrameAtReturnAddress") { TEST_CASE("Run the whole pass on a well-formed function") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); + llvm::LLVMContext llvm_context; auto module = - LoadTestData(*llvm_context, "SplitStackFrameAtReturnAddress.ll"); + LoadTestData(llvm_context, "SplitStackFrameAtReturnAddress.ll"); REQUIRE(module != nullptr); StackFrameRecoveryOptions opt; - CHECK(RunFunctionPass( - module.get(), SplitStackFrameAtReturnAddress(opt))); - + CHECK(RunFunctionPass(module.get(), SplitStackFrameAtReturnAddress(opt))); } } diff --git a/tests/anvill_passes/src/TestAbstractStackBB.cpp b/tests/anvill_passes/src/TestAbstractStackBB.cpp index 3c79eb930..87245e93e 100644 --- a/tests/anvill_passes/src/TestAbstractStackBB.cpp +++ b/tests/anvill_passes/src/TestAbstractStackBB.cpp @@ -25,7 +25,7 @@ namespace anvill { /* -Register pass plan: +Register pass plan: 1. iterate through all available paramater decls declaring them in the signature. 2. Call StoreNativeValue to store the parameter representing each parameter into the physcal location in the state 3. 
Apply SROA to the new clone @@ -42,10 +42,10 @@ Stack pass plan: TEST_SUITE("Basic Block tests") { TEST_CASE("Convert parameters") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); - auto module = LoadTestData(*llvm_context, "MainBasicBlocks.ll"); + llvm::LLVMContext llvm_context; + auto module = LoadTestData(llvm_context, "MainBasicBlocks.ll"); auto bb_func = module->getFunction("basic_block_func4199701"); bb_func->dump(); } } -} // namespace anvill \ No newline at end of file +} // namespace anvill diff --git a/tests/anvill_passes/src/TransformRemillJump.cpp b/tests/anvill_passes/src/TransformRemillJump.cpp index c721a18de..361bfab02 100644 --- a/tests/anvill_passes/src/TransformRemillJump.cpp +++ b/tests/anvill_passes/src/TransformRemillJump.cpp @@ -25,16 +25,15 @@ namespace anvill { TEST_SUITE("TransformRemillJump_Test0") { TEST_CASE("Run the pass on function having _remill_jump as tail call") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); - auto module = LoadTestData(*llvm_context, "TransformRemillJumpData0.ll"); + llvm::LLVMContext llvm_context; + auto module = LoadTestData(llvm_context, "TransformRemillJumpData0.ll"); - auto arch = - remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), - remill::GetArchName("amd64")); + auto arch = remill::Arch::Build(&llvm_context, remill::GetOSName("linux"), + remill::GetArchName("amd64")); REQUIRE(arch != nullptr); auto ctrl_flow_provider = anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*llvm_context); + TypeDictionary tyDict(llvm_context); NullTypeProvider ty_prov(tyDict); NullMemoryProvider mem_prov; @@ -63,15 +62,14 @@ TEST_SUITE("TransformRemillJump_Test0") { TEST_SUITE("TransformRemillJump_Test1") { TEST_CASE("Run the pass on function having _remill_jump as tail call") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); - auto module = LoadTestData(*llvm_context, "TransformRemillJumpData1.ll"); + llvm::LLVMContext llvm_context; + auto 
module = LoadTestData(llvm_context, "TransformRemillJumpData1.ll"); - auto arch = - remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), - remill::GetArchName("amd64")); + auto arch = remill::Arch::Build(&llvm_context, remill::GetOSName("linux"), + remill::GetArchName("amd64")); REQUIRE(arch != nullptr); auto ctrl_flow_provider = anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*llvm_context); + TypeDictionary tyDict(llvm_context); NullTypeProvider ty_prov(tyDict); NullMemoryProvider mem_prov; @@ -99,17 +97,16 @@ TEST_SUITE("TransformRemillJump_Test1") { TEST_SUITE("TransformRemillJump_ARM32_0") { TEST_CASE("Run the pass on function having _remill_jump as tail call") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); + llvm::LLVMContext llvm_context; auto module = - LoadTestData(*llvm_context, "TransformRemillJumpDataARM32_0.ll"); + LoadTestData(llvm_context, "TransformRemillJumpDataARM32_0.ll"); - auto arch = - remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), - remill::GetArchName("aarch32")); + auto arch = remill::Arch::Build(&llvm_context, remill::GetOSName("linux"), + remill::GetArchName("aarch32")); REQUIRE(arch != nullptr); auto ctrl_flow_provider = anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*llvm_context); + TypeDictionary tyDict(llvm_context); NullTypeProvider ty_prov(tyDict); NullMemoryProvider mem_prov; @@ -137,17 +134,16 @@ TEST_SUITE("TransformRemillJump_ARM32_0") { TEST_SUITE("TransformRemillJump_ARM32_1") { TEST_CASE("Run the pass on function having _remill_jump as tail call") { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); + llvm::LLVMContext llvm_context; auto module = - LoadTestData(*llvm_context, "TransformRemillJumpDataARM32_1.ll"); + LoadTestData(llvm_context, "TransformRemillJumpDataARM32_1.ll"); - auto arch = - remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), - remill::GetArchName("aarch32")); + auto arch = 
remill::Arch::Build(&llvm_context, remill::GetOSName("linux"), + remill::GetArchName("aarch32")); REQUIRE(arch != nullptr); auto ctrl_flow_provider = anvill::NullControlFlowProvider(); - TypeDictionary tyDict(*llvm_context); + TypeDictionary tyDict(llvm_context); NullTypeProvider ty_prov(tyDict); NullMemoryProvider mem_prov; diff --git a/tests/anvill_passes/src/Utils.cpp b/tests/anvill_passes/src/Utils.cpp index 9caeba656..51f41c9bd 100644 --- a/tests/anvill_passes/src/Utils.cpp +++ b/tests/anvill_passes/src/Utils.cpp @@ -80,12 +80,4 @@ const PlatformList &GetSupportedPlatforms(void) { return kSupportedPlatforms; } -std::unique_ptr CreateContextWithOpaquePointers(void) { - auto context = std::make_unique(); -#if LLVM_VERSION_NUMBER < LLVM_VERSION(15, 0) - context->enableOpaquePointers(); -#endif - return context; -} - } // namespace anvill diff --git a/tests/anvill_passes/src/Utils.h b/tests/anvill_passes/src/Utils.h index 029664c33..a10921720 100644 --- a/tests/anvill_passes/src/Utils.h +++ b/tests/anvill_passes/src/Utils.h @@ -57,6 +57,4 @@ struct Platform final { using PlatformList = std::vector; const PlatformList &GetSupportedPlatforms(void); -std::unique_ptr CreateContextWithOpaquePointers(void); - } // namespace anvill diff --git a/tests/anvill_passes/src/XorConversionPass.cpp b/tests/anvill_passes/src/XorConversionPass.cpp index cfe9baf84..f533ff3f4 100644 --- a/tests/anvill_passes/src/XorConversionPass.cpp +++ b/tests/anvill_passes/src/XorConversionPass.cpp @@ -42,12 +42,11 @@ static std::tuple runXorRemovalPassCountXors(const std::string &module_name, const std::string &function_name) { - auto llvm_context = anvill::CreateContextWithOpaquePointers(); - auto module = LoadTestData(*llvm_context, module_name); + llvm::LLVMContext llvm_context; + auto module = LoadTestData(llvm_context, module_name); - auto arch = - remill::Arch::Build(llvm_context.get(), remill::GetOSName("linux"), - remill::GetArchName("amd64")); + auto arch = 
remill::Arch::Build(&llvm_context, remill::GetOSName("linux"), + remill::GetArchName("amd64")); REQUIRE(arch != nullptr); diff --git a/tests/tools/src/TypeSpecification.cpp b/tests/tools/src/TypeSpecification.cpp index 1b9f71aa3..e2b2e2d86 100644 --- a/tests/tools/src/TypeSpecification.cpp +++ b/tests/tools/src/TypeSpecification.cpp @@ -7,7 +7,6 @@ */ #include - #include #include #include @@ -40,9 +39,6 @@ TEST_SUITE("TypeSpecifier") { for (const auto &test_entry : kTestEntryList) { llvm::LLVMContext llvm_context; -#if LLVM_VERSION_NUMBER < LLVM_VERSION(15, 0) - llvm_context.enableOpaquePointers(); -#endif llvm::DataLayout dl("e-m:e-i64:64-f80:128-n8:16:32:64-S128"); anvill::TypeDictionary type_dict(llvm_context); @@ -75,9 +71,6 @@ TEST_SUITE("TypeSpecifier") { }; llvm::LLVMContext llvm_context; -#if LLVM_VERSION_NUMBER < LLVM_VERSION(15, 0) - llvm_context.enableOpaquePointers(); -#endif llvm::Module module("TypeSpecifierTests", llvm_context); const auto &data_layout = module.getDataLayout(); @@ -228,9 +221,6 @@ TEST_SUITE("TypeSpecifier") { }; llvm::LLVMContext llvm_context; -#if LLVM_VERSION_NUMBER < LLVM_VERSION(15, 0) - llvm_context.enableOpaquePointers(); -#endif llvm::Module module("TypeSpecifierTests", llvm_context); const auto &data_layout = module.getDataLayout(); From 2b466fde39096e6609726a7cff89e7d11a46f7cf Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 16 Feb 2023 09:33:30 -0500 Subject: [PATCH 108/163] dont require lifter options to provide pointer from args (#350) --- include/anvill/Utils.h | 8 ++++---- lib/Lifters/BasicBlockLifter.cpp | 2 +- lib/Passes/ReplaceStackReferences.cpp | 6 ++---- lib/Utils.cpp | 8 +++----- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 3c48f8e26..01524adaf 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -134,9 +134,9 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, std::optional 
GetBasicBlockAddr(llvm::Function *func); -llvm::Argument *ProvidePointerFromFunctionArgs(llvm::Function *func, - size_t index, - const anvill::LifterOptions &, - const BasicBlockContext &); +llvm::Argument * +ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, + const anvill::BasicBlockContext &context); + llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func); } // namespace anvill diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index e8fc1fb2d..e781aeef1 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -692,7 +692,7 @@ llvm::Value *BasicBlockLifter::ProvidePointerFromStruct(llvm::IRBuilder<> &ir, llvm::Value * BasicBlockLifter::ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index) const { - return anvill::ProvidePointerFromFunctionArgs(func, index, this->options, + return anvill::ProvidePointerFromFunctionArgs(func, index, *this->block_context); } diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index bcad821b6..ffffd7341 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -290,7 +290,7 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( if (referenced_variable.has_value()) { auto g = anvill::ProvidePointerFromFunctionArgs( - &F, referenced_variable->decl.index, this->lifter.Options(), cont); + &F, referenced_variable->decl.index, cont); auto ptr = GetPtrToOffsetInto(ent_insert, this->lifter.DataLayout(), referenced_variable->decl.decl.type, g, referenced_variable->offset); @@ -366,9 +366,7 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( anvill::GetBasicBlockStackPtr(&F)->addAttr(noalias); for (auto lives : cont.LiveParamsAtEntryAndExit()) { - ProvidePointerFromFunctionArgs(&F, lives.index, this->lifter.Options(), - cont) - ->addAttr(noalias); + ProvidePointerFromFunctionArgs(&F, lives.index, cont)->addAttr(noalias); } } 
diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 9c036e4c3..b7dda781b 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -837,13 +838,10 @@ llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func) { llvm::Argument * ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, - const anvill::LifterOptions &options, const anvill::BasicBlockContext &context) { - CHECK(options.arch->LiftedFunctionType()->getNumParams() + 1 + - context.LiveParamsAtEntryAndExit().size() == + CHECK(remill::kNumBlockArgs + 1 + context.LiveParamsAtEntryAndExit().size() == func->arg_size()); - return func->getArg(index + - options.arch->LiftedFunctionType()->getNumParams() + 1); + return func->getArg(index + remill::kNumBlockArgs + 1); } From 4a95869bb9c04e019775b658f537865a47cc23c0 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Thu, 16 Feb 2023 11:27:43 -0500 Subject: [PATCH 109/163] Add global register support (#351) * implement consuming global register value information from spec * add flag to enable llvm debug output * fix global lifting * clang format * remove extra ->dump * remove unused typedef * use emplace + move instead of insert --------- Co-authored-by: 2over12 --- bin/Decompile/Main.cpp | 6 +++++- include/anvill/Declarations.h | 16 ++++++++++++++- lib/Declarations.cpp | 7 +++++++ lib/Lifters/BasicBlockLifter.cpp | 13 +++++++++++- lib/Lifters/Options.cpp | 1 - lib/Protobuf.cpp | 34 ++++++++++++++++++++++---------- 6 files changed, 63 insertions(+), 14 deletions(-) diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index 2b10ca92d..2988ad0c8 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -44,7 +44,7 @@ DEFINE_bool(add_breakpoints, false, DEFINE_bool(add_names, false, "Try to apply symbol names to lifted entities."); DEFINE_bool(disable_opt, false, "Dont apply optimization passes"); - +DEFINE_bool(llvm_debug, false, "Enable 
LLVM debug flag"); DEFINE_string( default_callable_spec, "", @@ -241,6 +241,10 @@ int main(int argc, char *argv[]) { llvm::EnableStatistics(); } + if (FLAGS_llvm_debug) { + llvm::DebugFlag = true; + } + if (!FLAGS_disable_opt) { anvill::OptimizeModule(lifter, module, spec.GetBlockContexts(), spec); } diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 53b7bc67c..5287cddeb 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -117,11 +117,16 @@ struct OffsetDomain { ValueDecl target_value; std::int64_t stack_offset; }; + +struct ConstantDomain { + ValueDecl target_value; + std::uint64_t value; +}; + struct SpecStackOffsets { std::vector affine_equalities; }; - // A declaration for a callable entity. struct CallableDecl { private: @@ -216,6 +221,8 @@ class BasicBlockContext { virtual const SpecStackOffsets &GetStackOffsets() const = 0; + virtual const std::vector &GetConstants() const = 0; + virtual size_t GetStackSize() const = 0; virtual size_t GetMaxStackSize() const = 0; @@ -295,16 +302,20 @@ class SpecBlockContext : public BasicBlockContext { private: const FunctionDecl &decl; SpecStackOffsets offsets; + std::vector constants; std::vector live_params_at_entry; std::vector live_params_at_exit; public: SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets, + std::vector constants, std::vector live_params_at_entry, std::vector live_params_at_exit); virtual const SpecStackOffsets &GetStackOffsets() const override; + virtual const std::vector &GetConstants() const override; + virtual const std::vector &ReturnValue() const override; virtual uint64_t GetParentFunctionAddress() const override; @@ -363,6 +374,9 @@ struct FunctionDecl : public CallableDecl { std::unordered_map> live_regs_at_exit; + std::unordered_map> + constant_values; + std::uint64_t stack_depth; std::uint64_t maximum_depth; diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 515706505..66572df9e 100644 --- 
a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -240,10 +240,12 @@ size_t SpecBlockContext::GetMaxStackSize() const { SpecBlockContext::SpecBlockContext( const FunctionDecl &decl, SpecStackOffsets offsets, + std::vector constants, std::vector live_params_at_entry, std::vector live_params_at_exit) : decl(decl), offsets(std::move(offsets)), + constants(std::move(constants)), live_params_at_entry(std::move(live_params_at_entry)), live_params_at_exit(std::move(live_params_at_exit)) {} @@ -263,6 +265,10 @@ const SpecStackOffsets &SpecBlockContext::GetStackOffsets() const { return this->offsets; } +const std::vector &SpecBlockContext::GetConstants() const { + return this->constants; +} + // Interpret `target` as being the function to call, and call it from within // a basic block in a lifted bitcode function. Returns the new value of the // memory pointer. @@ -469,6 +475,7 @@ size_t FunctionDecl::GetPointerDisplacement() const { SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { return SpecBlockContext( *this, GetWithDef(addr, this->stack_offsets, SpecStackOffsets()), + GetWithDef(addr, this->constant_values, std::vector()), GetWithDef(addr, this->live_regs_at_entry, std::vector()), GetWithDef(addr, this->live_regs_at_exit, std::vector())); } diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index e781aeef1..855320ab4 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -481,12 +481,23 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { return this->ProvidePointerFromFunctionArgs(func, index); }; - LOG(INFO) << "Live values at entry to function " << this->block_context->LiveBBParamsAtEntry().size(); this->UnpackLiveValues(ir, ptr_provider, this->state_ptr, this->block_context->LiveBBParamsAtEntry()); + for (auto ®_const : this->block_context->GetConstants()) { + auto new_value = this->options.program_counter_init_procedure( + ir, 
reg_const.target_value.reg->type, reg_const.value); + DLOG(INFO) << "Dumping " << reg_const.target_value.reg->name << " " + << std::hex << reg_const.value; + auto nmem = StoreNativeValue(new_value, reg_const.target_value, + type_provider.Dictionary(), intrinsics, ir, + this->state_ptr, + remill::LoadMemoryPointer(ir, intrinsics)); + ir.CreateStore(nmem, remill::LoadMemoryPointerRef(ir.GetInsertBlock())); + } + auto pc_arg = remill::NthArgument(func, remill::kPCArgNum); auto mem_arg = remill::NthArgument(func, remill::kMemoryPointerArgNum); diff --git a/lib/Lifters/Options.cpp b/lib/Lifters/Options.cpp index d50fd3b79..2d0bc2aab 100644 --- a/lib/Lifters/Options.cpp +++ b/lib/Lifters/Options.cpp @@ -38,7 +38,6 @@ const ::anvill::TypeDictionary &LifterOptions::TypeDictionary(void) const { return type_provider.Dictionary(); } - llvm::Value *LifterOptions::SymbolicStackPointerInitWithOffset( llvm::IRBuilderBase &ir, const remill::Register *sp_reg, uint64_t func_address, std::int64_t offset) { diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 5449744ff..abc4a7ee8 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -595,11 +595,10 @@ void ProtobufTranslator::ParseCFGIntoFunction( for (auto &[blk_addr, ctx] : obj.block_context()) { - std::vector affine_equalities; + std::vector stack_offsets; + std::vector constant_values; auto blk = decl.cfg[blk_addr]; for (auto &symval : ctx.symvals()) { - OffsetDomain reg_off; - if (!symval.has_target_value()) { LOG(FATAL) << "All equalities must have a target"; } @@ -624,17 +623,32 @@ void ProtobufTranslator::ParseCFGIntoFunction( LOG(FATAL) << "Mapping should have current value"; } - LOG_IF(FATAL, !symval.curr_val().has_stack_disp()) - << "Only stack displacements supported for affine relations"; + if (symval.curr_val().has_stack_disp()) { + OffsetDomain reg_off; + + reg_off.stack_offset = symval.curr_val().stack_disp(); + reg_off.target_value = target_vdecl.TakeValue(); + + stack_offsets.push_back(reg_off); + } else if 
(symval.curr_val().has_constant()) { + ConstantDomain const_val; - reg_off.stack_offset = symval.curr_val().stack_disp(); - reg_off.target_value = target_vdecl.TakeValue(); + const_val.target_value = target_vdecl.TakeValue(); + const_val.value = symval.curr_val().constant(); - affine_equalities.push_back(reg_off); + DLOG(INFO) << "Adding global register override for " + << const_val.target_value.reg->name << " " << std::hex + << const_val.value; + constant_values.push_back(const_val); + } else { + LOG(FATAL) << symval.curr_val().GetTypeName() + << " is unimplemented for affine relations"; + } } - SpecStackOffsets off = {affine_equalities}; - decl.stack_offsets.insert({blk_addr, off}); + SpecStackOffsets off = {stack_offsets}; + decl.stack_offsets.emplace(blk_addr, std::move(off)); + decl.constant_values.emplace(blk_addr, std::move(constant_values)); this->AddLiveValuesToBB(decl.live_regs_at_entry, blk_addr, ctx.live_at_entries()); From 7d84a1c0278d5b8cfe5032e75d4c3f3204dd3909 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 21 Feb 2023 13:23:03 -0500 Subject: [PATCH 110/163] Respect taint pc flag when lifting constant symvals (#355) * update format * adapt types --- data_specifications/specification.proto | 7 ++++++- include/anvill/Declarations.h | 1 + lib/Lifters/BasicBlockLifter.cpp | 21 +++++++++++++++++---- lib/Protobuf.cpp | 8 +++++--- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 99e729d33..e4de9828b 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -153,11 +153,16 @@ message Callable { +message Constant { + uint64 value = 1; + bool is_tainted_by_pc = 2; +} + message ValueDomain { oneof inner { HighSymbol symb = 1; int64 stack_disp = 2; - int64 constant = 3; + Constant constant = 3; } } diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 5287cddeb..0c5a84a5b 100644 --- 
a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -121,6 +121,7 @@ struct OffsetDomain { struct ConstantDomain { ValueDecl target_value; std::uint64_t value; + bool should_taint_by_pc; }; struct SpecStackOffsets { diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 855320ab4..a9f0a0467 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -487,10 +487,23 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { this->block_context->LiveBBParamsAtEntry()); for (auto ®_const : this->block_context->GetConstants()) { - auto new_value = this->options.program_counter_init_procedure( - ir, reg_const.target_value.reg->type, reg_const.value); - DLOG(INFO) << "Dumping " << reg_const.target_value.reg->name << " " - << std::hex << reg_const.value; + llvm::Value *new_value = nullptr; + llvm::Type *target_type = reg_const.target_value.type; + if (reg_const.should_taint_by_pc) { + new_value = this->options.program_counter_init_procedure( + ir, this->address_type, reg_const.value); + + if (this->address_type != target_type) { + new_value = AdaptToType(ir, new_value, target_type); + } + } else { + new_value = llvm::ConstantInt::get(target_type, reg_const.value, false); + } + + + DLOG_IF(INFO, reg_const.target_value.reg) + << "Dumping " << reg_const.target_value.reg->name << " " << std::hex + << reg_const.value; auto nmem = StoreNativeValue(new_value, reg_const.target_value, type_provider.Dictionary(), intrinsics, ir, this->state_ptr, diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index abc4a7ee8..01822fd94 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -634,11 +634,13 @@ void ProtobufTranslator::ParseCFGIntoFunction( ConstantDomain const_val; const_val.target_value = target_vdecl.TakeValue(); - const_val.value = symval.curr_val().constant(); + const_val.value = symval.curr_val().constant().value(); + const_val.should_taint_by_pc = + 
symval.curr_val().constant().is_tainted_by_pc(); DLOG(INFO) << "Adding global register override for " - << const_val.target_value.reg->name << " " << std::hex - << const_val.value; + << const_val.target_value.reg->name << " " << std::hex + << const_val.value; constant_values.push_back(const_val); } else { LOG(FATAL) << symval.curr_val().GetTypeName() From 6493170cc9038e027fbf1a98c9dc783a2de78775 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 22 Feb 2023 09:13:08 -0500 Subject: [PATCH 111/163] Adds a control flow simplification pass to support idiomatic block gotos (#354) * implement straightforward case * fix trivial case * add comment * add conditional branch * add optional remove next pc * fix header * add decompile flag for remove next pc --- bin/Decompile/Main.cpp | 5 + include/anvill/ABI.h | 4 + include/anvill/Lifters.h | 5 +- .../anvill/Passes/RemoveAssignmentsToNextPC.h | 33 +++++ lib/ABI.cpp | 2 + lib/CMakeLists.txt | 1 + lib/Optimize.cpp | 4 + lib/Passes/RemoveAssignmentsToNextPC.cpp | 134 ++++++++++++++++++ libraries/lifting-tools-ci | 2 +- 9 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 include/anvill/Passes/RemoveAssignmentsToNextPC.h create mode 100644 lib/Passes/RemoveAssignmentsToNextPC.cpp diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index 2988ad0c8..638124175 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -45,6 +45,8 @@ DEFINE_bool(add_breakpoints, false, DEFINE_bool(add_names, false, "Try to apply symbol names to lifted entities."); DEFINE_bool(disable_opt, false, "Dont apply optimization passes"); DEFINE_bool(llvm_debug, false, "Enable LLVM debug flag"); +DEFINE_bool(remove_next_pc_assignments, false, + "Enables remove next pc assignment pass"); DEFINE_string( default_callable_spec, "", @@ -172,6 +174,9 @@ int main(int argc, char *argv[]) { options.stack_frame_recovery_options.stack_offset_metadata_name = "stack_offset"; + options.should_remove_assignments_to_next_pc = + 
FLAGS_remove_next_pc_assignments; + anvill::EntityLifter lifter(options); std::unordered_map names; diff --git a/include/anvill/ABI.h b/include/anvill/ABI.h index fa8d60e0e..2636ca913 100644 --- a/include/anvill/ABI.h +++ b/include/anvill/ABI.h @@ -88,4 +88,8 @@ extern const std::string kBasicBlockMetadata; /// Intrinsic that acts like a return instruction but leaves both the basic block and the parent function. extern const std::string kAnvillBasicBlockReturn; + +// Instrinsic that acts as a goto to an address +extern const std::string kAnvillGoto; + } // namespace anvill diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index caa839efa..6c71a1dde 100644 --- a/include/anvill/Lifters.h +++ b/include/anvill/Lifters.h @@ -218,7 +218,8 @@ class LifterOptions { track_provenance(false), //TODO(ian): This should be initialized by an OS + arch pair stack_pointer_is_signed(false), - should_remove_anvill_pc(true) { + should_remove_anvill_pc(true), + should_remove_assignments_to_next_pc(false) { CheckModuleContextMatchesArch(); } @@ -290,6 +291,8 @@ class LifterOptions { bool should_remove_anvill_pc : 1; + bool should_remove_assignments_to_next_pc : 1; + private: LifterOptions(void) = delete; diff --git a/include/anvill/Passes/RemoveAssignmentsToNextPC.h b/include/anvill/Passes/RemoveAssignmentsToNextPC.h new file mode 100644 index 000000000..45ffd6847 --- /dev/null +++ b/include/anvill/Passes/RemoveAssignmentsToNextPC.h @@ -0,0 +1,33 @@ + +#pragma once + +#include +#include +#include + +#include "anvill/Lifters.h" + + +namespace anvill { + +// attempts to replace assignments to next pc with idiomatic control flow that terminates the block +// with the goto intrinsic +class RemoveAssignmentsToNextPC final + : public BasicBlockPass { + private: + const EntityLifter &lifter; + + public: + RemoveAssignmentsToNextPC(const BasicBlockContexts &contexts, + const EntityLifter &lifter) + : BasicBlockPass(contexts), + lifter(lifter) {} + + static llvm::StringRef 
name(void); + + + llvm::PreservedAnalyses + runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, + const anvill::BasicBlockContext &); +}; +} // namespace anvill \ No newline at end of file diff --git a/lib/ABI.cpp b/lib/ABI.cpp index c50cfedc9..59220129b 100644 --- a/lib/ABI.cpp +++ b/lib/ABI.cpp @@ -87,4 +87,6 @@ const std::string kBasicBlockMetadata(kAnvillNamePrefix + "basic_block_md"); const std::string kAnvillBasicBlockReturn(kAnvillNamePrefix + "basic_block_function_return"); +const std::string kAnvillGoto(kAnvillNamePrefix + "goto"); + } // namespace anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 0c5e6049d..8a45b89c1 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -61,6 +61,7 @@ set(anvill_passes ReplaceStackReferences RemoveCallIntrinsics ReplaceRemillFunctionReturnsWithAnvillFunctionReturns + RemoveAssignmentsToNextPC ) set(anvill_arch_HEADERS diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 43c4cbdf1..62fdf003d 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -73,6 +73,7 @@ #include #include +#include "anvill/Passes/RemoveAssignmentsToNextPC.h" #include "anvill/Specification.h" namespace anvill { @@ -292,6 +293,9 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, AddTransformRemillJumpIntrinsics(second_fpm, xr); second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); + if (options.should_remove_assignments_to_next_pc) { + second_fpm.addPass(anvill::RemoveAssignmentsToNextPC(contexts, lifter)); + } //AddRemoveRemillFunctionReturns(second_fpm, xr); //AddConvertSymbolicReturnAddressToConcreteReturnAddress(second_fpm); AddLowerRemillUndefinedIntrinsics(second_fpm); diff --git a/lib/Passes/RemoveAssignmentsToNextPC.cpp b/lib/Passes/RemoveAssignmentsToNextPC.cpp new file mode 100644 index 000000000..08db0151c --- /dev/null +++ b/lib/Passes/RemoveAssignmentsToNextPC.cpp @@ -0,0 +1,134 @@ +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace anvill { + +llvm::StringRef RemoveAssignmentsToNextPC::name(void) { + return "Replace stack references"; +} + + +namespace { +std::optional +UniqueAssignmentToNextPc(llvm::Function *func) { + auto target_arg = remill::NthArgument(func, remill::kNumBlockArgs); + + if (target_arg->getNumUses() == 1) { + if (auto *user = + llvm::dyn_cast(*target_arg->user_begin())) { + return user; + } + } + + return std::nullopt; +} + +std::optional UniqueReturn(llvm::Function *func) { + std::optional r = std::nullopt; + for (auto &insn : llvm::instructions(func)) { + if (auto nret = llvm::dyn_cast(&insn)) { + if (r) { + return std::nullopt; + } else { + r = nret; + } + } + } + + return r; +} + +llvm::Function *GetOrCreateGotoInstrinsic(llvm::Module *mod, + llvm::IntegerType *addr_ty) { + auto fun = mod->getFunction(anvill::kAnvillGoto); + if (fun) { + return fun; + } + auto tgt_type = llvm::FunctionType::get( + llvm::Type::getVoidTy(mod->getContext()), {addr_ty}, false); + return llvm::Function::Create(tgt_type, llvm::GlobalValue::ExternalLinkage, + anvill::kAnvillGoto, mod); +} + + +llvm::BasicBlock *CreateTargetBlock(llvm::Value *mem_val, llvm::Constant *c, + llvm::Function *func, + llvm::Function *intrinsic) { + auto bb = llvm::BasicBlock::Create(func->getContext(), "", func); + + llvm::IRBuilder<> ir(bb); + ir.CreateCall(intrinsic, {c}); + ir.CreateRet(mem_val); + + return bb; +} + +} // namespace + + +namespace pats = llvm::PatternMatch; +llvm::PreservedAnalyses RemoveAssignmentsToNextPC::runOnBasicBlockFunction( + llvm::Function &F, llvm::FunctionAnalysisManager &AM, + const anvill::BasicBlockContext &cont) { + + auto next_pc_assign = UniqueAssignmentToNextPc(&F); + auto maybe_unique_ret = UniqueReturn(&F); + if (!next_pc_assign || !maybe_unique_ret) { + return llvm::PreservedAnalyses::all(); + } + + auto unique_ret = 
*maybe_unique_ret; + + + auto stored = (*next_pc_assign)->getValueOperand(); + // now we have threes cases we can handle: constant in which case terminate with a goto, select on constant, create a terminating if goto, + // non constant (now we could try to recover a jump table here, but instead just switch on the stored pc value) + // TODO(Ian): we may be able to use the jump table analysis here to recover more idiomatic switching.. we are essentially re-doing anvill complete switch here + llvm::Constant *first{nullptr}; + llvm::Constant *second{nullptr}; + llvm::Value *condition{nullptr}; + + auto goto_instrinsic = GetOrCreateGotoInstrinsic( + F.getParent(), this->lifter.Options().arch->AddressType()); + if (pats::match(stored, pats::m_Constant(first))) { + // TODO(Ian): should probably check pc taint + llvm::IRBuilder<> ir(unique_ret); + ir.CreateCall(goto_instrinsic, {first}); + (*next_pc_assign)->eraseFromParent(); + } else if (pats::match(stored, pats::m_Select(pats::m_Value(condition), + pats::m_Constant(first), + pats::m_Constant(second)))) { + auto mem = unique_ret->getReturnValue(); + llvm::IRBuilder<> ir(unique_ret->getParent()); + unique_ret->eraseFromParent(); + auto f = CreateTargetBlock(mem, first, &F, goto_instrinsic); + auto s = CreateTargetBlock(mem, second, &F, goto_instrinsic); + ir.CreateCondBr(condition, f, s); + (*next_pc_assign)->eraseFromParent(); + } else { + // not supported yet + return llvm::PreservedAnalyses::all(); + } + + CHECK(!llvm::verifyFunction(F, &llvm::errs())); + + return llvm::PreservedAnalyses::none(); +} + +} // namespace anvill \ No newline at end of file diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 2f5ae380e..3bf1ea36c 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 2f5ae380e78f2288ead5f4c1c6aef30c68c9d721 +Subproject commit 3bf1ea36c73ad35cbf8faa44f8b9bd8c8f911f97 From 71976158e5e318546f5ac44e13f0383660de1a51 Mon Sep 17 00:00:00 2001 
From: 2over12 Date: Fri, 24 Feb 2023 06:50:03 -0500 Subject: [PATCH 112/163] Rework load lifted value and store native to operate over composite low level storage (#356) * hacky compiles * links * casts * first kinda working version * keep around entities * dont try to lift null return * add check * remove spurious check * bump remill * dont let passbuilder use default lib info --- data_specifications/specification.proto | 2 + include/anvill/Declarations.h | 30 +- include/anvill/Utils.h | 10 +- lib/Arch/Arch.cpp | 19 +- lib/Arch/Arch.h | 6 +- lib/Arch/StubABI.cpp | 28 ++ lib/CMakeLists.txt | 28 +- lib/Declarations.cpp | 165 +++++----- lib/Lifters/BasicBlockLifter.cpp | 36 ++- lib/Lifters/FunctionLifter.cpp | 24 +- lib/Optimize.cpp | 8 +- lib/Passes/RemoveCallIntrinsics.cpp | 3 +- ...nctionReturnsWithAnvillFunctionReturns.cpp | 10 +- lib/Passes/ReplaceStackReferences.cpp | 38 ++- lib/Protobuf.cpp | 144 +++++---- lib/Protobuf.h | 16 +- lib/Utils.cpp | 295 +++++++++++++----- libraries/lifting-tools-ci | 2 +- remill | 2 +- 19 files changed, 545 insertions(+), 321 deletions(-) create mode 100644 lib/Arch/StubABI.cpp diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index e4de9828b..2f6c44729 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -94,11 +94,13 @@ message TypeSpec { message Register { string register_name = 1; + optional uint64 subreg_sz = 2; } message Memory { optional string base_reg = 1; int64 offset = 2; + uint64 size = 3; } message Variable { diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 0c5a84a5b..815472424 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -59,6 +59,18 @@ struct CodeBlock { class TypeDictionary; + +struct LowLoc { + const remill::Register *reg{nullptr}; + const remill::Register *mem_reg{nullptr}; + std::int64_t mem_offset{0}; + std::optional size{std::nullopt}; + + 
std::uint64_t Size() const; + + bool operator==(const LowLoc &loc) const; +}; + // A value, such as a parameter or a return value. Values are resident // in one of two locations: either in a register, represented by a non- // nullptr `reg` value, or in memory, at `[mem_reg + mem_offset]`. @@ -75,9 +87,7 @@ class TypeDictionary; // the caller allocate the space, and pass a pointer to that space into // the callee, and so that should be represented using a parameter. struct ValueDecl { - const remill::Register *reg{nullptr}; - const remill::Register *mem_reg{nullptr}; - std::int64_t mem_offset{0}; + std::vector oredered_locs; TypeSpec spec_type; @@ -85,6 +95,7 @@ struct ValueDecl { llvm::Type *type{nullptr}; }; + // A value declaration corresponding with a named parameter. struct ParameterDecl : public ValueDecl { @@ -169,7 +180,7 @@ struct CallableDecl { // NOTE(pag): In the case of the AMD64 Itanium ABI, we expect the // specification to include `RDX` as an explicit return // value when the function might throw an exception. - std::vector returns; + ValueDecl returns; // Is this a noreturn function, e.g. like `abort`? bool is_noreturn{false}; @@ -201,11 +212,6 @@ struct CallableDecl { DecodeFromPB(const remill::Arch *arch, const std::string &pb); }; -struct LocalVariableDecl { - std::string name; - std::vector values; -}; - // Basic block contexts impose an ordering on live values s.t. shared Parameters between // live exits and entries @@ -232,7 +238,7 @@ class BasicBlockContext { virtual uint64_t GetParentFunctionAddress() const = 0; - virtual const std::vector &ReturnValue() const = 0; + virtual ValueDecl ReturnValue() const = 0; // Deduplicates locations and ensures there are no overlapping decls // A valid parameter list is a set of non overlapping a-locs with distinct names. 
@@ -317,7 +323,7 @@ class SpecBlockContext : public BasicBlockContext { virtual const std::vector &GetConstants() const override; - virtual const std::vector &ReturnValue() const override; + virtual ValueDecl ReturnValue() const override; virtual uint64_t GetParentFunctionAddress() const override; @@ -365,7 +371,7 @@ struct FunctionDecl : public CallableDecl { // These are the blocks contained within the function representing the CFG. std::unordered_map cfg; - std::unordered_map locals; + std::unordered_map locals; std::unordered_map stack_offsets; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 01524adaf..467a51a7d 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -91,14 +91,14 @@ bool CanBeAliased(llvm::Value *val); // the lifted value associated with `decl`. llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, const remill::IntrinsicTable &intrinsics, + const remill::Arch *arch, llvm::BasicBlock *in_block, llvm::Value *state_ptr, llvm::Value *mem_ptr); llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, const remill::IntrinsicTable &intrinsics, - llvm::IRBuilder<> &ir, llvm::Value *state_ptr, - llvm::Value *mem_ptr); - + const remill::Arch *arch, llvm::IRBuilder<> &ir, + llvm::Value *state_ptr, llvm::Value *mem_ptr); void CloneIntrinsicsFromModule(llvm::Module &from, llvm::Module &into); @@ -139,4 +139,8 @@ ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, const anvill::BasicBlockContext &context); llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func); + +bool HasMemLoc(const ValueDecl &v); + +bool HasRegLoc(const ValueDecl &v); } // namespace anvill diff --git a/lib/Arch/Arch.cpp b/lib/Arch/Arch.cpp index 94643f819..84f2b24f5 100644 --- a/lib/Arch/Arch.cpp +++ b/lib/Arch/Arch.cpp @@ -44,7 +44,7 @@ bool RegisterConstraint::ContainsVariant(const std::string &name) const { Result CallingConvention::CreateCCFromArch(const remill::Arch *arch) { - 
switch (arch->arch_name) { + /*switch (arch->arch_name) { case remill::kArchInvalid: { return kInvalidArch; } @@ -105,14 +105,15 @@ CallingConvention::CreateCCFromArch(const remill::Arch *arch) { std::stringstream ss; ss << "Unsupported architecture/OS pair: " << arch_name << " and " << os_name; - return ss.str(); + return ss.str();*/ + return CreateStubABI(); } // Still need the arch to be passed in so we can create the calling convention Result -CallingConvention::CreateCCFromArchAndID( - const remill::Arch *arch, llvm::CallingConv::ID cc_id) { - switch (cc_id) { +CallingConvention::CreateCCFromArchAndID(const remill::Arch *arch, + llvm::CallingConv::ID cc_id) { + /* switch (cc_id) { case llvm::CallingConv::C: if (arch->IsX86()) { return CreateX86_C(arch); @@ -166,7 +167,8 @@ CallingConvention::CreateCCFromArchAndID( std::stringstream ss; ss << "Unsupported calling convention ID: " << static_cast(cc_id); - return ss.str(); + return ss.str();*/ + return CreateStubABI(); } Result @@ -182,7 +184,7 @@ CallingConvention::AllocateSignature(llvm::Function &func) { if (remill::IsError(maybe_decl)) { return remill::GetErrorString(maybe_decl); } else { - // Here we override the return type of the extern declaration to match how it was allocated + // Here we override the return type of the extern declaration to match how it was allocated // In the future instead of doing this we should store information about how to extract return values at the llvm // level into the abi returns. // TODO(ian): Dont dont do this. @@ -197,8 +199,7 @@ CallingConvention::AllocateSignature(llvm::Function &func) { // positional starting at 1. 
std::vector TryRecoverParamNames(const llvm::Function &function) { std::vector param_names; - param_names.reserve( - function.getFunctionType()->getNumParams()); + param_names.reserve(function.getFunctionType()->getNumParams()); auto i = 0u; for (auto ¶m : function.args()) { diff --git a/lib/Arch/Arch.h b/lib/Arch/Arch.h index 3a01b497e..d07996c77 100644 --- a/lib/Arch/Arch.h +++ b/lib/Arch/Arch.h @@ -145,7 +145,7 @@ class CallingConvention { protected: const remill::Arch *const arch; - + /* static std::unique_ptr CreateX86_C(const remill::Arch *arch); @@ -174,7 +174,9 @@ class CallingConvention { CreateSPARC64_C(const remill::Arch *arch); static std::unique_ptr - CreatePPC_SysV(const remill::Arch *arch); + CreatePPC_SysV(const remill::Arch *arch);*/ + + static std::unique_ptr CreateStubABI(); private: const llvm::CallingConv::ID identity; diff --git a/lib/Arch/StubABI.cpp b/lib/Arch/StubABI.cpp new file mode 100644 index 000000000..92b0652c0 --- /dev/null +++ b/lib/Arch/StubABI.cpp @@ -0,0 +1,28 @@ +#include +#include +#include +#include + +#include + +#include "Arch.h" + +namespace anvill { + +class Stub : public CallingConvention { + public: + Stub() : CallingConvention(0, nullptr) {} + + llvm::Error AllocateSignature(FunctionDecl &fdecl, + llvm::Function &func) override { + return llvm::createStringError( + std::errc::invalid_argument, + "No longer supporting allocating signatures"); + } +}; + +std::unique_ptr CallingConvention::CreateStubABI() { + + return std::make_unique(); +} +} // namespace anvill \ No newline at end of file diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 8a45b89c1..d1a037200 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -65,23 +65,27 @@ set(anvill_passes ) set(anvill_arch_HEADERS - "Arch/AllocationState.h" + + # "Arch/AllocationState.h" "Arch/Arch.h" ) set(anvill_arch_SOURCES - "Arch/AArch32_C.cpp" - "Arch/AArch64_C.cpp" - "Arch/AllocationState.cpp" + + # "Arch/AArch32_C.cpp" + # "Arch/AArch64_C.cpp" + # 
"Arch/AllocationState.cpp" "Arch/Arch.cpp" - "Arch/PPC_SysV.cpp" - "Arch/SPARC32_C.cpp" - "Arch/SPARC64_C.cpp" - "Arch/X86_64_SysV.cpp" - "Arch/X86_C.cpp" - "Arch/X86_FastCall.cpp" - "Arch/X86_StdCall.cpp" - "Arch/X86_ThisCall.cpp" + "Arch/StubABI.cpp" + + # "Arch/PPC_SysV.cpp" + # "Arch/SPARC32_C.cpp" + # "Arch/SPARC64_C.cpp" + # "Arch/X86_64_SysV.cpp" + # "Arch/X86_C.cpp" + # "Arch/X86_FastCall.cpp" + # "Arch/X86_StdCall.cpp" + # "Arch/X86_ThisCall.cpp" ) set(anvill_lifters_HEADERS diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 66572df9e..75b1152a2 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -43,29 +44,26 @@ namespace { -// A value decl without a type, we assume a parameter occupying a location occupies the entire location -// or that offsets are disjoin, this isnt completely correct if we start doing better stack liveness TODO(Ian) -// We would need to check offset+size overlaps -struct LocBase { - const remill::Register *reg_loc{nullptr}; - const remill::Register *mem_base{nullptr}; - std::int64_t offset{0}; - - bool operator==(const LocBase &lbase) const { - return reg_loc == lbase.reg_loc && mem_base == lbase.mem_base && - lbase.offset == offset; - } -}; + +template +inline void hash_combine(std::size_t &seed, const T &v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + } // namespace namespace std { template <> -struct std::hash { - std::size_t operator()(const LocBase &c) const { +struct std::hash { + std::size_t operator()(const anvill::LowLoc &c) const { std::size_t result = 0; - + hash_combine(result, c.mem_reg); + hash_combine(result, c.mem_offset); + hash_combine(result, c.reg); + hash_combine(result, c.size); return result; } }; @@ -73,6 +71,11 @@ struct std::hash { namespace anvill { +bool LowLoc::operator==(const LowLoc &loc) const { + return reg == loc.reg && mem_reg == loc.mem_reg && + loc.mem_offset 
== mem_offset && loc.size == size; +} + // Declare this global variable in an LLVM module. llvm::GlobalVariable * VariableDecl::DeclareInModule(const std::string &name, @@ -97,6 +100,14 @@ void FunctionDecl::AddBBContexts( } } +std::uint64_t LowLoc::Size() const { + if (this->size) { + return *this->size; + } else { + return this->reg->size; + } +} + // need to be careful here about overlapping values std::vector @@ -105,42 +116,49 @@ BasicBlockContext::LiveParamsAtEntryAndExit() const { auto live_entries = this->LiveParamsAtEntry(); - auto convert_to_locbas = [](const ParameterDecl ¶m) -> LocBase { - return {param.reg, param.mem_reg, param.mem_offset}; - }; - - auto add_to_set = [convert_to_locbas]( - const std::vector ¶ms, - std::unordered_set &locs_to_add) { - std::transform(params.begin(), params.end(), - std::inserter(locs_to_add, locs_to_add.end()), - convert_to_locbas); + auto add_to_set = [](const std::vector ¶ms, + std::unordered_set &locs_to_add) { + for (const auto &p : params) { + std::copy(p.oredered_locs.begin(), p.oredered_locs.end(), + std::inserter(locs_to_add, locs_to_add.end())); + } }; - std::unordered_set covered_live_ent; + std::unordered_set covered_live_ent; add_to_set(live_entries, covered_live_ent); - std::unordered_set covered_live_exit; + std::unordered_set covered_live_exit; add_to_set(live_exits, covered_live_exit); std::vector res; - std::unordered_set covered; - auto add_all_from_vector = [&res, &covered, &covered_live_ent, - &covered_live_exit, convert_to_locbas]( - std::vector params) { - for (auto p : params) { - auto lbase = convert_to_locbas(p); - auto live_at_ent = covered_live_ent.find(lbase) != covered_live_ent.end(); - auto live_at_exit = - covered_live_exit.find(lbase) != covered_live_exit.end(); - CHECK(covered.find(lbase) == covered.end() || - (live_at_ent && live_at_exit)); - if (covered.find(lbase) == covered.end()) { - covered.insert(lbase); - auto ind = res.size(); - res.push_back({p, ind, live_at_ent, live_at_exit}); 
- } - } - }; + std::unordered_set covered; + auto add_all_from_vector = + [&res, &covered, &covered_live_ent, + &covered_live_exit](std::vector params) { + for (auto p : params) { + auto completely_covered = + std::all_of(p.oredered_locs.begin(), p.oredered_locs.end(), + [&covered](const LowLoc &loc) -> bool { + return covered.find(loc) != covered.end(); + }); + auto live_at_ent = std::any_of( + p.oredered_locs.begin(), p.oredered_locs.end(), + [&covered_live_ent](const LowLoc &loc) -> bool { + return covered_live_ent.find(loc) != covered_live_ent.end(); + }); + auto live_at_exit = std::any_of( + p.oredered_locs.begin(), p.oredered_locs.end(), + [&covered_live_exit](const LowLoc &loc) -> bool { + return covered_live_exit.find(loc) != covered_live_exit.end(); + }); + + if (!completely_covered) { + std::copy(p.oredered_locs.begin(), p.oredered_locs.end(), + std::inserter(covered, covered.end())); + auto ind = res.size(); + res.push_back({p, ind, live_at_ent, live_at_exit}); + } + } + }; add_all_from_vector(live_entries); add_all_from_vector(live_exits); @@ -221,7 +239,7 @@ FunctionDecl::DeclareInModule(std::string_view name, return func; } -const std::vector &SpecBlockContext::ReturnValue() const { +ValueDecl SpecBlockContext::ReturnValue() const { return this->decl.returns; } @@ -306,14 +324,14 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( llvm::SmallVector param_vals; // Get the return address. - auto ret_addr = LoadLiftedValue(return_address, types, intrinsics, ir, - state_ptr, mem_ptr); + auto ret_addr = LoadLiftedValue(return_address, types, intrinsics, this->arch, + ir, state_ptr, mem_ptr); CHECK(ret_addr && !llvm::isa_and_nonnull(ret_addr)); // Get the parameters. 
for (const auto ¶m_decl : params) { - const auto val = - LoadLiftedValue(param_decl, types, intrinsics, ir, state_ptr, mem_ptr); + const auto val = LoadLiftedValue(param_decl, types, intrinsics, this->arch, + ir, state_ptr, mem_ptr); if (auto inst_val = llvm::dyn_cast(val)) { inst_val->setName(param_decl.name); } @@ -333,25 +351,10 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( ret_val->setDoesNotReturn(); } - // There is a single return value, store it to the lifted state. - if (returns.size() == 1) { - auto call_ret = ret_val; - - mem_ptr = StoreNativeValue(call_ret, returns.front(), types, intrinsics, ir, + auto call_ret = ret_val; + if (!call_ret->getType()->isVoidTy()) { + mem_ptr = StoreNativeValue(call_ret, this->returns, types, intrinsics, ir, state_ptr, mem_ptr); - - // There are possibly multiple return values (or zero). Unpack the - // return value (it will be a struct type) into its components and - // write each one out into the lifted state. - } else { - unsigned index = 0; - for (const auto &ret_decl : returns) { - unsigned indexes[] = {index}; - auto elem_val = ir.CreateExtractValue(ret_val, indexes); - mem_ptr = StoreNativeValue(elem_val, ret_decl, types, intrinsics, ir, - state_ptr, mem_ptr); - index += 1; - } } // TODO(Ian): ... well ok so we already did stuff assuming the PC was one way since we lifted below it. 
@@ -434,27 +437,9 @@ void CallableDecl::OverrideFunctionTypeWithABIParamLayout() { } void CallableDecl::OverrideFunctionTypeWithABIReturnLayout() { - if (this->returns.size() < 1) { - return; - } else if (this->returns.size() == 1) { - // Override the return type with the type of the last return - auto new_func_type = - llvm::FunctionType::get(this->returns.front().type, - this->type->params(), this->type->isVarArg()); - this->type = new_func_type; - } else { - // Create a structure that has a field for each return - std::vector elems; - for (const auto &ret : this->returns) { - elems.push_back(ret.type); - } - - auto ret_type_struct = llvm::StructType::create(elems); - - auto new_func_type = llvm::FunctionType::get( - ret_type_struct, this->type->params(), this->type->isVarArg()); - this->type = new_func_type; - } + auto new_func_type = llvm::FunctionType::get( + this->returns.type, this->type->params(), this->type->isVarArg()); + this->type = new_func_type; } namespace { diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index a9f0a0467..ce72bdc94 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -406,7 +406,8 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { arg->setName(v.param.name); } - if (v.param.reg) { + if (std::all_of(v.param.oredered_locs.begin(), v.param.oredered_locs.end(), + [](const LowLoc &loc) -> bool { return loc.reg; })) { // Registers should not have aliases arg->addAttr(llvm::Attribute::get(llvm_context, llvm::Attribute::AttrKind::NoAlias)); @@ -501,9 +502,9 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { } - DLOG_IF(INFO, reg_const.target_value.reg) - << "Dumping " << reg_const.target_value.reg->name << " " << std::hex - << reg_const.value; + //DLOG_IF(INFO, reg_const.target_value.reg) + // << "Dumping " << reg_const.target_value.reg->name << " " << std::hex + // << reg_const.value; auto nmem = StoreNativeValue(new_value, 
reg_const.target_value, type_provider.Dictionary(), intrinsics, ir, this->state_ptr, @@ -555,18 +556,24 @@ void BasicBlockLifter::PackLiveValues( for (auto decl : decls) { - if (!decl.param.mem_reg) { + if (!HasMemLoc(decl.param)) { auto ptr = into_vars(decl.index); auto state_loaded_value = LoadLiftedValue( - decl.param, this->type_provider.Dictionary(), this->intrinsics, bldr, - from_state_ptr, remill::LoadMemoryPointer(bldr, this->intrinsics)); + decl.param, this->type_provider.Dictionary(), this->intrinsics, + this->options.arch, bldr, from_state_ptr, + remill::LoadMemoryPointer(bldr, this->intrinsics)); bldr.CreateStore(state_loaded_value, ptr); + } else { + // TODO(Ian): The assumption is we dont have live values split between the stack and a register for now... + // Maybe at some point we can just go ahead and store everything + CHECK(!HasRegLoc(decl.param)); } } } + void BasicBlockLifter::UnpackLiveValues( llvm::IRBuilder<> &bldr, PointerProvider returned_value, llvm::Value *into_state_ptr, @@ -575,7 +582,7 @@ void BasicBlockLifter::UnpackLiveValues( for (auto decl : decls) { // is this how we want to do this.... now the value really doesnt live in memory anywhere but the frame. - if (!decl.param.mem_reg) { + if (!HasMemLoc(decl.param)) { auto ptr = returned_value(decl.index); if (auto insn = llvm::dyn_cast(ptr)) { insn->setMetadata("anvill.type", this->type_specifier.EncodeToMetadata( @@ -592,6 +599,10 @@ void BasicBlockLifter::UnpackLiveValues( bldr.CreateStore(new_mem_ptr, remill::LoadMemoryPointerRef(bldr.GetInsertBlock())); + } else { + // TODO(Ian): The assumption is we dont have live values split between the stack and a register for now... 
+ // Maybe at some point we can just go ahead and store everything + CHECK(!HasRegLoc(decl.param)); } } CHECK(bldr.GetInsertPoint() == blk->end()); @@ -625,9 +636,12 @@ void BasicBlockLifter::CallBasicBlockFunction( &stack](size_t index) -> llvm::Value * { auto repr_var = bbvars[index]; LOG(INFO) << "Lifting: " << repr_var.param.name << " for call"; - if (repr_var.param.mem_reg) { + if (HasMemLoc(repr_var.param)) { + // TODO(Ian): the assumption here since we are able to build a single pointer here into the frame is that + // svars are single valuedecl contigous + CHECK(repr_var.param.oredered_locs.size() == 1); auto stack_ptr = stack.PointerToStackMemberFromOffset( - builder, repr_var.param.mem_offset); + builder, repr_var.param.oredered_locs[0].mem_offset); if (stack_ptr) { return *stack_ptr; } else { @@ -657,7 +671,7 @@ void BasicBlockLifter::CallBasicBlockFunction( this->UnpackLiveValues(builder, ptr_provider, parent_state, this->block_context->LiveBBParamsAtExit()); -} +} // namespace BasicBlockLifter::UnpackLiveValues(llvm::IRBuilder<>&bldr,PointerProviderreturned_value,llvm::Value*into_state_ptr,conststd::vector&decls)const void CallableBasicBlockFunction::CallBasicBlockFunction( diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index a71763eaf..a95a8405c 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -326,24 +326,14 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( AnnotateInstructions(block, pc_annotation_id, GetPCAnnotation(func_address)); llvm::Value *ret_val = nullptr; - - if (decl.returns.size() == 1) { - ret_val = LoadLiftedValue(decl.returns.front(), types, intrinsics, block, - native_state_ptr, mem_ptr); - ir.SetInsertPoint(block); - - } else if (1 < decl.returns.size()) { - ret_val = llvm::UndefValue::get(native_func->getReturnType()); - auto index = 0u; - for (auto &ret_decl : decl.returns) { - auto partial_ret_val = LoadLiftedValue(ret_decl, types, intrinsics, block, - 
native_state_ptr, mem_ptr); - ir.SetInsertPoint(block); - unsigned indexes[] = {index}; - ret_val = ir.CreateInsertValue(ret_val, partial_ret_val, indexes); - index += 1; - } + if (decl.returns.oredered_locs.size() != 0 && + !decl.returns.type->isVoidTy()) { + ret_val = + LoadLiftedValue(decl.returns, types, intrinsics, this->options.arch, + block, native_state_ptr, mem_ptr); } + ir.SetInsertPoint(block); + auto memory_escape = GetMemoryEscapeFunc(intrinsics); llvm::Value *escape_args[] = {mem_ptr}; diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 62fdf003d..1082fc889 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -124,6 +125,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, LOG(FATAL) << remill::GetErrorString(err); } + /* if (auto used = module.getGlobalVariable("llvm.used"); used) { used->setLinkage(llvm::GlobalValue::PrivateLinkage); used->eraseFromParent(); @@ -132,7 +134,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, if (auto used = module.getGlobalVariable("llvm.compiler.used"); used) { used->setLinkage(llvm::GlobalValue::PrivateLinkage); used->eraseFromParent(); - } + }*/ LOG(INFO) << "Optimizing module."; @@ -163,6 +165,10 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, // llvm::InlineParams params; llvm::FunctionAnalysisManager fam; + llvm::Triple ModuleTriple(module.getTargetTriple()); + llvm::TargetLibraryInfoImpl TLII(ModuleTriple); + TLII.disableAllFunctions(); + fam.registerPass([&] { return llvm::TargetLibraryAnalysis(TLII); }); pb.registerFunctionAnalyses(fam); pb.registerModuleAnalyses(mam); pb.registerCGSCCAnalyses(cam); diff --git a/lib/Passes/RemoveCallIntrinsics.cpp b/lib/Passes/RemoveCallIntrinsics.cpp index bec8aeebc..be4ae8a68 100644 --- a/lib/Passes/RemoveCallIntrinsics.cpp +++ b/lib/Passes/RemoveCallIntrinsics.cpp @@ -55,7 +55,8 @@ 
RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, remillFunctionCall->getFunction()->getParent()); LOG(INFO) << "Replacing call from: " << remill::LLVMThingToString(remillFunctionCall) - << " with call to " << std::hex << ra.u.address; + << " with call to " << std::hex << ra.u.address + << " d has: " << std::string(entity->getName()); auto new_mem = fdecl->CallFromLiftedBlock(entity, lifter.Options().TypeDictionary(), table, ir, state_ptr, mem_ptr); diff --git a/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp b/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp index 4f95d626f..86cc6bf18 100644 --- a/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp +++ b/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp @@ -40,7 +40,7 @@ ReplaceRemillFunctionReturnsWithAnvillFunctionReturns::runOnBasicBlockFunction( } - const std::vector &ret_decl = bbcont.ReturnValue(); + ValueDecl ret_decl = bbcont.ReturnValue(); remill::IntrinsicTable intrinsics(F.getParent()); auto pres_analyses = llvm::PreservedAnalyses::all(); for (auto rep : to_replace) { @@ -51,12 +51,14 @@ ReplaceRemillFunctionReturnsWithAnvillFunctionReturns::runOnBasicBlockFunction( std::vector args; - for (auto vdecl : ret_decl) { + + if (ret_decl.oredered_locs.size() != 0 && !ret_decl.type->isVoidTy()) { args.push_back(anvill::LoadLiftedValue( - vdecl, this->lifter.Options().TypeDictionary(), intrinsics, ir, state, - mem)); + ret_decl, this->lifter.Options().TypeDictionary(), intrinsics, + this->lifter.Options().arch, ir, state, mem)); } + auto tgt = GetOrCreateAnvillReturnFunc(F.getParent()); ir.CreateCall(tgt, args); diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index ffffd7341..218e7e864 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -160,8 +160,9 @@ class StackModel { // this feels weird maybe it should be all stack 
variables but then if the variable isnt live... // we will have discovered something that should have been live. for (const auto &v : cont.LiveParamsAtEntryAndExit()) { - if (v.param.mem_reg && - v.param.mem_reg->name == arch->StackPointerRegisterName()) { + if (HasMemLoc(v.param) && v.param.oredered_locs.size() == 1 && + v.param.oredered_locs[0].mem_reg->name == + arch->StackPointerRegisterName()) { this->InsertFrameVar(index, v.param); } index += 1; @@ -189,7 +190,7 @@ class StackModel { auto prev_decl = (--prec)->second; - CHECK(prev_decl.decl.mem_offset <= off); + CHECK(prev_decl.decl.oredered_locs[0].mem_offset <= off); return {prev_decl}; } @@ -201,16 +202,17 @@ class StackModel { return std::nullopt; } - LOG(INFO) << "value found lte offset: " << vlte->decl.mem_offset << " " - << off; + LOG(INFO) << "value found lte offset: " + << vlte->decl.oredered_locs[0].mem_offset << " " << off; - auto offset_into_var = off - vlte->decl.mem_offset; + auto offset_into_var = off - vlte->decl.oredered_locs[0].mem_offset; if (offset_into_var < static_cast(GetParamDeclSize(vlte->decl))) { return {{offset_into_var, *vlte}}; } LOG(INFO) << "Looking for off " << off << " but not fitting " - << offset_into_var << " got off " << vlte->decl.mem_offset; + << offset_into_var << " got off " + << vlte->decl.oredered_locs[0].mem_offset; return std::nullopt; } @@ -223,22 +225,24 @@ class StackModel { void InsertFrameVar(size_t index, ParameterDecl var) { - if (VarOverlaps(var.mem_offset) || - VarOverlaps(var.mem_offset + GetParamDeclSize(var) - 1)) { - - auto oparam = GetOverlappingParam(var.mem_offset); - if (!VarOverlaps(var.mem_offset)) { - oparam = - GetOverlappingParam(var.mem_offset + GetParamDeclSize(var) - 1); + if (VarOverlaps(var.oredered_locs[0].mem_offset) || + VarOverlaps(var.oredered_locs[0].mem_offset + GetParamDeclSize(var) - + 1)) { + + auto oparam = GetOverlappingParam(var.oredered_locs[0].mem_offset); + if (!VarOverlaps(var.oredered_locs[0].mem_offset)) { + oparam = 
GetOverlappingParam(var.oredered_locs[0].mem_offset + + GetParamDeclSize(var) - 1); } LOG(FATAL) << "Inserting variable that overlaps with current frame " - << var.mem_offset << " with size: " << GetParamDeclSize(var) - << " Overlaps with " << oparam->decl.decl.mem_offset + << var.oredered_locs[0].mem_offset + << " with size: " << GetParamDeclSize(var) << " Overlaps with " + << oparam->decl.decl.oredered_locs[0].mem_offset << " with size " << GetParamDeclSize(oparam->decl.decl); } - this->frame.insert({var.mem_offset, {index, var}}); + this->frame.insert({var.oredered_locs[0].mem_offset, {index, var}}); } }; diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 01822fd94..610c64cd9 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -159,8 +159,15 @@ Result ProtobufTranslator::ParseIntoCallableDecl( // Get the return address location. if (function.has_return_address()) { auto ret_addr = function.return_address(); - auto maybe_ret = DecodeValue(ret_addr, SizeToType(arch->address_size), - "return address"); + auto maybe_low_loc_ret_addr = DecodeLowLoc(ret_addr, "return address"); + if (!maybe_low_loc_ret_addr.Succeeded()) { + return maybe_low_loc_ret_addr.TakeError(); + } + + std::vector low_loc_ret_addr = { + maybe_low_loc_ret_addr.TakeValue()}; + auto maybe_ret = ValueDeclFromOrderedLowLoc( + low_loc_ret_addr, SizeToType(arch->address_size), "return address"); if (!maybe_ret.Succeeded()) { auto err = maybe_ret.TakeError(); std::stringstream ss; @@ -217,41 +224,26 @@ Result ProtobufTranslator::ParseIntoCallableDecl( } i = 0u; - for (const ::specification::Value &ret : function.return_().values()) { - auto maybe_ret = DecodeValue(ret, maybe_ret_type.Value(), "return value"); - if (maybe_ret.Succeeded()) { - decl.returns.emplace_back(maybe_ret.Value()); - } else { - auto err = maybe_ret.TakeError(); - std::stringstream ss; - ss << "Could not decode " << i << "th return value in function at " - << address_str << ": " << err; - return {ss.str()}; - } - ++i; + + + 
auto maybe_ret = + DecodeValueDecl(function.return_().values(), maybe_ret_type.TakeValue(), + "return value"); + if (!maybe_ret.Succeeded()) { + auto err = maybe_ret.TakeError(); + std::stringstream ss; + ss << "Could not decode " << i << "th return value in function at " + << address_str << ": " << err; + return {ss.str()}; } + decl.returns = maybe_ret.TakeValue(); + // Figure out the return type of this function based off the return // values. - llvm::Type *ret_type = nullptr; - if (decl.returns.empty()) { + llvm::Type *ret_type = ret_type = decl.returns.type; + if (decl.returns.oredered_locs.empty()) { ret_type = llvm::Type::getVoidTy(context); - - } else if (decl.returns.size() == 1) { - ret_type = decl.returns[0].type; - - // The multiple return value case is most interesting, and somewhere - // where we see some divergence between C and what we will decompile. - // For example, on 32-bit x86, a 64-bit return value might be spread - // across EAX:EDX. Instead of representing this by a single value, we - // represent it as a structure if two 32-bit ints, and make sure to say - // that one part is in EAX, and the other is in EDX. - } else { - llvm::SmallVector ret_types; - for (auto &ret_val : decl.returns) { - ret_types.push_back(ret_val.type); - } - ret_type = llvm::StructType::get(context, ret_types, false); } llvm::SmallVector param_types; @@ -277,34 +269,37 @@ ProtobufTranslator::ProtobufTranslator( type_translator.Dictionary().u.named.void_, context)), type_map(type_map) {} -// Decode the location of a value. This applies to both parameters and -// return values. 
-anvill::Result -ProtobufTranslator::DecodeValue(const ::specification::Value &value, - TypeSpec type, const char *desc) const { - ValueDecl decl; +anvill::Result +ProtobufTranslator::DecodeLowLoc(const ::specification::Value &value, + const char *desc) const { + LowLoc loc; if (value.has_reg()) { auto ® = value.reg(); - decl.reg = arch->RegisterByName(reg.register_name()); - if (!decl.reg) { + loc.reg = arch->RegisterByName(reg.register_name()); + if (!loc.reg) { std::stringstream ss; ss << "Unable to locate register '" << reg.register_name() << "' used for storing " << desc; return ss.str(); } + if (reg.has_subreg_sz()) { + loc.size = reg.subreg_sz(); + } + } else if (value.has_mem()) { auto &mem = value.mem(); if (mem.has_base_reg()) { - decl.mem_reg = arch->RegisterByName(mem.base_reg()); - if (!decl.mem_reg) { + loc.mem_reg = arch->RegisterByName(mem.base_reg()); + if (!loc.mem_reg) { std::stringstream ss; ss << "Unable to locate base register '" << mem.base_reg() << "' used for storing " << desc; return ss.str(); } } - decl.mem_offset = mem.offset(); + loc.mem_offset = mem.offset(); + loc.size = mem.size(); } else { std::stringstream ss; ss << "A " << desc << " declaration must specify its location with " @@ -312,6 +307,16 @@ ProtobufTranslator::DecodeValue(const ::specification::Value &value, return ss.str(); } + return loc; +} + +anvill::Result +ProtobufTranslator::ValueDeclFromOrderedLowLoc(std::vector loc, + TypeSpec type, + const char *desc) const { + + ValueDecl decl; + decl.oredered_locs = std::move(loc); decl.spec_type = type; auto llvm_type = type_translator.DecodeFromSpec(decl.spec_type); if (!llvm_type.Succeeded()) { @@ -325,6 +330,25 @@ ProtobufTranslator::DecodeValue(const ::specification::Value &value, return decl; } + +// Decode the location of a value. This applies to both parameters and +// return values. 
+anvill::Result ProtobufTranslator::DecodeValueDecl( + const ::google::protobuf::RepeatedPtrField<::specification::Value> &values, + TypeSpec type, const char *desc) const { + std::vector locs; + for (const auto &val : values) { + auto loc = DecodeLowLoc(val, desc); + if (!loc.Succeeded()) { + return loc.TakeError(); + } + locs.push_back(loc.TakeValue()); + } + + return ValueDeclFromOrderedLowLoc(std::move(locs), type, desc); +} + + // Decode a parameter from the JSON spec. Parameters should have names, // as that makes the bitcode slightly easier to read, but names are // not required. They must have types, and these types should be mostly @@ -337,12 +361,6 @@ Result ProtobufTranslator::DecodeParameter( return {"Parameter with no representation"}; } auto &repr_var = param.repr_var(); - if (repr_var.values_size() != 1) { - std::stringstream ss; - ss << "Unsupported number of values for parameter spec: " - << repr_var.values_size(); - return ss.str(); - } if (!repr_var.has_type()) { return {"Parameter without type spec"}; @@ -352,8 +370,8 @@ Result ProtobufTranslator::DecodeParameter( return maybe_type.TakeError(); } - auto &val = repr_var.values()[0]; - auto maybe_decl = DecodeValue(val, maybe_type.Value(), "function parameter"); + auto maybe_decl = DecodeValueDecl(repr_var.values(), maybe_type.Value(), + "function parameter"); if (!maybe_decl.Succeeded()) { return maybe_decl.TakeError(); } @@ -542,21 +560,21 @@ Result ProtobufTranslator::DecodeFunction( } for (auto &[name, local] : function.local_variables()) { - decl.locals[name].name = name; auto type_spec = DecodeType(local.type()); if (!type_spec.Succeeded()) { return type_spec.Error(); } - for (auto &value : local.values()) { - auto value_decl = DecodeValue(value, type_spec.Value(), "local variable"); - if (!value_decl.Succeeded()) { - return value_decl.Error(); - } - decl.locals[name].values.push_back(value_decl.Value()); + auto value_decl = + DecodeValueDecl(local.values(), type_spec.Value(), "local 
variable"); + if (!value_decl.Succeeded()) { + return value_decl.Error(); } + + decl.locals[name] = {value_decl.TakeValue(), name}; } + return decl; } @@ -568,8 +586,6 @@ void ProtobufTranslator::AddLiveValuesToBB( auto &v = map.insert({bb_addr, std::vector()}).first->second; for (auto var : values) { - LOG_IF(FATAL, var.repr_var().values_size() != 1) - << "Symbols must be represented by a single valuedecl."; auto param = DecodeParameter(var); if (!param.Succeeded()) { LOG(ERROR) << "Unable to decode live parameter " << param.TakeError(); @@ -611,8 +627,8 @@ void ProtobufTranslator::ParseCFGIntoFunction( auto stackptr_type_spec = SizeToType(stackptr->size * 8); auto target_vdecl = - DecodeValue(symval.target_value().values()[0], stackptr_type_spec, - "Unable to get value decl for stack offset relation"); + DecodeValueDecl(symval.target_value().values(), stackptr_type_spec, + "Unable to get value decl for stack offset relation"); if (!target_vdecl.Succeeded()) { LOG(ERROR) << "Failed to lift value " << target_vdecl.TakeError(); @@ -639,8 +655,8 @@ void ProtobufTranslator::ParseCFGIntoFunction( symval.curr_val().constant().is_tainted_by_pc(); DLOG(INFO) << "Adding global register override for " - << const_val.target_value.reg->name << " " << std::hex - << const_val.value; + << const_val.target_value.oredered_locs[0].reg->name << " " + << std::hex << const_val.value; constant_values.push_back(const_val); } else { LOG(FATAL) << symval.curr_val().GetTypeName() diff --git a/lib/Protobuf.h b/lib/Protobuf.h index 7f93a85e3..ef50ceb90 100644 --- a/lib/Protobuf.h +++ b/lib/Protobuf.h @@ -16,6 +16,7 @@ #include #include #include +#include #include "anvill/Type.h" #include "specification.pb.h" @@ -62,11 +63,22 @@ class ProtobufTranslator { const ::specification::TypeSpec &obj, const std::unordered_map &map); + // Parse the location of a value. This applies to both parameters and // return values. 
+ anvill::Result + DecodeLowLoc(const ::specification::Value &value, const char *desc) const; + anvill::Result - DecodeValue(const ::specification::Value &obj, TypeSpec type, - const char *desc) const; + ValueDeclFromOrderedLowLoc(std::vector loc, TypeSpec type, + const char *desc) const; + + // Parse the location of a value. This applies to both parameters and + // return values. + anvill::Result DecodeValueDecl( + const ::google::protobuf::RepeatedPtrField<::specification::Value> + &values, + TypeSpec type, const char *desc) const; Result diff --git a/lib/Utils.cpp b/lib/Utils.cpp index b7dda781b..8fff44458 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -13,11 +13,14 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #include #include #include @@ -29,15 +32,20 @@ #include #include #include +#include #include #include #include #include +#include +#include #include +#include #include #include #include +#include namespace anvill { @@ -316,24 +324,90 @@ void StoreNativeValueToRegister(llvm::Value *native_val, } -llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, +llvm::Value *LoadSubcomponent(const LowLoc &loc, llvm::Type *target_type, const TypeDictionary &types, const remill::IntrinsicTable &intrinsics, llvm::IRBuilder<> &ir, llvm::Value *state_ptr, llvm::Value *mem_ptr) { - auto func = ir.GetInsertBlock()->getParent(); auto module = func->getParent(); auto &context = module->getContext(); + CHECK_EQ(module, intrinsics.read_memory_8->getParent()); - llvm::Type *decl_type = remill::RecontextualizeType(decl.type, context); + llvm::Type *decl_type = remill::RecontextualizeType(target_type, context); - CHECK_EQ(module, intrinsics.read_memory_8->getParent()); - CHECK_EQ(native_val->getType(), decl_type); + // Load it out of a register. 
+ if (loc.reg) { + auto reg_type = remill::RecontextualizeType(loc.reg->type, context); + auto ptr_to_reg = loc.reg->AddressOf(state_ptr, ir); + auto reg = ir.CreateLoad(reg_type, ptr_to_reg); + CopyMetadataTo(mem_ptr, reg); + auto adapted_val = types.ConvertValueToType(ir, reg, decl_type); + + if (adapted_val) { + return adapted_val; + } else { + auto bc = + ir.CreateBitCast(ptr_to_reg, llvm::PointerType::get(context, 0)); + auto li = ir.CreateLoad(decl_type, bc); + CopyMetadataTo(mem_ptr, bc); + CopyMetadataTo(mem_ptr, li); + return li; + } + + // Load it out of memory. + } else if (loc.mem_reg) { + auto mem_reg_type = remill::RecontextualizeType(loc.mem_reg->type, context); + auto ptr_to_reg = loc.mem_reg->AddressOf(state_ptr, ir); + llvm::Value *addr = ir.CreateLoad(mem_reg_type, ptr_to_reg); + CopyMetadataTo(mem_ptr, addr); + if (0ll < loc.mem_offset) { + addr = ir.CreateAdd( + addr, + llvm::ConstantInt::get( + mem_reg_type, static_cast(loc.mem_offset), false)); + CopyMetadataTo(mem_ptr, addr); + + } else if (0ll > loc.mem_offset) { + addr = ir.CreateSub( + addr, llvm::ConstantInt::get( + mem_reg_type, static_cast(-loc.mem_offset), + false)); + CopyMetadataTo(mem_ptr, addr); + } + + auto val = remill::LoadFromMemory(intrinsics, ir, decl_type, mem_ptr, addr); + return types.ConvertValueToType(ir, val, decl_type); + + // Store to memory at an absolute offset. 
+ } else if (loc.mem_offset) { + const auto addr = llvm::ConstantInt::get( + remill::NthArgument(intrinsics.read_memory_8, 1u)->getType(), + static_cast(loc.mem_offset), false); + auto val = remill::LoadFromMemory(intrinsics, ir, decl_type, mem_ptr, addr); + + CopyMetadataTo(mem_ptr, val); + return types.ConvertValueToType(ir, val, decl_type); + + } else { + DLOG(ERROR) << "Unable to load lifted value of type: " + << remill::LLVMThingToString(target_type); + return llvm::UndefValue::get(decl_type); + } +} + + +llvm::Value *StoreSubcomponent(llvm::Value *native_sub, const LowLoc &decl, + const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::IRBuilder<> &ir, llvm::Value *state_ptr, + llvm::Value *mem_ptr) { + + llvm::LLVMContext &context = state_ptr->getContext(); // Store it to a register. if (decl.reg) { - StoreNativeValueToRegister(native_val, decl.reg, types, intrinsics, ir, + StoreNativeValueToRegister(native_sub, decl.reg, types, intrinsics, ir, state_ptr); return mem_ptr; @@ -344,37 +418,103 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, ir); llvm::Value *addr = ir.CreateLoad(mem_reg_type, ptr_to_reg); - CopyMetadataTo(native_val, addr); + CopyMetadataTo(native_sub, addr); if (0ll < decl.mem_offset) { addr = ir.CreateAdd( addr, llvm::ConstantInt::get( mem_reg_type, static_cast(decl.mem_offset), false)); - CopyMetadataTo(native_val, addr); + CopyMetadataTo(native_sub, addr); } else if (0ll > decl.mem_offset) { addr = ir.CreateSub( addr, llvm::ConstantInt::get( mem_reg_type, static_cast(-decl.mem_offset), false)); - CopyMetadataTo(native_val, addr); + CopyMetadataTo(native_sub, addr); } - return remill::StoreToMemory(intrinsics, ir, native_val, mem_ptr, addr); + return remill::StoreToMemory(intrinsics, ir, native_sub, mem_ptr, addr); // Store to memory at an absolute offset. 
} else if (decl.mem_offset) { const auto addr = llvm::ConstantInt::get( remill::NthArgument(intrinsics.read_memory_8, 1u)->getType(), static_cast(decl.mem_offset), false); - return remill::StoreToMemory(intrinsics, ir, native_val, mem_ptr, addr); + return remill::StoreToMemory(intrinsics, ir, native_sub, mem_ptr, addr); } else { return llvm::UndefValue::get(mem_ptr->getType()); } } +llvm::Value *ExtractSubcomponent(unsigned int elem, llvm::Type *dest_type, + llvm::Value *native_val, + llvm::Type *native_type, + llvm::IRBuilder<> &ir) { + auto i32 = llvm::IntegerType::getInt32Ty(native_val->getContext()); + return ir.CreateLoad(dest_type, + ir.CreateGEP(native_type, native_val, + {llvm::ConstantInt::get(i32, 0), + llvm::ConstantInt::get(i32, elem)})); +} + + +llvm::IntegerType *LocType(const LowLoc &loc, llvm::LLVMContext &cont) { + return llvm::IntegerType::get(cont, loc.Size() * 8); +} + +llvm::StructType *CreateDeclSty(const std::vector &lowlocs, + llvm::LLVMContext &cont) { + + std::vector tys; + std::transform(lowlocs.begin(), lowlocs.end(), std::back_inserter(tys), + [&cont](const LowLoc &loc) -> llvm::Type * { + return LocType(loc, cont); + }); + return llvm::StructType::get(cont, tys, true); +} + +llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, + const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + llvm::IRBuilder<> &ir, llvm::Value *state_ptr, + llvm::Value *mem_ptr) { + + auto func = ir.GetInsertBlock()->getParent(); + auto module = func->getParent(); + auto &context = module->getContext(); + + llvm::Type *decl_type = remill::RecontextualizeType(decl.type, context); + + CHECK_EQ(module, intrinsics.read_memory_8->getParent()); + CHECK_EQ(native_val->getType(), decl_type); + + if (decl.oredered_locs.size() == 1) { + return StoreSubcomponent(native_val, decl.oredered_locs.at(0), types, + intrinsics, ir, state_ptr, mem_ptr); + } else { + + unsigned int ind = 0; + + auto sty = CreateDeclSty(decl.oredered_locs, 
context); + auto curr_val = ir.CreateAlloca(sty); + + ir.CreateStore(native_val, curr_val); + auto mem = mem_ptr; + for (const auto &comp : decl.oredered_locs) { + auto compvl = + ExtractSubcomponent(ind, LocType(comp, context), curr_val, sty, ir); + mem = StoreSubcomponent(compvl, comp, types, intrinsics, ir, state_ptr, + mem); + ind++; + } + + return mem; + } +} + // Produce one or more instructions in `in_block` to store the // native value `native_val` into the lifted state associated // with `decl`. @@ -390,88 +530,85 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, } -llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, - const remill::IntrinsicTable &intrinsics, - llvm::IRBuilder<> &ir, llvm::Value *state_ptr, - llvm::Value *mem_ptr) { - - auto func = ir.GetInsertBlock()->getParent(); - auto module = func->getParent(); - auto &context = module->getContext(); - CHECK_EQ(module, intrinsics.read_memory_8->getParent()); - - llvm::Type *decl_type = remill::RecontextualizeType(decl.type, context); - - // Load it out of a register. 
- if (decl.reg) { - auto reg_type = remill::RecontextualizeType(decl.reg->type, context); - auto ptr_to_reg = decl.reg->AddressOf(state_ptr, ir); - auto reg = ir.CreateLoad(reg_type, ptr_to_reg); - CopyMetadataTo(mem_ptr, reg); - auto adapted_val = types.ConvertValueToType(ir, reg, decl_type); +std::optional +GetSubcomponentType(const LowLoc &loc, uint64_t offset, llvm::Type *target_type, + llvm::DataLayout &data) { + // there's two situations here, either we have a primitive target type in which case the loc must + // indicate the size for each component, otherwise we decompose the target + if (auto itype = llvm::isa_and_nonnull(target_type)) { + return llvm::IntegerType::get(target_type->getContext(), loc.Size() * 8); + } else { + llvm::Type *ty = target_type; + llvm::APInt off(64, offset); + auto ind = data.getGEPIndexForOffset(ty, off); - if (adapted_val) { - return adapted_val; - } else { - auto bc = - ir.CreateBitCast(ptr_to_reg, llvm::PointerType::get(context, 0)); - auto li = ir.CreateLoad(decl_type, bc); - CopyMetadataTo(mem_ptr, bc); - CopyMetadataTo(mem_ptr, li); - return li; + if (ind) { + return ty; } + } - // Load it out of memory. 
- } else if (decl.mem_reg) { - auto mem_reg_type = - remill::RecontextualizeType(decl.mem_reg->type, context); - auto ptr_to_reg = decl.mem_reg->AddressOf(state_ptr, ir); - llvm::Value *addr = ir.CreateLoad(mem_reg_type, ptr_to_reg); - CopyMetadataTo(mem_ptr, addr); - if (0ll < decl.mem_offset) { - addr = ir.CreateAdd( - addr, llvm::ConstantInt::get( - mem_reg_type, static_cast(decl.mem_offset), - false)); - CopyMetadataTo(mem_ptr, addr); - - } else if (0ll > decl.mem_offset) { - addr = ir.CreateSub( - addr, llvm::ConstantInt::get( - mem_reg_type, static_cast(-decl.mem_offset), - false)); - CopyMetadataTo(mem_ptr, addr); - } + return std::nullopt; +} - auto val = remill::LoadFromMemory(intrinsics, ir, decl_type, mem_ptr, addr); - return types.ConvertValueToType(ir, val, decl_type); +llvm::Value *BuildMultiComponentValue(llvm::IRBuilder<> &ir, + const std::vector comps, + llvm::Type *sty, llvm::Type *target_type, + llvm::DataLayout &dl) { + auto i32_type = llvm::Type::getInt32Ty(sty->getContext()); + auto storage = ir.CreateAlloca(sty); + uint64_t ind = 0; + for (auto c : comps) { + ir.CreateStore(c, ir.CreateGEP(sty, storage, + {llvm::ConstantInt::get(i32_type, 0), + llvm::ConstantInt::get(i32_type, ind)})); + ind += 1; + } - // Store to memory at an absolute offset. 
- } else if (decl.mem_offset) { - const auto addr = llvm::ConstantInt::get( - remill::NthArgument(intrinsics.read_memory_8, 1u)->getType(), - static_cast(decl.mem_offset), false); - auto val = remill::LoadFromMemory(intrinsics, ir, decl_type, mem_ptr, addr); + return ir.CreateLoad(target_type, storage); +} - CopyMetadataTo(mem_ptr, val); - return types.ConvertValueToType(ir, val, decl_type); +llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, + const remill::IntrinsicTable &intrinsics, + const remill::Arch *arch, llvm::IRBuilder<> &ir, + llvm::Value *state_ptr, llvm::Value *mem_ptr) { + if (decl.oredered_locs.size() == 1) { + return LoadSubcomponent(decl.oredered_locs[0], decl.type, types, intrinsics, + ir, state_ptr, mem_ptr); } else { - DLOG(ERROR) << "Unable to load lifted value of type: " - << remill::LLVMThingToString(decl.type); - return llvm::UndefValue::get(decl_type); + uint64_t offset = 0; + std::vector comps; + auto dl = arch->DataLayout(); + + for (const auto &loc : decl.oredered_locs) { + + auto subty = GetSubcomponentType(loc, offset, decl.type, dl); + if (!subty) { + LOG(ERROR) << "Lifted value undef because no subcomponent for " + << remill::LLVMThingToString(decl.type) << " at offset " + << offset; + return llvm::UndefValue::get(decl.type); + } + comps.push_back(LoadSubcomponent(loc, *subty, types, intrinsics, ir, + state_ptr, mem_ptr)); + + offset += loc.Size(); + } + auto sty = CreateDeclSty(decl.oredered_locs, state_ptr->getContext()); + return BuildMultiComponentValue(ir, comps, sty, decl.type, dl); } } llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, const remill::IntrinsicTable &intrinsics, + const remill::Arch *arch, llvm::BasicBlock *in_block, llvm::Value *state_ptr, llvm::Value *mem_ptr) { llvm::IRBuilder ir(in_block); - return LoadLiftedValue(decl, types, intrinsics, ir, state_ptr, mem_ptr); + return LoadLiftedValue(decl, types, intrinsics, arch, ir, state_ptr, mem_ptr); } 
namespace { @@ -844,5 +981,15 @@ ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, return func->getArg(index + remill::kNumBlockArgs + 1); } +bool HasMemLoc(const ValueDecl &v) { + return std::any_of(v.oredered_locs.begin(), v.oredered_locs.end(), + [](const LowLoc &loc) -> bool { return loc.mem_reg; }); +} + +bool HasRegLoc(const ValueDecl &v) { + return std::any_of(v.oredered_locs.begin(), v.oredered_locs.end(), + [](const LowLoc &loc) -> bool { return loc.reg; }); +} + } // namespace anvill diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 3bf1ea36c..2f5ae380e 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 3bf1ea36c73ad35cbf8faa44f8b9bd8c8f911f97 +Subproject commit 2f5ae380e78f2288ead5f4c1c6aef30c68c9d721 diff --git a/remill b/remill index 63406400b..b26e8ebe0 160000 --- a/remill +++ b/remill @@ -1 +1 @@ -Subproject commit 63406400b489c92190e81beb31593fe54e58eaac +Subproject commit b26e8ebe0e443622f3b3606bb184256a59e02bde From 59d317e4a43171913dcd7ff5ea96460a7421a22e Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 24 Feb 2023 12:43:49 -0500 Subject: [PATCH 113/163] hack to force pack lives to occur before recovered anvill returns (#357) --- lib/Passes/RemoveAssignmentsToNextPC.cpp | 16 ++-------------- ...lFunctionReturnsWithAnvillFunctionReturns.cpp | 7 ++++++- lib/Passes/Utils.cpp | 15 +++++++++++++++ lib/Passes/Utils.h | 2 ++ 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/lib/Passes/RemoveAssignmentsToNextPC.cpp b/lib/Passes/RemoveAssignmentsToNextPC.cpp index 08db0151c..b1836dc6d 100644 --- a/lib/Passes/RemoveAssignmentsToNextPC.cpp +++ b/lib/Passes/RemoveAssignmentsToNextPC.cpp @@ -17,6 +17,8 @@ #include +#include "Utils.h" + namespace anvill { llvm::StringRef RemoveAssignmentsToNextPC::name(void) { @@ -39,20 +41,6 @@ UniqueAssignmentToNextPc(llvm::Function *func) { return std::nullopt; } -std::optional UniqueReturn(llvm::Function *func) { - 
std::optional r = std::nullopt; - for (auto &insn : llvm::instructions(func)) { - if (auto nret = llvm::dyn_cast(&insn)) { - if (r) { - return std::nullopt; - } else { - r = nret; - } - } - } - - return r; -} llvm::Function *GetOrCreateGotoInstrinsic(llvm::Module *mod, llvm::IntegerType *addr_ty) { diff --git a/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp b/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp index 86cc6bf18..cea542ede 100644 --- a/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp +++ b/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp @@ -40,6 +40,8 @@ ReplaceRemillFunctionReturnsWithAnvillFunctionReturns::runOnBasicBlockFunction( } + auto unique_ret = UniqueReturn(&F); + ValueDecl ret_decl = bbcont.ReturnValue(); remill::IntrinsicTable intrinsics(F.getParent()); auto pres_analyses = llvm::PreservedAnalyses::all(); @@ -48,7 +50,10 @@ ReplaceRemillFunctionReturnsWithAnvillFunctionReturns::runOnBasicBlockFunction( auto mem = rep->getArgOperand(2); llvm::IRBuilder<> ir(rep); ir.SetInsertPoint(rep); - + // TODO(Ian): assumes the block is terminated by a ret... 
what about conditional returns + if (unique_ret && to_replace.size() == 1) { + ir.SetInsertPoint(*unique_ret); + } std::vector args; diff --git a/lib/Passes/Utils.cpp b/lib/Passes/Utils.cpp index d1bef8a2b..05197a2ac 100644 --- a/lib/Passes/Utils.cpp +++ b/lib/Passes/Utils.cpp @@ -245,4 +245,19 @@ llvm::Function *GetOrCreateAnvillReturnFunc(llvm::Module *mod) { anvill::kAnvillBasicBlockReturn, mod); } +std::optional UniqueReturn(llvm::Function *func) { + std::optional r = std::nullopt; + for (auto &insn : llvm::instructions(func)) { + if (auto nret = llvm::dyn_cast(&insn)) { + if (r) { + return std::nullopt; + } else { + r = nret; + } + } + } + + return r; +} + } // namespace anvill diff --git a/lib/Passes/Utils.h b/lib/Passes/Utils.h index 536a26806..23d7b949f 100644 --- a/lib/Passes/Utils.h +++ b/lib/Passes/Utils.h @@ -78,4 +78,6 @@ llvm::Function *AddressOfReturnAddressFunction(llvm::Module *module); llvm::Function *GetOrCreateAnvillReturnFunc(llvm::Module *module); +std::optional UniqueReturn(llvm::Function *func); + } // namespace anvill From 8c27f410d0d8fe2342064ec5c516c75648a5d642 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Sun, 12 Mar 2023 13:21:08 -0400 Subject: [PATCH 114/163] adapt register types to address types as needed --- lib/Utils.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 8fff44458..cb7d6f305 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -376,6 +376,10 @@ llvm::Value *LoadSubcomponent(const LowLoc &loc, llvm::Type *target_type, CopyMetadataTo(mem_ptr, addr); } + if (addr->getType() != loc.mem_reg->arch->AddressType()) { + addr = AdaptToType(ir, addr, loc.mem_reg->arch->AddressType()); + } + auto val = remill::LoadFromMemory(intrinsics, ir, decl_type, mem_ptr, addr); return types.ConvertValueToType(ir, val, decl_type); @@ -420,6 +424,7 @@ llvm::Value *StoreSubcomponent(llvm::Value *native_sub, const LowLoc &decl, llvm::Value *addr = ir.CreateLoad(mem_reg_type, ptr_to_reg); 
CopyMetadataTo(native_sub, addr); + if (0ll < decl.mem_offset) { addr = ir.CreateAdd( addr, llvm::ConstantInt::get( @@ -435,6 +440,10 @@ llvm::Value *StoreSubcomponent(llvm::Value *native_sub, const LowLoc &decl, CopyMetadataTo(native_sub, addr); } + if (addr->getType() != decl.mem_reg->arch->AddressType()) { + addr = AdaptToType(ir, addr, decl.mem_reg->arch->AddressType()); + } + return remill::StoreToMemory(intrinsics, ir, native_sub, mem_ptr, addr); // Store to memory at an absolute offset. From 23a2ee35697673bd09bdf5d254f80b75c590e57f Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 10 Mar 2023 16:34:22 -0500 Subject: [PATCH 115/163] simple inlining --- include/anvill/Lifters.h | 5 +- .../anvill/Passes/InlineBasicBlockFunctions.h | 33 ++++++++++ include/anvill/Passes/RemoveAnvillReturns.h | 16 +++++ lib/CMakeLists.txt | 2 + lib/Optimize.cpp | 42 +++++++++++- lib/Passes/InlineBasicBlockFunctions.cpp | 39 +++++++++++ lib/Passes/RemoveAnvillReturns.cpp | 65 +++++++++++++++++++ 7 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 include/anvill/Passes/InlineBasicBlockFunctions.h create mode 100644 include/anvill/Passes/RemoveAnvillReturns.h create mode 100644 lib/Passes/InlineBasicBlockFunctions.cpp create mode 100644 lib/Passes/RemoveAnvillReturns.cpp diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index 6c71a1dde..e751ea9ea 100644 --- a/include/anvill/Lifters.h +++ b/include/anvill/Lifters.h @@ -219,7 +219,8 @@ class LifterOptions { //TODO(ian): This should be initialized by an OS + arch pair stack_pointer_is_signed(false), should_remove_anvill_pc(true), - should_remove_assignments_to_next_pc(false) { + should_remove_assignments_to_next_pc(false), + should_inline_basic_blocks(true) { CheckModuleContextMatchesArch(); } @@ -293,6 +294,8 @@ class LifterOptions { bool should_remove_assignments_to_next_pc : 1; + bool should_inline_basic_blocks : 1; + private: LifterOptions(void) = delete; diff --git 
a/include/anvill/Passes/InlineBasicBlockFunctions.h b/include/anvill/Passes/InlineBasicBlockFunctions.h new file mode 100644 index 000000000..6c4c42b87 --- /dev/null +++ b/include/anvill/Passes/InlineBasicBlockFunctions.h @@ -0,0 +1,33 @@ + +#pragma once + +#include +#include +#include + +#include "anvill/Lifters.h" + + +namespace anvill { + +// attempts to replace assignments to next pc with idiomatic control flow that terminates the block +// with the goto intrinsic +class InlineBasicBlockFunctions final + : public BasicBlockPass { + private: + const EntityLifter &lifter; + + public: + InlineBasicBlockFunctions(const BasicBlockContexts &contexts, + const EntityLifter &lifter) + : BasicBlockPass(contexts), + lifter(lifter) {} + + static llvm::StringRef name(void); + + + llvm::PreservedAnalyses + runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, + const anvill::BasicBlockContext &); +}; +} // namespace anvill \ No newline at end of file diff --git a/include/anvill/Passes/RemoveAnvillReturns.h b/include/anvill/Passes/RemoveAnvillReturns.h new file mode 100644 index 000000000..dd362b6ad --- /dev/null +++ b/include/anvill/Passes/RemoveAnvillReturns.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +namespace anvill { +class RemoveAnvillReturns final + : public llvm::PassInfoMixin { + public: + RemoveAnvillReturns(void) {} + + static llvm::StringRef name(void); + + llvm::PreservedAnalyses run(llvm::Function &F, + llvm::FunctionAnalysisManager &AM); +}; +} // namespace anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index d1a037200..2f68bfa93 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -62,6 +62,8 @@ set(anvill_passes RemoveCallIntrinsics ReplaceRemillFunctionReturnsWithAnvillFunctionReturns RemoveAssignmentsToNextPC + InlineBasicBlockFunctions + RemoveAnvillReturns ) set(anvill_arch_HEADERS diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 1082fc889..35dc84598 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp 
@@ -11,6 +11,7 @@ #include // clang-format off +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -59,7 +61,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -74,7 +78,7 @@ #include #include -#include "anvill/Passes/RemoveAssignmentsToNextPC.h" +#include "anvill/Passes/RemoveAnvillReturns.h" #include "anvill/Specification.h" namespace anvill { @@ -338,6 +342,42 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, } } + if (lifter.Options().should_inline_basic_blocks) { + llvm::FunctionPassManager inliner; + + inliner.addPass(InlineBasicBlockFunctions(contexts, lifter)); + + llvm::ModulePassManager mpminliner; + mpminliner.addPass( + llvm::createModuleToFunctionPassAdaptor(std::move(inliner))); + mpminliner.addPass( + llvm::createModuleToPostOrderCGSCCPassAdaptor(llvm::InlinerPass())); + llvm::FunctionPassManager rm_returns; + rm_returns.addPass(anvill::RemoveAnvillReturns()); + mpminliner.addPass( + llvm::createModuleToFunctionPassAdaptor(std::move(rm_returns))); + + mpminliner.run(module, mam); + + // lets make sure we eliminate all the basic block functions because we dont care anymore + for (auto &f : module.getFunctionList()) { + if (anvill::GetBasicBlockAddr(&f)) { + f.setLinkage(llvm::GlobalValue::InternalLinkage); + } + } + + auto intrinsics = module.getFunction("__remill_intrinsics"); + if (intrinsics) { + intrinsics->eraseFromParent(); + } + + auto defaultmpm = + pb.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O3); + + defaultmpm.run(module, mam); + } + + // Manually clear the analyses to prevent ASAN failures in the destructors. 
mam.clear(); fam.clear(); diff --git a/lib/Passes/InlineBasicBlockFunctions.cpp b/lib/Passes/InlineBasicBlockFunctions.cpp new file mode 100644 index 000000000..7b2b8095b --- /dev/null +++ b/lib/Passes/InlineBasicBlockFunctions.cpp @@ -0,0 +1,39 @@ +#include "anvill/Passes/InlineBasicBlockFunctions.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "Utils.h" + +namespace anvill { + +llvm::StringRef InlineBasicBlockFunctions::name(void) { + return "Inline the basic block functions"; +} + +llvm::PreservedAnalyses InlineBasicBlockFunctions::runOnBasicBlockFunction( + llvm::Function &F, llvm::FunctionAnalysisManager &AM, + const anvill::BasicBlockContext &cont) { + F.removeFnAttr(llvm::Attribute::NoInline); + F.addFnAttr(llvm::Attribute::AlwaysInline); + return llvm::PreservedAnalyses::all(); +} + +} // namespace anvill \ No newline at end of file diff --git a/lib/Passes/RemoveAnvillReturns.cpp b/lib/Passes/RemoveAnvillReturns.cpp new file mode 100644 index 000000000..d414ee587 --- /dev/null +++ b/lib/Passes/RemoveAnvillReturns.cpp @@ -0,0 +1,65 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "Utils.h" + +namespace anvill { +llvm::StringRef RemoveAnvillReturns::name(void) { + return "Remove anvill returns"; +} + +llvm::PreservedAnalyses +RemoveAnvillReturns::run(llvm::Function &F, llvm::FunctionAnalysisManager &AM) { + auto intrinsic = F.getParent()->getFunction(anvill::kAnvillBasicBlockReturn); + bool changed = false; + + if (intrinsic) { + std::vector calls; + for (auto &insn : llvm::instructions(&F)) { + if (auto cc = llvm::dyn_cast(&insn)) { + if (cc->getCalledFunction() == intrinsic) { + calls.push_back(cc); + } + } + } + + for (auto cc : calls) { + if (F.getReturnType() == 
cc->getArgOperand(0)->getType()) { + changed = true; + auto to_block = cc->getParent()->getTerminator(); + // block must be wellformed + CHECK(to_block); + to_block->eraseFromParent(); + + llvm::ReturnInst::Create(F.getContext(), cc->getArgOperand(0), + cc->getParent()); + cc->eraseFromParent(); + } else { + + LOG(ERROR) << "Ret ty: " << remill::LLVMThingToString(F.getReturnType()) + << " arg mismatch: " + << remill::LLVMThingToString( + cc->getArgOperand(0)->getType()); + } + } + } + + return changed ? llvm::PreservedAnalyses::none() + : llvm::PreservedAnalyses::all(); +} +} // namespace anvill \ No newline at end of file From dd89a2316c1d5ade3640aba5f727dc93c2605b67 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Sun, 12 Mar 2023 13:10:09 -0400 Subject: [PATCH 116/163] add inline option --- bin/Decompile/Main.cpp | 4 ++++ include/anvill/Lifters.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index 638124175..c8741b7d6 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -47,6 +47,9 @@ DEFINE_bool(disable_opt, false, "Dont apply optimization passes"); DEFINE_bool(llvm_debug, false, "Enable LLVM debug flag"); DEFINE_bool(remove_next_pc_assignments, false, "Enables remove next pc assignment pass"); +DEFINE_bool(inline_basic_blocks, false, + "Enables inlining of basic blocks for high level output"); + DEFINE_string( default_callable_spec, "", @@ -160,6 +163,7 @@ int main(int argc, char *argv[]) { anvill::SpecificationControlFlowProvider cfp(spec); anvill::SpecificationMemoryProvider mp(spec); anvill::LifterOptions options(spec.Arch().get(), module, *tp.get(), cfp, mp); + options.should_inline_basic_blocks = FLAGS_inline_basic_blocks; // options.state_struct_init_procedure = // anvill::StateStructureInitializationProcedure::kNone; diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index e751ea9ea..aa416eb30 100644 --- a/include/anvill/Lifters.h +++ 
b/include/anvill/Lifters.h @@ -220,7 +220,7 @@ class LifterOptions { stack_pointer_is_signed(false), should_remove_anvill_pc(true), should_remove_assignments_to_next_pc(false), - should_inline_basic_blocks(true) { + should_inline_basic_blocks(false) { CheckModuleContextMatchesArch(); } From b7dfc5b7fa13b48128f1c193e20dcf50df854be8 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Fri, 17 Mar 2023 16:04:26 -0400 Subject: [PATCH 117/163] add namespace to pass name --- lib/Optimize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 35dc84598..4ffabe190 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -320,7 +320,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, second_fpm.addPass(llvm::VerifierPass()); AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); - second_fpm.addPass(CodeQualityStatCollector()); + second_fpm.addPass(anvill::CodeQualityStatCollector()); second_fpm.addPass(llvm::VerifierPass()); AddConvertXorsToCmps(second_fpm); second_fpm.addPass(llvm::VerifierPass()); From e8448556d1de15b009bcf3fc27bb5e72b077c303 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Fri, 17 Mar 2023 16:04:58 -0400 Subject: [PATCH 118/163] Always enable statistics, collect anvill_sp stores/loads --- lib/Passes/CodeQualityStatCollector.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/lib/Passes/CodeQualityStatCollector.cpp b/lib/Passes/CodeQualityStatCollector.cpp index c1078404e..4eeafb2dd 100644 --- a/lib/Passes/CodeQualityStatCollector.cpp +++ b/lib/Passes/CodeQualityStatCollector.cpp @@ -11,13 +11,14 @@ #include namespace anvill { -STATISTIC( +ALWAYS_ENABLED_STATISTIC( ConditionalComplexity, "A factor that approximates the complexity of the condition in branch instructions"); -STATISTIC(NumberOfInstructions, "Total number of instructions"); 
-STATISTIC(AbruptControlFlow, "Indirect control flow instructions"); -STATISTIC(IntToPointerCasts, "Integer to pointer casts"); -STATISTIC(PointerToIntCasts, "Pointer to integer casts"); +ALWAYS_ENABLED_STATISTIC(NumberOfInstructions, "Total number of instructions"); +ALWAYS_ENABLED_STATISTIC(AbruptControlFlow, "Indirect control flow instructions"); +ALWAYS_ENABLED_STATISTIC(IntToPointerCasts, "Integer to pointer casts"); +ALWAYS_ENABLED_STATISTIC(PointerToIntCasts, "Pointer to integer casts"); +ALWAYS_ENABLED_STATISTIC(AnvillStackPointers, "Anvill stack pointer"); namespace { @@ -69,6 +70,17 @@ CodeQualityStatCollector::run(llvm::Function &function, PointerToIntCasts++; } + if (auto *store_inst = llvm::dyn_cast(&i)) { + if (store_inst->getPointerOperand()->getName() == kSymbolicSPName) { + ++AnvillStackPointers; + } + } + + if (auto *load_inst = llvm::dyn_cast(&i)) { + if (load_inst->getPointerOperand()->getName() == kSymbolicSPName) { + ++AnvillStackPointers; + } + } NumberOfInstructions++; if (auto *branch = llvm::dyn_cast(&i)) { @@ -94,4 +106,4 @@ llvm::StringRef CodeQualityStatCollector::name(void) { return "CodeQualityStatCollector"; } -} // namespace anvill \ No newline at end of file +} // namespace anvill From 29fdbbd8351c35da3a45d3468be7f989f21d662f Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Fri, 17 Mar 2023 16:12:01 -0400 Subject: [PATCH 119/163] make increment consistent with llvm --- lib/Passes/CodeQualityStatCollector.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/Passes/CodeQualityStatCollector.cpp b/lib/Passes/CodeQualityStatCollector.cpp index 4eeafb2dd..aa51e4ee9 100644 --- a/lib/Passes/CodeQualityStatCollector.cpp +++ b/lib/Passes/CodeQualityStatCollector.cpp @@ -36,7 +36,7 @@ class ConditionalComplexityVisitor void visitBinaryOperator(llvm::BinaryOperator &I) { if (auto *inttype = llvm::dyn_cast(I.getType())) { if (inttype->getBitWidth() == 1) { - 
ConditionalComplexity++; + ++ConditionalComplexity; this->tryVisit(I.getOperand(0)); this->tryVisit(I.getOperand(1)); } @@ -44,12 +44,12 @@ class ConditionalComplexityVisitor } void visitCmpInst(llvm::CmpInst &I) { - ConditionalComplexity++; + ++ConditionalComplexity; } void visitUnaryOperator(llvm::UnaryOperator &I) { if (auto *inttype = llvm::dyn_cast(I.getType())) { - ConditionalComplexity++; + ++ConditionalComplexity; this->tryVisit(I.getOperand(0)); } } @@ -63,11 +63,11 @@ CodeQualityStatCollector::run(llvm::Function &function, ConditionalComplexityVisitor complexity_visitor; for (auto &i : llvm::instructions(function)) { if (auto *int_to_ptr = llvm::dyn_cast(&i)) { - IntToPointerCasts++; + ++IntToPointerCasts; } if (auto *int_to_ptr = llvm::dyn_cast(&i)) { - PointerToIntCasts++; + ++PointerToIntCasts; } if (auto *store_inst = llvm::dyn_cast(&i)) { @@ -82,7 +82,7 @@ CodeQualityStatCollector::run(llvm::Function &function, } } - NumberOfInstructions++; + ++NumberOfInstructions; if (auto *branch = llvm::dyn_cast(&i)) { if (branch->isConditional()) { complexity_visitor.tryVisit(branch->getCondition()); @@ -94,7 +94,7 @@ CodeQualityStatCollector::run(llvm::Function &function, if (target != nullptr) { if (target->getName() == kAnvillSwitchCompleteFunc || target->getName() == kAnvillSwitchIncompleteFunc) { - AbruptControlFlow++; + ++AbruptControlFlow; } } } From 7d68247c09f50c214f607535ff40226ca87f51dc Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Tue, 21 Mar 2023 17:44:30 -0400 Subject: [PATCH 120/163] count references to anvill stack/pc by counting uses --- lib/Passes/CodeQualityStatCollector.cpp | 40 +++++++++++++++++-------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/lib/Passes/CodeQualityStatCollector.cpp b/lib/Passes/CodeQualityStatCollector.cpp index aa51e4ee9..ea12a145d 100644 --- a/lib/Passes/CodeQualityStatCollector.cpp +++ b/lib/Passes/CodeQualityStatCollector.cpp @@ -19,6 +19,7 @@ 
ALWAYS_ENABLED_STATISTIC(AbruptControlFlow, "Indirect control flow instructions" ALWAYS_ENABLED_STATISTIC(IntToPointerCasts, "Integer to pointer casts"); ALWAYS_ENABLED_STATISTIC(PointerToIntCasts, "Pointer to integer casts"); ALWAYS_ENABLED_STATISTIC(AnvillStackPointers, "Anvill stack pointer"); +ALWAYS_ENABLED_STATISTIC(AnvillPCPointers, "Anvill pc pointer"); namespace { @@ -61,6 +62,33 @@ llvm::PreservedAnalyses CodeQualityStatCollector::run(llvm::Function &function, llvm::FunctionAnalysisManager &analysisManager) { ConditionalComplexityVisitor complexity_visitor; + llvm::GlobalVariable* anvill_sp = function.getParent()->getGlobalVariable(kSymbolicSPName); + llvm::GlobalVariable* anvill_pc = function.getParent()->getGlobalVariable(kSymbolicPCName); + + if (anvill_sp != nullptr) { + for (const auto &U: anvill_sp->uses()) { + const auto &user = U.getUser(); + if (const llvm::Instruction *I = llvm::dyn_cast(user)) { + if (I->getFunction() == &function) { + ++AnvillStackPointers; + I->dump(); + } + } + } + } + + if (anvill_pc != nullptr) { + for (const auto &U: anvill_pc->uses()) { + const auto &user = U.getUser(); + if (const llvm::Instruction *I = llvm::dyn_cast(user)) { + if (I->getFunction() == &function) { + ++AnvillPCPointers; + I->dump(); + } + } + } + } + for (auto &i : llvm::instructions(function)) { if (auto *int_to_ptr = llvm::dyn_cast(&i)) { ++IntToPointerCasts; @@ -70,18 +98,6 @@ CodeQualityStatCollector::run(llvm::Function &function, ++PointerToIntCasts; } - if (auto *store_inst = llvm::dyn_cast(&i)) { - if (store_inst->getPointerOperand()->getName() == kSymbolicSPName) { - ++AnvillStackPointers; - } - } - - if (auto *load_inst = llvm::dyn_cast(&i)) { - if (load_inst->getPointerOperand()->getName() == kSymbolicSPName) { - ++AnvillStackPointers; - } - } - ++NumberOfInstructions; if (auto *branch = llvm::dyn_cast(&i)) { if (branch->isConditional()) { From 4a229d9f256fd7ff692b48dd997647bfb7af30bb Mon Sep 17 00:00:00 2001 From: William Tan 
<1284324+Ninja3047@users.noreply.github.com> Date: Wed, 22 Mar 2023 09:33:26 -0400 Subject: [PATCH 121/163] convert to module pass --- .../anvill/Passes/CodeQualityStatCollector.h | 6 +- lib/Optimize.cpp | 2 +- lib/Passes/CodeQualityStatCollector.cpp | 56 +++++++++---------- 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/include/anvill/Passes/CodeQualityStatCollector.h b/include/anvill/Passes/CodeQualityStatCollector.h index 16c082864..75ed4ef9a 100644 --- a/include/anvill/Passes/CodeQualityStatCollector.h +++ b/include/anvill/Passes/CodeQualityStatCollector.h @@ -7,9 +7,9 @@ namespace anvill { class CodeQualityStatCollector : public llvm::PassInfoMixin { public: - llvm::PreservedAnalyses run(llvm::Function &function, - llvm::FunctionAnalysisManager &analysisManager); + llvm::PreservedAnalyses run(llvm::Module &module, + llvm::ModuleAnalysisManager &analysisManager); static llvm::StringRef name(void); }; -} // namespace anvill \ No newline at end of file +} // namespace anvill diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 4ffabe190..9506fbb08 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -320,7 +320,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, second_fpm.addPass(llvm::VerifierPass()); AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); - second_fpm.addPass(anvill::CodeQualityStatCollector()); second_fpm.addPass(llvm::VerifierPass()); AddConvertXorsToCmps(second_fpm); second_fpm.addPass(llvm::VerifierPass()); @@ -331,6 +330,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(second_fpm))); + mpm.addPass(anvill::CodeQualityStatCollector()); mpm.run(module, mam); // Get rid of all final uses of `__anvill_pc`. 
diff --git a/lib/Passes/CodeQualityStatCollector.cpp b/lib/Passes/CodeQualityStatCollector.cpp index ea12a145d..5153609d8 100644 --- a/lib/Passes/CodeQualityStatCollector.cpp +++ b/lib/Passes/CodeQualityStatCollector.cpp @@ -59,20 +59,17 @@ class ConditionalComplexityVisitor llvm::PreservedAnalyses -CodeQualityStatCollector::run(llvm::Function &function, - llvm::FunctionAnalysisManager &analysisManager) { +CodeQualityStatCollector::run(llvm::Module &module, + llvm::ModuleAnalysisManager &analysisManager) { ConditionalComplexityVisitor complexity_visitor; - llvm::GlobalVariable* anvill_sp = function.getParent()->getGlobalVariable(kSymbolicSPName); - llvm::GlobalVariable* anvill_pc = function.getParent()->getGlobalVariable(kSymbolicPCName); + llvm::GlobalVariable* anvill_sp = module.getGlobalVariable(kSymbolicSPName); + llvm::GlobalVariable* anvill_pc = module.getGlobalVariable(kSymbolicPCName); if (anvill_sp != nullptr) { for (const auto &U: anvill_sp->uses()) { const auto &user = U.getUser(); if (const llvm::Instruction *I = llvm::dyn_cast(user)) { - if (I->getFunction() == &function) { - ++AnvillStackPointers; - I->dump(); - } + ++AnvillStackPointers; } } } @@ -81,36 +78,35 @@ CodeQualityStatCollector::run(llvm::Function &function, for (const auto &U: anvill_pc->uses()) { const auto &user = U.getUser(); if (const llvm::Instruction *I = llvm::dyn_cast(user)) { - if (I->getFunction() == &function) { - ++AnvillPCPointers; - I->dump(); - } + ++AnvillPCPointers; } } } - for (auto &i : llvm::instructions(function)) { - if (auto *int_to_ptr = llvm::dyn_cast(&i)) { - ++IntToPointerCasts; - } + for (auto &function : module) { + for (auto &i : llvm::instructions(function)) { + if (auto *int_to_ptr = llvm::dyn_cast(&i)) { + ++IntToPointerCasts; + } - if (auto *int_to_ptr = llvm::dyn_cast(&i)) { - ++PointerToIntCasts; - } + if (auto *int_to_ptr = llvm::dyn_cast(&i)) { + ++PointerToIntCasts; + } - ++NumberOfInstructions; - if (auto *branch = llvm::dyn_cast(&i)) { - if 
(branch->isConditional()) { - complexity_visitor.tryVisit(branch->getCondition()); + ++NumberOfInstructions; + if (auto *branch = llvm::dyn_cast(&i)) { + if (branch->isConditional()) { + complexity_visitor.tryVisit(branch->getCondition()); + } } - } - if (auto *cb = llvm::dyn_cast(&i)) { - auto target = cb->getCalledFunction(); - if (target != nullptr) { - if (target->getName() == kAnvillSwitchCompleteFunc || - target->getName() == kAnvillSwitchIncompleteFunc) { - ++AbruptControlFlow; + if (auto *cb = llvm::dyn_cast(&i)) { + auto target = cb->getCalledFunction(); + if (target != nullptr) { + if (target->getName() == kAnvillSwitchCompleteFunc || + target->getName() == kAnvillSwitchIncompleteFunc) { + ++AbruptControlFlow; + } } } } From bd646946edde8f02d319197bb384dd3ca4b91356 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Wed, 22 Mar 2023 12:44:40 -0400 Subject: [PATCH 122/163] count number of functions that contain stack/pc --- lib/Passes/CodeQualityStatCollector.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/Passes/CodeQualityStatCollector.cpp b/lib/Passes/CodeQualityStatCollector.cpp index 5153609d8..b2bc7d79f 100644 --- a/lib/Passes/CodeQualityStatCollector.cpp +++ b/lib/Passes/CodeQualityStatCollector.cpp @@ -18,8 +18,8 @@ ALWAYS_ENABLED_STATISTIC(NumberOfInstructions, "Total number of instructions"); ALWAYS_ENABLED_STATISTIC(AbruptControlFlow, "Indirect control flow instructions"); ALWAYS_ENABLED_STATISTIC(IntToPointerCasts, "Integer to pointer casts"); ALWAYS_ENABLED_STATISTIC(PointerToIntCasts, "Pointer to integer casts"); -ALWAYS_ENABLED_STATISTIC(AnvillStackPointers, "Anvill stack pointer"); -ALWAYS_ENABLED_STATISTIC(AnvillPCPointers, "Anvill pc pointer"); +ALWAYS_ENABLED_STATISTIC(AnvillStackPointers, "Number of functions that expose an Anvill stack pointer"); +ALWAYS_ENABLED_STATISTIC(AnvillPCPointers, "Number of functions that expose an Anvill pc pointer"); 
namespace { @@ -65,11 +65,17 @@ CodeQualityStatCollector::run(llvm::Module &module, llvm::GlobalVariable* anvill_sp = module.getGlobalVariable(kSymbolicSPName); llvm::GlobalVariable* anvill_pc = module.getGlobalVariable(kSymbolicPCName); + llvm::DenseSet sp_funcs; + llvm::DenseSet pc_funcs; + if (anvill_sp != nullptr) { for (const auto &U: anvill_sp->uses()) { const auto &user = U.getUser(); if (const llvm::Instruction *I = llvm::dyn_cast(user)) { - ++AnvillStackPointers; + if (!sp_funcs.count(I->getFunction())) { + ++AnvillStackPointers; + sp_funcs.insert(I->getFunction()); + } } } } @@ -78,7 +84,10 @@ CodeQualityStatCollector::run(llvm::Module &module, for (const auto &U: anvill_pc->uses()) { const auto &user = U.getUser(); if (const llvm::Instruction *I = llvm::dyn_cast(user)) { - ++AnvillPCPointers; + if (!pc_funcs.count(I->getFunction())) { + ++AnvillPCPointers; + pc_funcs.insert(I->getFunction()); + } } } } From 3ebbffbe7a94801a40f82f35f1b24b76d5976140 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Wed, 22 Mar 2023 14:06:33 -0400 Subject: [PATCH 123/163] simplify convoluted logic --- lib/Passes/CodeQualityStatCollector.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/Passes/CodeQualityStatCollector.cpp b/lib/Passes/CodeQualityStatCollector.cpp index b2bc7d79f..bf8606fcb 100644 --- a/lib/Passes/CodeQualityStatCollector.cpp +++ b/lib/Passes/CodeQualityStatCollector.cpp @@ -72,26 +72,24 @@ CodeQualityStatCollector::run(llvm::Module &module, for (const auto &U: anvill_sp->uses()) { const auto &user = U.getUser(); if (const llvm::Instruction *I = llvm::dyn_cast(user)) { - if (!sp_funcs.count(I->getFunction())) { - ++AnvillStackPointers; - sp_funcs.insert(I->getFunction()); - } + sp_funcs.insert(I->getFunction()); } } } + AnvillStackPointers = sp_funcs.size(); + if (anvill_pc != nullptr) { for (const auto &U: anvill_pc->uses()) { const auto &user = U.getUser(); if (const 
llvm::Instruction *I = llvm::dyn_cast(user)) { - if (!pc_funcs.count(I->getFunction())) { - ++AnvillPCPointers; - pc_funcs.insert(I->getFunction()); - } + pc_funcs.insert(I->getFunction()); } } } + AnvillPCPointers = pc_funcs.size(); + for (auto &function : module) { for (auto &i : llvm::instructions(function)) { if (auto *int_to_ptr = llvm::dyn_cast(&i)) { From 233cbd7f5e04cc405b339ca823dff85c5f267dfd Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Wed, 22 Mar 2023 14:19:36 -0400 Subject: [PATCH 124/163] accumlate total for all modules --- lib/Passes/CodeQualityStatCollector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Passes/CodeQualityStatCollector.cpp b/lib/Passes/CodeQualityStatCollector.cpp index bf8606fcb..ed51004c7 100644 --- a/lib/Passes/CodeQualityStatCollector.cpp +++ b/lib/Passes/CodeQualityStatCollector.cpp @@ -77,7 +77,7 @@ CodeQualityStatCollector::run(llvm::Module &module, } } - AnvillStackPointers = sp_funcs.size(); + AnvillStackPointers += sp_funcs.size(); if (anvill_pc != nullptr) { for (const auto &U: anvill_pc->uses()) { @@ -88,7 +88,7 @@ CodeQualityStatCollector::run(llvm::Module &module, } } - AnvillPCPointers = pc_funcs.size(); + AnvillPCPointers += pc_funcs.size(); for (auto &function : module) { for (auto &i : llvm::instructions(function)) { From 512d0c2358fad0c3083fa2f7b1f3d114281f14fb Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 27 Mar 2023 13:11:37 -0400 Subject: [PATCH 125/163] Adds back splitting the stack around the return address preventing exposing the return address pointer (#365) * split stack reenabled * fix return replacement for void returns * add no return to call * Remove unneded include * clone self... 
--------- Co-authored-by: Francesco Bertolaccini --- .github/workflows/build.yml | 2 +- include/anvill/ABI.h | 4 + lib/ABI.cpp | 2 + lib/Lifters/BasicBlockLifter.cpp | 8 +- lib/Lifters/FunctionLifter.cpp | 4 + lib/Optimize.cpp | 11 ++ lib/Passes/RemoveAnvillReturns.cpp | 26 ++- lib/Passes/SplitStackFrameAtReturnAddress.cpp | 173 ++++++++---------- lib/Protobuf.cpp | 1 + 9 files changed, 124 insertions(+), 107 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 294a08495..d0fadf222 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -157,7 +157,7 @@ jobs: path: ${{ steps.build_paths.outputs.REL_SOURCE }}/irene3 repository: "trailofbits/irene3" fetch-depth: 0 - submodules: false + submodules: true ssh-key: "${{ secrets.IRENE3_DEPLOY }}" - name: Add cargo bin to path diff --git a/include/anvill/ABI.h b/include/anvill/ABI.h index 2636ca913..02cf9e302 100644 --- a/include/anvill/ABI.h +++ b/include/anvill/ABI.h @@ -82,6 +82,10 @@ extern const std::string kAnvillDataProvenanceFunc; // `alloca`. extern const std::string kAnvillStackZero; +// The alloca for the abstract stack prior to splitting at the return address. 
TODO(Ian): maybe we are fine to +// use this to queue off of then just move it after the split +extern const std::string kStackMetadata; + extern const std::string kBasicBlockMetadata; diff --git a/lib/ABI.cpp b/lib/ABI.cpp index 59220129b..70b35e410 100644 --- a/lib/ABI.cpp +++ b/lib/ABI.cpp @@ -84,6 +84,8 @@ const std::string kAnvillStackZero(kAnvillNamePrefix + "stack_zero"); const std::string kBasicBlockMetadata(kAnvillNamePrefix + "basic_block_md"); +const std::string kStackMetadata(kAnvillNamePrefix + "stack_alloc"); + const std::string kAnvillBasicBlockReturn(kAnvillNamePrefix + "basic_block_function_return"); diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index ce72bdc94..790cf7b18 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -176,15 +176,17 @@ bool BasicBlockLifter::DoInterProceduralControlFlow( // only handle inter-proc since intra-proc are handled implicitly by the CFG. llvm::IRBuilder<> builder(block); if (std::holds_alternative(override)) { + auto cc = std::get(override); + llvm::CallInst *call = nullptr; if (cc.target_address.has_value()) { - this->AddCallFromBasicBlockFunctionToLifted( + call = this->AddCallFromBasicBlockFunctionToLifted( block, this->intrinsics.function_call, this->intrinsics, this->options.program_counter_init_procedure( builder, this->address_type, *cc.target_address)); } else { - this->AddCallFromBasicBlockFunctionToLifted( + call = this->AddCallFromBasicBlockFunctionToLifted( block, this->intrinsics.function_call, this->intrinsics); } if (!cc.stop) { @@ -194,6 +196,8 @@ bool BasicBlockLifter::DoInterProceduralControlFlow( builder.CreateStore(raddr, npc); builder.CreateStore(raddr, pc); } else { + call->setDoesNotReturn(); + remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics); } return !cc.stop; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index a95a8405c..9ac700549 100644 --- 
a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -533,6 +533,10 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { auto abstract_stack = ir.CreateAlloca( AbstractStack::StackTypeFromSize(llvm_context, decl.maximum_depth), nullptr, "abstract_stack"); + + abstract_stack->setMetadata(kStackMetadata, + llvm::MDNode::get(llvm_context, {})); + // Add a branch between the first block of the lifted function, which sets // up some local variables, and the block that will contain the lifted // instruction. diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 9506fbb08..d5655f064 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -79,6 +79,7 @@ #include #include "anvill/Passes/RemoveAnvillReturns.h" +#include "anvill/Passes/SplitStackFrameAtReturnAddress.h" #include "anvill/Specification.h" namespace anvill { @@ -354,6 +355,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, llvm::createModuleToPostOrderCGSCCPassAdaptor(llvm::InlinerPass())); llvm::FunctionPassManager rm_returns; rm_returns.addPass(anvill::RemoveAnvillReturns()); + mpminliner.addPass( llvm::createModuleToFunctionPassAdaptor(std::move(rm_returns))); @@ -371,10 +373,19 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, intrinsics->eraseFromParent(); } + auto defaultmpm = pb.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O3); defaultmpm.run(module, mam); + + llvm::createModuleToFunctionPassAdaptor( + SplitStackFrameAtReturnAddress(options.stack_frame_recovery_options)) + .run(module, mam); + + + pb.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O3) + .run(module, mam); } diff --git a/lib/Passes/RemoveAnvillReturns.cpp b/lib/Passes/RemoveAnvillReturns.cpp index d414ee587..4ad2f6234 100644 --- a/lib/Passes/RemoveAnvillReturns.cpp +++ b/lib/Passes/RemoveAnvillReturns.cpp @@ -39,22 +39,34 @@ RemoveAnvillReturns::run(llvm::Function &F, llvm::FunctionAnalysisManager &AM) { } for (auto cc : calls) { - if 
(F.getReturnType() == cc->getArgOperand(0)->getType()) { + // either it's a void return with no args or there is 1 arg that is the type of the return + if ((F.getReturnType()->isVoidTy() && cc->arg_size() == 0) || + (cc->arg_size() == 1 && + F.getReturnType() == cc->getArgOperand(0)->getType())) { changed = true; auto to_block = cc->getParent()->getTerminator(); // block must be wellformed CHECK(to_block); to_block->eraseFromParent(); - llvm::ReturnInst::Create(F.getContext(), cc->getArgOperand(0), - cc->getParent()); + + if (F.getReturnType()->isVoidTy()) { + llvm::ReturnInst::Create(F.getContext(), cc->getParent()); + } else { + llvm::ReturnInst::Create(F.getContext(), cc->getArgOperand(0), + cc->getParent()); + } + cc->eraseFromParent(); } else { - LOG(ERROR) << "Ret ty: " << remill::LLVMThingToString(F.getReturnType()) - << " arg mismatch: " - << remill::LLVMThingToString( - cc->getArgOperand(0)->getType()); + LOG_IF(ERROR, cc->arg_size() == 1) + << "Ret ty: " << remill::LLVMThingToString(F.getReturnType()) + << " arg mismatch: " + << remill::LLVMThingToString(cc->getArgOperand(0)->getType()); + LOG_IF(ERROR, cc->arg_size() == 0) + << "Expected void type for function with type: " + << remill::LLVMThingToString(F.getReturnType()); } } } diff --git a/lib/Passes/SplitStackFrameAtReturnAddress.cpp b/lib/Passes/SplitStackFrameAtReturnAddress.cpp index cfe8198ea..d9fd8cf56 100644 --- a/lib/Passes/SplitStackFrameAtReturnAddress.cpp +++ b/lib/Passes/SplitStackFrameAtReturnAddress.cpp @@ -6,10 +6,9 @@ * the LICENSE file found in the root directory of this source tree. 
*/ -#include - #include #include +#include #include #include #include @@ -22,6 +21,7 @@ #include #include + #include "Utils.h" namespace anvill { @@ -31,21 +31,10 @@ namespace { static llvm::AllocaInst *FindStackFrameAlloca(llvm::Function &func) { for (auto &inst : func.getEntryBlock()) { auto alloca = llvm::dyn_cast(&inst); - if (!alloca) { + if (!alloca || !alloca->hasMetadata(kStackMetadata)) { continue; } - auto frame_type = llvm::dyn_cast( - alloca->getAllocatedType()); - if (!frame_type || frame_type->isLiteral()) { - continue; - } - - auto frame_name = frame_type->getName(); - if (!frame_name.startswith(func.getName()) || - !frame_name.endswith(kStackFrameTypeNameSuffix)) { - continue; - } return alloca; } @@ -59,8 +48,8 @@ struct FixedOffsetUse { }; // Find all (indirect) uses of the stack frame allocation. -static std::vector FindFixedOffsetUses( - llvm::AllocaInst *alloca) { +static std::vector +FindFixedOffsetUses(llvm::AllocaInst *alloca) { const llvm::DataLayout &dl = alloca->getModule()->getDataLayout(); const auto addr_size = dl.getIndexSizeInBits(0); @@ -70,8 +59,7 @@ static std::vector FindFixedOffsetUses( std::vector> work_list; work_list.emplace_back(alloca, llvm::APInt(addr_size, 0u, true)); - auto add_to_found = [&found] (llvm::Use &use, - llvm::APInt offset) { + auto add_to_found = [&found](llvm::Use &use, llvm::APInt offset) { FixedOffsetUse fou; fou.offset = std::move(offset); fou.use = &use; @@ -95,8 +83,7 @@ static std::vector FindFixedOffsetUses( } switch (user_inst->getOpcode()) { - default: - break; + default: break; case llvm::Instruction::BitCast: case llvm::Instruction::PtrToInt: case llvm::Instruction::IntToPtr: @@ -136,24 +123,24 @@ static void AnnotateStackUses(llvm::AllocaInst *frame_alloca, return; } - auto stack_offset_md_id = context.getMDKindID( - options.stack_offset_metadata_name); + auto stack_offset_md_id = + context.getMDKindID(options.stack_offset_metadata_name); auto zero_offset = zero_val->getSExtValue(); - auto 
create_metadata = - [=, &context] (llvm::Instruction *inst, int64_t offset) { - int64_t disp = 0; - if (options.stack_grows_down) { - disp = zero_offset - offset; - } else { - disp = offset - zero_offset; - } + auto create_metadata = [=, &context](llvm::Instruction *inst, + int64_t offset) { + int64_t disp = 0; + if (options.stack_grows_down) { + disp = zero_offset - offset; + } else { + disp = offset - zero_offset; + } - auto disp_val = llvm::ConstantInt::get( - zero_val->getType(), static_cast(disp), true); - auto disp_md = llvm::ValueAsMetadata::get(disp_val); - return llvm::MDNode::get(context, disp_md); - }; + auto disp_val = llvm::ConstantInt::get(zero_val->getType(), + static_cast(disp), true); + auto disp_md = llvm::ValueAsMetadata::get(disp_val); + return llvm::MDNode::get(context, disp_md); + }; // Annotate the used instructions. for (const auto &use : uses) { @@ -173,9 +160,9 @@ static void AnnotateStackUses(llvm::AllocaInst *frame_alloca, // Find a `StoreInst` that looks like it puts the return address into the // stack. Failure to find this means it likely stayed in registers. 
-static const FixedOffsetUse *FindReturnAddressStore( - const std::vector &uses, - const StackFrameRecoveryOptions &options) { +static const FixedOffsetUse * +FindReturnAddressStore(const std::vector &uses, + const StackFrameRecoveryOptions &options) { const FixedOffsetUse *found = nullptr; for (const auto &use : uses) { if (auto store = llvm::dyn_cast(use.use->getUser())) { @@ -258,13 +245,8 @@ static llvm::Instruction *DemandedOffset( ptr_type = llvm::PointerType::get(context, 0); scale = 2; break; - case 0: - el_type = llvm::Type::getIntNTy(context, addr_size * 8u); - break; - default: - LOG(FATAL) - << "Unsupported address size: " << addr_size; - break; + case 0: el_type = llvm::Type::getIntNTy(context, addr_size * 8u); break; + default: LOG(FATAL) << "Unsupported address size: " << addr_size; break; } auto base = pointers[addr_size]; @@ -367,8 +349,8 @@ static void SubstituteUse( // If the user is a `load`, then replace its use of the pointer. case llvm::Instruction::Load: { auto li = llvm::dyn_cast(user_inst); - auto pty = llvm::PointerType::get( - ir.getContext(), li->getPointerAddressSpace()); + auto pty = + llvm::PointerType::get(ir.getContext(), li->getPointerAddressSpace()); auto bc = ir.CreateBitOrPointerCast(ret, pty); CopyMetadataTo(use_inst, bc); use->set(bc); @@ -392,9 +374,10 @@ static void SubstituteUse( use->set(bc); } - // Operating on the pointer. + // Operating on the pointer. } else { - auto pty = llvm::PointerType::get(ir.getContext(), si->getPointerAddressSpace()); + auto pty = llvm::PointerType::get(ir.getContext(), + si->getPointerAddressSpace()); auto bc = ir.CreateBitOrPointerCast(ret, pty); CopyMetadataTo(use_inst, bc); use->set(bc); @@ -414,7 +397,7 @@ static void SubstituteUse( to_replace.emplace(user_inst, bc); } - // This is trickier; we need to form a new GEP or something like it. + // This is trickier; we need to form a new GEP or something like it. 
} else { llvm::SmallVector const_indices_c; llvm::SmallVector const_indices; @@ -439,13 +422,12 @@ static void SubstituteUse( // This is the easy case, because we can replace the use with // something that was constant calculated. if (const_indices.empty()) { - auto pty = llvm::PointerType::get( - ir.getContext(), addr_space); + auto pty = llvm::PointerType::get(ir.getContext(), addr_space); auto bc = ir.CreateBitOrPointerCast(ret, pty); CopyMetadataTo(use_inst, bc); use->set(bc); - // This is the hard case, because we need to invent a new GEP. + // This is the hard case, because we need to invent a new GEP. } else if (!to_replace.count(user_inst)) { llvm::APInt sub_offset(addr_size * 8u, 0u); auto source_ty = gep->getSourceElementType(); @@ -454,19 +436,18 @@ static void SubstituteUse( source_ty, const_indices_c, dl, sub_offset)); auto effective_sub_offset = static_cast( - static_cast(offset) + - sub_offset.getSExtValue()); - llvm::Instruction *const sub_ret = DemandedOffset( - ir, use_inst, pointers, computed_offsets, - effective_sub_offset, addr_size); + static_cast(offset) + sub_offset.getSExtValue()); + llvm::Instruction *const sub_ret = + DemandedOffset(ir, use_inst, pointers, computed_offsets, + effective_sub_offset, addr_size); CHECK_NOTNULL(sub_ret); CopyMetadataTo(use_inst, sub_ret); - auto sub_ret_ty = llvm::GetElementPtrInst::getIndexedType( - source_ty, const_indices); - auto sub_ret_pty = llvm::PointerType::get( - ir.getContext(), addr_space); + auto sub_ret_ty = + llvm::GetElementPtrInst::getIndexedType(source_ty, const_indices); + auto sub_ret_pty = + llvm::PointerType::get(ir.getContext(), addr_space); auto bc = ir.CreateBitOrPointerCast(ret, sub_ret_pty); CopyMetadataTo(user_inst, bc); @@ -482,17 +463,17 @@ static void SubstituteUse( } } -static void SplitStackFrameAround( - llvm::AllocaInst *frame_alloca, std::vector uses, - const StackFrameRecoveryOptions &options) { +static void SplitStackFrameAround(llvm::AllocaInst *frame_alloca, + 
std::vector uses, + const StackFrameRecoveryOptions &options) { llvm::LLVMContext &context = frame_alloca->getContext(); - llvm::Module * const module = frame_alloca->getModule(); + llvm::Module *const module = frame_alloca->getModule(); const llvm::DataLayout &dl = module->getDataLayout(); const auto addr_size = dl.getPointerSize(0); const auto addr_size_bits = dl.getPointerSizeInBits(0); - llvm::IntegerType * const addr_type = llvm::Type::getIntNTy( - context, addr_size * 8u); + llvm::IntegerType *const addr_type = + llvm::Type::getIntNTy(context, addr_size * 8u); // If we don't find a return address store, then we'll still split at zero. // @@ -512,16 +493,14 @@ static void SplitStackFrameAround( end_of_ra = offset_of_ra + addr_size; // Log the above scenario out in case it comes up. - if (auto user_inst = llvm::dyn_cast( - store_use->use->getUser()); + if (auto user_inst = + llvm::dyn_cast(store_use->use->getUser()); user_inst && offset_of_ra != 0) { - LOG(INFO) - << "Offset of return address storage location in function " - << frame_alloca->getFunction()->getName().str() - << " is " << offset_of_ra << ": " - << remill::LLVMThingToString(user_inst) - << " in block " << user_inst->getParent()->getName().str(); + LOG(INFO) << "Offset of return address storage location in function " + << frame_alloca->getFunction()->getName().str() << " is " + << offset_of_ra << ": " << remill::LLVMThingToString(user_inst) + << " in block " << user_inst->getParent()->getName().str(); } } @@ -553,29 +532,29 @@ static void SplitStackFrameAround( std::unordered_map computed_offsets; std::unordered_map to_replace; - auto make_subframe = [&] ( - std::vector> use_offsets, - const char *down_name, const char *up_name, uint64_t num_slots) { - auto num_slots_val = ir.getIntN(addr_size_bits, num_slots); - if (options.stack_grows_down) { - sub_frame = ir.CreateAlloca(addr_type, 0u, num_slots_val, down_name); - } else { - sub_frame = ir.CreateAlloca(addr_type, 0u, num_slots_val, up_name); - } 
+ auto make_subframe = + [&](std::vector> use_offsets, + const char *down_name, const char *up_name, uint64_t num_slots) { + auto num_slots_val = ir.getIntN(addr_size_bits, num_slots); + if (options.stack_grows_down) { + sub_frame = ir.CreateAlloca(addr_type, 0u, num_slots_val, down_name); + } else { + sub_frame = ir.CreateAlloca(addr_type, 0u, num_slots_val, up_name); + } - pointers.clear(); - computed_offsets.clear(); + pointers.clear(); + computed_offsets.clear(); - pointers.emplace(addr_size, sub_frame); - computed_offsets.emplace(0, sub_frame); + pointers.emplace(addr_size, sub_frame); + computed_offsets.emplace(0, sub_frame); - CopyMetadataTo(frame_alloca, sub_frame); + CopyMetadataTo(frame_alloca, sub_frame); - for (auto [use, offset] : use_offsets) { - SubstituteUse(ir, use, offset, addr_size, pointers, - computed_offsets, to_replace); - } - }; + for (auto [use, offset] : use_offsets) { + SubstituteUse(ir, use, offset, addr_size, pointers, computed_offsets, + to_replace); + } + }; if (!above.empty()) { auto num_slots = (offset_of_ra + (addr_size - 1u)) / addr_size; @@ -583,8 +562,8 @@ static void SplitStackFrameAround( } if (!below.empty()) { - auto frame_size = dl.getTypeAllocSize( - frame_alloca->getAllocatedType()).getKnownMinSize(); + auto frame_size = + dl.getTypeAllocSize(frame_alloca->getAllocatedType()).getKnownMinSize(); auto num_slots = ((frame_size - end_of_ra) + (addr_size - 1u)) / addr_size; make_subframe(std::move(below), "locals", "parameters", num_slots); } diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 610c64cd9..dd6964a68 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -44,6 +44,7 @@ Result ProtobufTranslator::ParseIntoCallableDecl( CallableDecl &decl) const { decl.arch = arch; decl.is_noreturn = function.is_noreturn(); + decl.is_variadic = function.is_variadic(); decl.calling_convention = static_cast(function.calling_convention()); From df58f86435548bcd3930d8daa1d741189b701599 Mon Sep 17 00:00:00 2001 From: Francesco 
Bertolaccini Date: Wed, 29 Mar 2023 18:45:00 +0200 Subject: [PATCH 126/163] Tail call control flow (#364) * Always pass all stack vars and regs to bb funcs * Read used registers from spec * Port commit from old PR * Fix ownership bug * Tail call by passing all the arguments * Fix compile error * Fix empty basic block function declarations * Use locally allocated storage for next_pc * Optimize after removing intrinsics to improve control flow * Lift function returns to native returns * look at all in scope vars at once * filter through all variables * only recover references for variables in scope in the block * add back permissiveness related to lives * Remove unused parameter * Remove unused passes * Use named constants for argument indices * Bump anvill to C++20 * Use default comparison when searching parameters * create empty decls as needed for bad edges * remove stale test script * Don't share basic blocks between functions * Relax type spec comparison --------- Co-authored-by: 2over12 --- CMakeLists.txt | 2 +- bin/Decompile/Main.cpp | 5 - bin/Decompile/tests/scripts/roundtrip.py | 176 ------------ data_specifications/specification.proto | 2 + include/anvill/Declarations.h | 26 +- include/anvill/Lifters.h | 3 - include/anvill/Passes/BasicBlockPass.h | 10 +- .../Passes/ConvertPointerArithmeticToGEP.h | 4 +- .../anvill/Passes/InlineBasicBlockFunctions.h | 12 +- .../anvill/Passes/RemoveAssignmentsToNextPC.h | 33 --- .../Passes/RemoveRemillFunctionReturns.h | 63 ----- ...FunctionReturnsWithAnvillFunctionReturns.h | 30 -- .../anvill/Passes/ReplaceStackReferences.h | 7 +- include/anvill/Specification.h | 4 + include/anvill/Type.h | 18 ++ include/anvill/Utils.h | 4 - lib/CMakeLists.txt | 4 - lib/Declarations.cpp | 51 ++-- lib/Lifters/BasicBlockLifter.cpp | 266 ++++++++++-------- lib/Lifters/BasicBlockLifter.h | 56 ++-- lib/Lifters/FunctionLifter.cpp | 137 ++++----- lib/Lifters/FunctionLifter.h | 40 +-- lib/Optimize.cpp | 18 +- 
lib/Passes/ConvertPointerArithmeticToGEP.cpp | 3 +- lib/Passes/InlineBasicBlockFunctions.cpp | 3 +- lib/Passes/RemoveAnvillReturns.cpp | 77 ----- lib/Passes/RemoveAssignmentsToNextPC.cpp | 122 -------- lib/Passes/RemoveRemillFunctionReturns.cpp | 251 ----------------- ...nctionReturnsWithAnvillFunctionReturns.cpp | 79 ------ lib/Passes/ReplaceStackReferences.cpp | 12 +- lib/Protobuf.cpp | 10 + lib/Specification.cpp | 8 +- lib/Type.cpp | 22 ++ lib/Utils.cpp | 8 - 34 files changed, 400 insertions(+), 1166 deletions(-) delete mode 100755 bin/Decompile/tests/scripts/roundtrip.py delete mode 100644 include/anvill/Passes/RemoveAssignmentsToNextPC.h delete mode 100644 include/anvill/Passes/RemoveRemillFunctionReturns.h delete mode 100644 include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h delete mode 100644 lib/Passes/RemoveAnvillReturns.cpp delete mode 100644 lib/Passes/RemoveAssignmentsToNextPC.cpp delete mode 100644 lib/Passes/RemoveRemillFunctionReturns.cpp delete mode 100644 lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f4ad622f..c1a848ecc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,7 +56,7 @@ if(ANVILL_ENABLE_INSTALL) endif(ANVILL_ENABLE_INSTALL) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_EXTENSIONS OFF) if(ANVILL_ENABLE_TESTS) diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index c8741b7d6..b74245233 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -45,8 +45,6 @@ DEFINE_bool(add_breakpoints, false, DEFINE_bool(add_names, false, "Try to apply symbol names to lifted entities."); DEFINE_bool(disable_opt, false, "Dont apply optimization passes"); DEFINE_bool(llvm_debug, false, "Enable LLVM debug flag"); -DEFINE_bool(remove_next_pc_assignments, false, - "Enables remove next pc assignment pass"); DEFINE_bool(inline_basic_blocks, false, "Enables inlining of basic 
blocks for high level output"); @@ -178,9 +176,6 @@ int main(int argc, char *argv[]) { options.stack_frame_recovery_options.stack_offset_metadata_name = "stack_offset"; - options.should_remove_assignments_to_next_pc = - FLAGS_remove_next_pc_assignments; - anvill::EntityLifter lifter(options); std::unordered_map names; diff --git a/bin/Decompile/tests/scripts/roundtrip.py b/bin/Decompile/tests/scripts/roundtrip.py deleted file mode 100755 index d3d2c6995..000000000 --- a/bin/Decompile/tests/scripts/roundtrip.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 - -# -# Copyright (c) 2019-present, Trail of Bits, Inc. -# All rights reserved. -# -# This source code is licensed in accordance with the terms specified in -# the LICENSE file found in the root directory of this source tree. -# - -import unittest -import subprocess -import argparse -import tempfile -import os -import platform -import sys -import shutil - - -class RunError(Exception): - def __init__(self, msg): - self.msg = msg - - def __str__(self): - return str(self.msg) - - -def write_command_log(cmd_description, cmd_exec, ws): - with open(os.path.join(ws, "commands.log"), "a") as cmdlog: - if cmd_description: - cmdlog.write(f"# {cmd_description}\n") - cmdlog.write(f"{cmd_exec}\n") - - -def run_cmd(cmd, timeout, description, ws): - try: - exec_cmd = f"{' '.join(cmd)}" - sys.stdout.write(f"Running: {exec_cmd}\n") - write_command_log(description, exec_cmd, ws) - p = subprocess.run( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - timeout=timeout, - universal_newlines=True, - ) - except FileNotFoundError as e: - raise RunError('Error: No such file or directory: "' + - e.filename + '"') - except PermissionError as e: - raise RunError('Error: File "' + e.filename + - '" is not an executable.') - - return p - - -def compile(self, clang, input, output, timeout, ws, options=None): - cmd = [] - cmd.append(clang) - if options is not None: - cmd.extend(options) - cmd.extend([input, "-o", output]) - p 
= run_cmd( - cmd, timeout, description="Original source Clang compile command", ws=ws) - - self.assertEqual(p.returncode, 0, "clang failure") - self.assertEqual( - len(p.stderr), 0, "errors or warnings during compilation: %s" % p.stderr - ) - - return p - - -def specify(self, specifier, input, output, timeout, ws): - cmd = list(specifier) if isinstance(specifier, list) else [specifier] - cmd.extend(["--bin_in", input]) - cmd.extend(["--spec_out", output]) - cmd.extend(["--entrypoint", "main"]) - - p = run_cmd(cmd, timeout, description="Spec generation command", ws=ws) - - self.assertEqual(p.returncode, 0, "specifier failure: %s" % p.stderr) - self.assertEqual( - len(p.stderr), 0, "errors or warnings during specification: %s" % p.stderr - ) - - return p - - -def decompile(self, decompiler, input, output, timeout, ws): - cmd = [decompiler] - cmd.extend(["--spec", input]) - cmd.extend(["--bc_out", output]) - cmd.extend(["-add_names"]) - p = run_cmd(cmd, timeout, description="Decompilation command", ws=ws) - - self.assertEqual(p.returncode, 0, "decompiler failure: %s" % p.stderr) - self.assertEqual( - len(p.stderr), 0, "errors or warnings during decompilation: %s" % p.stderr - ) - - return p - - -def roundtrip(self, specifier, decompiler, filename, testname, clang, timeout, workspace): - - # Python refuses to add delete=False to the TemporaryDirectory constructor - # with tempfile.TemporaryDirectory(prefix=f"{testname}_", dir=workspace) as tempdir: - tempdir = tempfile.mkdtemp(prefix=f"{testname}_", dir=workspace) - - compiled = os.path.join(tempdir, f"{testname}_compiled") - compile(self, clang, filename, compiled, timeout, tempdir) - - # capture binary run outputs - compiled_output = run_cmd( - [compiled], timeout, description="capture compilation output", ws=tempdir) - - rt_json = os.path.join(tempdir, f"{testname}_rt.json") - specify(self, specifier, compiled, rt_json, timeout, tempdir) - - rt_bc = os.path.join(tempdir, f"{testname}_rt.bc") - decompile(self, 
decompiler, rt_json, rt_bc, timeout, tempdir) - - rebuilt = os.path.join(tempdir, f"{testname}_rebuilt") - compile(self, clang, rt_bc, rebuilt, timeout, tempdir, ["-Wno-everything"]) - # capture outputs of binary after roundtrip - rebuilt_output = run_cmd( - [rebuilt], timeout, description="Capture binary output after roundtrip", ws=tempdir) - - # Clean up tempdir if no workspace specified - # otherwise keep it for debugging purposes - if not workspace: - shutil.rmtree(tempdir) - - self.assertEqual(compiled_output.stderr, - rebuilt_output.stderr, "Different stderr") - self.assertEqual(compiled_output.stdout, - rebuilt_output.stdout, "Different stdout") - self.assertEqual(compiled_output.returncode, - rebuilt_output.returncode, "Different return code") - - -class TestRoundtrip(unittest.TestCase): - pass - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("anvill", help="path to anvill-decompile-spec") - parser.add_argument("tests", help="path to test directory") - parser.add_argument("clang", help="path to clang") - parser.add_argument("workspace", nargs="?", default=None, - help="Where to save temporary unit test outputs") - parser.add_argument("-t", "--timeout", - help="set timeout in seconds", type=int) - - args = parser.parse_args() - - if args.workspace: - os.makedirs(args.workspace) - - def test_generator(path, test_name): - def test(self): - specifier = ["python3", "-m", "anvill"] - roundtrip(self, specifier, args.anvill, path, test_name, - args.clang, args.timeout, args.workspace) - - return test - - for item in os.scandir(args.tests): - test_name = "test_%s" % os.path.splitext(item.name)[0] - test = test_generator(item.path, test_name) - setattr(TestRoundtrip, test_name, test) - - unittest.main(argv=[sys.argv[0], "-v"]) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 2f6c44729..5847a04f2 100644 --- a/data_specifications/specification.proto +++ 
b/data_specifications/specification.proto @@ -316,6 +316,8 @@ message Function { map block_context = 7; StackEffects stack_effects = 8; StackFrame frame = 9; + + repeated Parameter in_scope_vars = 10; } message GlobalVariable { diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 815472424..5d2f4b892 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -68,7 +68,7 @@ struct LowLoc { std::uint64_t Size() const; - bool operator==(const LowLoc &loc) const; + bool operator==(const LowLoc &loc) const = default; }; // A value, such as a parameter or a return value. Values are resident @@ -93,6 +93,8 @@ struct ValueDecl { // Type of this value. llvm::Type *type{nullptr}; + + bool operator==(const ValueDecl &) const = default; }; @@ -101,6 +103,8 @@ struct ParameterDecl : public ValueDecl { // Name of the parameter. std::string name; + + bool operator==(const ParameterDecl &) const = default; }; // A typed location in memory, that isn't actually code. This roughly @@ -217,13 +221,22 @@ struct CallableDecl { // live exits and entries struct BasicBlockVariable { ParameterDecl param; - size_t index; bool live_at_entry; bool live_at_exit; }; class BasicBlockContext { public: + size_t GetParamIndex(const ParameterDecl &decl) const; + + llvm::Value *ProvidePointerFromStruct(llvm::IRBuilder<> &ir, + llvm::StructType *sty, llvm::Value *, + const ParameterDecl &decl) const; + + llvm::Argument * + ProvidePointerFromFunctionArgs(llvm::Function *, + const ParameterDecl &decl) const; + virtual ~BasicBlockContext() = default; virtual const SpecStackOffsets &GetStackOffsets() const = 0; @@ -240,6 +253,8 @@ class BasicBlockContext { virtual ValueDecl ReturnValue() const = 0; + virtual const std::vector &GetParams() const = 0; + // Deduplicates locations and ensures there are no overlapping decls // A valid parameter list is a set of non overlapping a-locs with distinct names. 
std::vector LiveParamsAtEntryAndExit() const; @@ -248,9 +263,6 @@ class BasicBlockContext { std::vector LiveBBParamsAtEntry() const; std::vector LiveBBParamsAtExit() const; - - llvm::StructType *StructTypeFromVars(llvm::LLVMContext &llvm_context) const; - protected: virtual const std::vector &LiveParamsAtEntry() const = 0; virtual const std::vector &LiveParamsAtExit() const = 0; @@ -312,6 +324,7 @@ class SpecBlockContext : public BasicBlockContext { std::vector constants; std::vector live_params_at_entry; std::vector live_params_at_exit; + std::vector params; public: SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets, @@ -333,6 +346,7 @@ class SpecBlockContext : public BasicBlockContext { virtual size_t GetPointerDisplacement() const override; + virtual const std::vector &GetParams() const override; protected: virtual const std::vector &LiveParamsAtEntry() const override; @@ -394,6 +408,8 @@ struct FunctionDecl : public CallableDecl { std::size_t parameter_size{0}; + std::vector in_scope_variables; + // Declare this function in an LLVM module. 
llvm::Function *DeclareInModule(std::string_view name, llvm::Module &) const; diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index aa416eb30..6107def57 100644 --- a/include/anvill/Lifters.h +++ b/include/anvill/Lifters.h @@ -219,7 +219,6 @@ class LifterOptions { //TODO(ian): This should be initialized by an OS + arch pair stack_pointer_is_signed(false), should_remove_anvill_pc(true), - should_remove_assignments_to_next_pc(false), should_inline_basic_blocks(false) { CheckModuleContextMatchesArch(); } @@ -292,8 +291,6 @@ class LifterOptions { bool should_remove_anvill_pc : 1; - bool should_remove_assignments_to_next_pc : 1; - bool should_inline_basic_blocks : 1; private: diff --git a/include/anvill/Passes/BasicBlockPass.h b/include/anvill/Passes/BasicBlockPass.h index 6edaa8d9d..7ed23fc93 100644 --- a/include/anvill/Passes/BasicBlockPass.h +++ b/include/anvill/Passes/BasicBlockPass.h @@ -16,6 +16,7 @@ class BasicBlockContexts { public: virtual std::optional> GetBasicBlockContextForAddr(uint64_t addr) const = 0; + virtual const FunctionDecl &GetFunctionAtAddress(uint64_t addr) const = 0; }; template @@ -34,9 +35,12 @@ class BasicBlockPass : public llvm::PassInfoMixin> { auto &bb_pass = *static_cast(this); auto bbaddr = anvill::GetBasicBlockAddr(&F); if (bbaddr.has_value()) { - auto bb_cont = this->contexts.GetBasicBlockContextForAddr(*bbaddr); - if (bb_cont) { - return bb_pass.runOnBasicBlockFunction(F, AM, *bb_cont); + auto maybe_bb_cont = contexts.GetBasicBlockContextForAddr(*bbaddr); + if (maybe_bb_cont) { + const BasicBlockContext &bb_cont = *maybe_bb_cont; + auto &parent_func = + contexts.GetFunctionAtAddress(bb_cont.GetParentFunctionAddress()); + return bb_pass.runOnBasicBlockFunction(F, AM, bb_cont, parent_func); } } diff --git a/include/anvill/Passes/ConvertPointerArithmeticToGEP.h b/include/anvill/Passes/ConvertPointerArithmeticToGEP.h index 42b2bec3f..01b143f2b 100644 --- a/include/anvill/Passes/ConvertPointerArithmeticToGEP.h +++ 
b/include/anvill/Passes/ConvertPointerArithmeticToGEP.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -34,7 +35,8 @@ class ConvertPointerArithmeticToGEP final llvm::PreservedAnalyses runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, - const anvill::BasicBlockContext &); + const anvill::BasicBlockContext &, + const FunctionDecl &); // Returns the pass name static llvm::StringRef name(void); diff --git a/include/anvill/Passes/InlineBasicBlockFunctions.h b/include/anvill/Passes/InlineBasicBlockFunctions.h index 6c4c42b87..76f5bb3c1 100644 --- a/include/anvill/Passes/InlineBasicBlockFunctions.h +++ b/include/anvill/Passes/InlineBasicBlockFunctions.h @@ -14,20 +14,16 @@ namespace anvill { // with the goto intrinsic class InlineBasicBlockFunctions final : public BasicBlockPass { - private: - const EntityLifter &lifter; - public: - InlineBasicBlockFunctions(const BasicBlockContexts &contexts, - const EntityLifter &lifter) - : BasicBlockPass(contexts), - lifter(lifter) {} + InlineBasicBlockFunctions(const BasicBlockContexts &contexts) + : BasicBlockPass(contexts) {} static llvm::StringRef name(void); llvm::PreservedAnalyses runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, - const anvill::BasicBlockContext &); + const anvill::BasicBlockContext &, + const anvill::FunctionDecl &); }; } // namespace anvill \ No newline at end of file diff --git a/include/anvill/Passes/RemoveAssignmentsToNextPC.h b/include/anvill/Passes/RemoveAssignmentsToNextPC.h deleted file mode 100644 index 45ffd6847..000000000 --- a/include/anvill/Passes/RemoveAssignmentsToNextPC.h +++ /dev/null @@ -1,33 +0,0 @@ - -#pragma once - -#include -#include -#include - -#include "anvill/Lifters.h" - - -namespace anvill { - -// attempts to replace assignments to next pc with idiomatic control flow that terminates the block -// with the goto intrinsic -class RemoveAssignmentsToNextPC final - : public BasicBlockPass { - private: - const 
EntityLifter &lifter; - - public: - RemoveAssignmentsToNextPC(const BasicBlockContexts &contexts, - const EntityLifter &lifter) - : BasicBlockPass(contexts), - lifter(lifter) {} - - static llvm::StringRef name(void); - - - llvm::PreservedAnalyses - runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, - const anvill::BasicBlockContext &); -}; -} // namespace anvill \ No newline at end of file diff --git a/include/anvill/Passes/RemoveRemillFunctionReturns.h b/include/anvill/Passes/RemoveRemillFunctionReturns.h deleted file mode 100644 index 1c10f20f3..000000000 --- a/include/anvill/Passes/RemoveRemillFunctionReturns.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019-present, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - -#pragma once - -#include - -namespace anvill { - -class CrossReferenceFolder; -class CrossReferenceResolver; -class StackPointerResolver; - -enum ReturnAddressResult { - - // We've found a case where a value returned by `llvm.returnaddress`, or - // casted from `__anvill_ra`, reaches into the `pc` argument of the - // `__remill_function_return` intrinsic. This is the ideal case that we - // want to handle. - kFoundReturnAddress, - - // We've found a case where we're seeing a load from something derived from - // `__anvill_sp`, our "symbolic stack pointer", is reaching into the `pc` - // argument of `__remill_function_return`. This suggests that stack frame - // recovery has not happened yet, and thus we haven't really given stack - // frame recovery or stack frame splitting a chance to work. - kFoundSymbolicStackPointerLoad, - - // We've found a `load` or something else. 
This is probably a sign that - // stack frame recovery has happened, and that the actual return address - // is not necessarily the expected value, and so we need to try to swap - // out the return address with whatever we loaded. - kUnclassifiableReturnAddress -}; - -class RemoveRemillFunctionReturns final - : public llvm::PassInfoMixin { - private: - const CrossReferenceResolver &xref_resolver; - - public: - inline explicit RemoveRemillFunctionReturns( - const CrossReferenceResolver &xref_resolver_) - : xref_resolver(xref_resolver_) {} - - static llvm::StringRef name(void); - - llvm::PreservedAnalyses run(llvm::Function &F, - llvm::FunctionAnalysisManager &AM); - - private: - ReturnAddressResult QueryReturnAddress( - const CrossReferenceFolder &xref_folder, - const StackPointerResolver &sp_resolver, - llvm::Module *module, - llvm::Value *val) const; -}; -} // namespace anvill diff --git a/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h b/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h deleted file mode 100644 index 7ebd07d4f..000000000 --- a/include/anvill/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.h +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include - -#include "anvill/Lifters.h" - -namespace anvill { -// An intrinsic pass that currently assumes that the function returns to its caller, -// replacing the sound remill return with an anvill_return that returns the value specified by this -// functions ABI. 
-// TODO(Ian): make intrinsic pass compose with basic block passes somehow -class ReplaceRemillFunctionReturnsWithAnvillFunctionReturns - : public BasicBlockPass< - ReplaceRemillFunctionReturnsWithAnvillFunctionReturns> { - private: - const EntityLifter &lifter; - - public: - ReplaceRemillFunctionReturnsWithAnvillFunctionReturns( - const BasicBlockContexts &contexts, const EntityLifter &lifter) - : BasicBlockPass(contexts), - lifter(lifter) {} - - static llvm::StringRef name(void); - - - llvm::PreservedAnalyses - runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, - const anvill::BasicBlockContext &); -}; -} // namespace anvill \ No newline at end of file diff --git a/include/anvill/Passes/ReplaceStackReferences.h b/include/anvill/Passes/ReplaceStackReferences.h index 57312b16c..45b6588f4 100644 --- a/include/anvill/Passes/ReplaceStackReferences.h +++ b/include/anvill/Passes/ReplaceStackReferences.h @@ -1,11 +1,11 @@ #pragma once +#include +#include #include #include -#include "anvill/Lifters.h" - namespace anvill { /** @@ -28,6 +28,7 @@ class ReplaceStackReferences final llvm::PreservedAnalyses runOnBasicBlockFunction(llvm::Function &F, llvm::FunctionAnalysisManager &AM, - const anvill::BasicBlockContext &); + const anvill::BasicBlockContext &, + const FunctionDecl &); }; } // namespace anvill \ No newline at end of file diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index dd2781976..562c6d61a 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -92,12 +92,16 @@ struct ValueDecl; class Specification; class SpecBlockContexts : public BasicBlockContexts { std::unordered_map contexts; + std::unordered_map> funcs; public: SpecBlockContexts(const Specification &spec); virtual std::optional> GetBasicBlockContextForAddr(uint64_t addr) const override; + + virtual const FunctionDecl & + GetFunctionAtAddress(uint64_t addr) const override; }; diff --git a/include/anvill/Type.h 
b/include/anvill/Type.h index 634c016fc..bedd57265 100644 --- a/include/anvill/Type.h +++ b/include/anvill/Type.h @@ -93,6 +93,8 @@ struct FunctionType; struct UnknownType { unsigned size; + + bool operator==(const UnknownType &) const = default; }; using TypeSpec = @@ -101,6 +103,12 @@ using TypeSpec = std::shared_ptr, std::shared_ptr, UnknownType>; +bool operator==(std::shared_ptr, std::shared_ptr); +bool operator==(std::shared_ptr, std::shared_ptr); +bool operator==(std::shared_ptr, std::shared_ptr); +bool operator==(std::shared_ptr, std::shared_ptr); +bool operator==(std::shared_ptr, std::shared_ptr); + struct PointerType { template PointerType(T &&pointee, bool is_const) @@ -108,6 +116,8 @@ struct PointerType { is_const(is_const) {} TypeSpec pointee; bool is_const; + + bool operator==(const PointerType &) const = default; }; struct VectorType { @@ -117,6 +127,8 @@ struct VectorType { size(size) {} TypeSpec base; unsigned size; + + bool operator==(const VectorType &) const = default; }; struct ArrayType { @@ -126,10 +138,14 @@ struct ArrayType { size(size) {} TypeSpec base; unsigned size; + + bool operator==(const ArrayType &) const = default; }; struct StructType { std::vector members; + + bool operator==(const StructType &) const = default; }; struct FunctionType { @@ -142,6 +158,8 @@ struct FunctionType { TypeSpec return_type; std::vector arguments; bool is_variadic; + + bool operator==(const FunctionType &) const = default; }; // Dictionary of types to be used by the type specifier. 
diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 467a51a7d..1b76b62d1 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -134,10 +134,6 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, std::optional GetBasicBlockAddr(llvm::Function *func); -llvm::Argument * -ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, - const anvill::BasicBlockContext &context); - llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func); bool HasMemLoc(const ValueDecl &v); diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 2f68bfa93..3d9eb90a4 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -44,7 +44,6 @@ set(anvill_passes RemoveCompilerBarriers RemoveDelaySlotIntrinsics RemoveErrorIntrinsics - RemoveRemillFunctionReturns RemoveStackPointerCExprs RemoveTrivialPhisAndSelects RemoveUnusedFPClassificationCalls @@ -60,10 +59,7 @@ set(anvill_passes CombineAdjacentShifts ReplaceStackReferences RemoveCallIntrinsics - ReplaceRemillFunctionReturnsWithAnvillFunctionReturns - RemoveAssignmentsToNextPC InlineBasicBlockFunctions - RemoveAnvillReturns ) set(anvill_arch_HEADERS diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 75b1152a2..71d681f8c 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -71,11 +71,6 @@ struct std::hash { namespace anvill { -bool LowLoc::operator==(const LowLoc &loc) const { - return reg == loc.reg && mem_reg == loc.mem_reg && - loc.mem_offset == mem_offset && loc.size == size; -} - // Declare this global variable in an LLVM module. 
llvm::GlobalVariable * VariableDecl::DeclareInModule(const std::string &name, @@ -154,8 +149,7 @@ BasicBlockContext::LiveParamsAtEntryAndExit() const { if (!completely_covered) { std::copy(p.oredered_locs.begin(), p.oredered_locs.end(), std::inserter(covered, covered.end())); - auto ind = res.size(); - res.push_back({p, ind, live_at_ent, live_at_exit}); + res.push_back({p, live_at_ent, live_at_exit}); } } }; @@ -184,20 +178,6 @@ std::vector BasicBlockContext::LiveBBParamsAtExit() const { return res; } -llvm::StructType * -BasicBlockContext::StructTypeFromVars(llvm::LLVMContext &llvm_context) const { - std::vector in_scope_locals = - this->LiveParamsAtEntryAndExit(); - std::vector field_types; - std::transform( - in_scope_locals.begin(), in_scope_locals.end(), - std::back_inserter(field_types), - [](const BasicBlockVariable ¶m) { return param.param.type; }); - - return llvm::StructType::get(llvm_context, field_types, - "sty_for_basic_block_function"); -} - // Declare this function in an LLVM module. 
llvm::Function * FunctionDecl::DeclareInModule(std::string_view name, @@ -239,6 +219,28 @@ FunctionDecl::DeclareInModule(std::string_view name, return func; } +size_t BasicBlockContext::GetParamIndex(const ParameterDecl &decl) const { + auto stack_var = std::find(GetParams().begin(), GetParams().end(), decl); + CHECK(stack_var != GetParams().end()); + return stack_var - GetParams().begin(); +} + +llvm::Value *BasicBlockContext::ProvidePointerFromStruct( + llvm::IRBuilder<> &ir, llvm::StructType *sty, llvm::Value *target_sty, + const ParameterDecl &decl) const { + auto i32 = llvm::IntegerType::get(ir.getContext(), 32); + auto index = GetParamIndex(decl); + auto ptr = ir.CreateGEP( + sty, target_sty, + {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, index)}); + return ptr; +} + +llvm::Argument *BasicBlockContext::ProvidePointerFromFunctionArgs( + llvm::Function *func, const ParameterDecl ¶m) const { + return func->getArg(GetParamIndex(param) + remill::kNumBlockArgs); +} + ValueDecl SpecBlockContext::ReturnValue() const { return this->decl.returns; } @@ -265,7 +267,8 @@ SpecBlockContext::SpecBlockContext( offsets(std::move(offsets)), constants(std::move(constants)), live_params_at_entry(std::move(live_params_at_entry)), - live_params_at_exit(std::move(live_params_at_exit)) {} + live_params_at_exit(std::move(live_params_at_exit)), + params(decl.in_scope_variables) {} size_t SpecBlockContext::GetPointerDisplacement() const { return this->decl.GetPointerDisplacement(); @@ -287,6 +290,10 @@ const std::vector &SpecBlockContext::GetConstants() const { return this->constants; } +const std::vector &SpecBlockContext::GetParams() const { + return this->params; +} + // Interpret `target` as being the function to call, and call it from within // a basic block in a lifted bitcode function. Returns the new value of the // memory pointer. 
diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 790cf7b18..8d011896a 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -8,11 +8,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -24,26 +24,19 @@ #include #include "Lifters/CodeLifter.h" +#include "Lifters/FunctionLifter.h" #include "anvill/Declarations.h" #include "anvill/Optimize.h" namespace anvill { -CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlockFunction() && { +void BasicBlockLifter::LiftBasicBlockFunction() { auto bbfunc = this->CreateBasicBlockFunction(); this->LiftInstructionsIntoLiftedFunction(); CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); CHECK(!llvm::verifyFunction(*bbfunc.func, &llvm::errs())); - - //bbfunc.func->dump(); - //lifted_func->dump(); - //LOG(FATAL) << "fdumps"; - - this->RecursivelyInlineFunctionCallees(bbfunc.func); - - return CallableBasicBlockFunction(bbfunc.func, block_def, std::move(*this)); } @@ -202,9 +195,11 @@ bool BasicBlockLifter::DoInterProceduralControlFlow( } return !cc.stop; } else if (std::holds_alternative(override)) { - remill::AddTerminatingTailCall(block, intrinsics.function_return, - intrinsics); - return false; + auto func = block->getParent(); + auto should_return = func->getArg(kShouldReturnArgNum); + builder.CreateStore(llvm::Constant::getAllOnesValue( + llvm::IntegerType::getInt1Ty(llvm_context)), + should_return); } return true; @@ -354,7 +349,7 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { llvm::IRBuilder<> builder(bb); builder.CreateStore(remill::LoadNextProgramCounter(bb, this->intrinsics), - this->lifted_func->getArg(remill::kNumBlockArgs)); + this->lifted_func->getArg(kNextPCArgNum)); llvm::ReturnInst::Create( @@ -367,8 +362,9 @@ llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const { return this->GetAddrAnnotation(addr, 
this->semantics_module->getContext()); } -BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { - std::string name_ = "basic_block_func" + std::to_string(this->block_def.addr); +llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() { + std::string name_ = "func" + std::to_string(decl.address) + "basic_block" + + std::to_string(this->block_def.addr); auto &context = this->semantics_module->getContext(); llvm::FunctionType *lifted_func_type = llvm::dyn_cast(remill::RecontextualizeType( @@ -380,44 +376,43 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { // pointer to state pointer params[remill::kStatePointerArgNum] = llvm::PointerType::get(context, 0); - //next_pc_out - params.push_back(llvm::PointerType::get(context, 0)); - for (size_t i = 0; i < this->var_struct_ty->getNumElements(); i++) { // pointer to each param params.push_back(llvm::PointerType::get(context, 0)); } - - llvm::FunctionType *func_type = - llvm::FunctionType::get(lifted_func_type->getReturnType(), params, false); - + auto ret_type = this->block_context->ReturnValue(); + llvm::FunctionType *func_type = llvm::FunctionType::get( + this->flifter.curr_decl->type->getReturnType(), params, false); llvm::StringRef name(name_.data(), name_.size()); - auto func = - llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, 0u, - name, this->semantics_module); + return llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, + 0u, name, this->semantics_module); +} +BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { + auto func = bb_func; func->setMetadata(anvill::kBasicBlockMetadata, GetBasicBlockAnnotation(this->block_def.addr)); - - auto start_ind = lifted_func_type->getNumParams() + 1; - for (auto v : this->block_context->LiveParamsAtEntryAndExit()) { + auto &context = this->semantics_module->getContext(); + llvm::FunctionType *lifted_func_type = + llvm::dyn_cast(remill::RecontextualizeType( + 
this->options.arch->LiftedFunctionType(), context)); + auto start_ind = lifted_func_type->getNumParams(); + for (auto var : decl.in_scope_variables) { auto arg = remill::NthArgument(func, start_ind); - if (!v.param.name.empty()) { - arg->setName(v.param.name); + if (!var.name.empty()) { + arg->setName(var.name); } - if (std::all_of(v.param.oredered_locs.begin(), v.param.oredered_locs.end(), + if (std::all_of(var.oredered_locs.begin(), var.oredered_locs.end(), [](const LowLoc &loc) -> bool { return loc.reg; })) { // Registers should not have aliases arg->addAttr(llvm::Attribute::get(llvm_context, llvm::Attribute::AttrKind::NoAlias)); } - // TODO(Ian): If we can eliminate the stack then we also are able to declare more no aliases here, not sure the - // best way to handle this start_ind += 1; } @@ -425,23 +420,23 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto memory = remill::NthArgument(func, remill::kMemoryPointerArgNum); auto state = remill::NthArgument(func, remill::kStatePointerArgNum); auto pc = remill::NthArgument(func, remill::kPCArgNum); - auto next_pc_out = remill::NthArgument(func, remill::kNumBlockArgs); memory->setName("memory"); pc->setName("program_counter"); - next_pc_out->setName("next_pc_out"); state->setName("stack"); auto liftedty = this->options.arch->LiftedFunctionType(); std::vector new_params; - new_params.reserve(liftedty->getNumParams() + 1); + new_params.reserve(liftedty->getNumParams() + 2); for (auto param : liftedty->params()) { new_params.push_back(param); } - new_params.push_back(llvm::PointerType::get(context, 0)); + auto ptr_ty = llvm::PointerType::get(context, 0); + new_params.push_back(ptr_ty); + new_params.push_back(ptr_ty); llvm::FunctionType *new_func_type = llvm::FunctionType::get( @@ -450,7 +445,7 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { this->lifted_func = llvm::Function::Create( new_func_type, llvm::GlobalValue::ExternalLinkage, 0u, - std::string(name) + "lowlift", 
this->semantics_module); + func->getName() + "lowlift", this->semantics_module); options.arch->InitializeEmptyLiftedFunction(this->lifted_func); @@ -458,6 +453,11 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { llvm::BasicBlock::Create(context, "", func); auto &blk = func->getEntryBlock(); llvm::IRBuilder<> ir(&blk); + auto next_pc = ir.CreateAlloca(llvm::IntegerType::getInt64Ty(context), + nullptr, "next_pc"); + auto should_return = ir.CreateAlloca(llvm::IntegerType::getInt1Ty(context), + nullptr, "should_return"); + ir.CreateStore(llvm::ConstantInt::getFalse(context), should_return); ir.CreateStore(memory, ir.CreateAlloca(memory->getType(), nullptr, "MEMORY")); this->state_ptr = @@ -482,8 +482,9 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { ir.CreateStore(nmem, remill::LoadMemoryPointerRef(ir.GetInsertBlock())); } - PointerProvider ptr_provider = [this, func](size_t index) -> llvm::Value * { - return this->ProvidePointerFromFunctionArgs(func, index); + PointerProvider ptr_provider = + [this, func](const ParameterDecl ¶m) -> llvm::Value * { + return this->block_context->ProvidePointerFromFunctionArgs(func, param); }; LOG(INFO) << "Live values at entry to function " @@ -530,8 +531,8 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { ir, this->address_type, this->block_def.addr); ir.CreateStore(pc_val, pc_ptr); - std::array args = { - this->state_ptr, pc_val, mem_res, next_pc_out}; + std::array args = { + this->state_ptr, pc_val, mem_res, next_pc, should_return}; auto ret_mem = ir.CreateCall(this->lifted_func, args); @@ -540,16 +541,73 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { CHECK(ir.GetInsertPoint() == func->getEntryBlock().end()); - ir.CreateRet(ret_mem); - BasicBlockFunction bbf{func, pc_arg, mem_arg, next_pc_out}; + BasicBlockFunction bbf{func, pc_arg, mem_arg, next_pc, state}; + + TerminateBasicBlockFunction(func, ir, ret_mem, should_return, bbf); return bbf; } +// Setup 
the returns for this function we tail call all successors +void BasicBlockLifter::TerminateBasicBlockFunction( + llvm::Function *caller, llvm::IRBuilder<> &ir, llvm::Value *next_mem, + llvm::Value *should_return, const BasicBlockFunction &bbfunc) { + auto &context = this->bb_func->getContext(); + this->invalid_successor_block = + llvm::BasicBlock::Create(context, "invalid_successor", this->bb_func); + auto jump_block = llvm::BasicBlock::Create(context, "", this->bb_func); + auto ret_block = llvm::BasicBlock::Create(context, "", this->bb_func); + + // TODO(Ian): maybe want to call remill_error here + new llvm::UnreachableInst(next_mem->getContext(), + this->invalid_successor_block); + + auto should_return_value = + ir.CreateLoad(llvm::IntegerType::getInt1Ty(context), should_return); + ir.CreateCondBr(should_return_value, ret_block, jump_block); + + ir.SetInsertPoint(jump_block); + auto pc = ir.CreateLoad(address_type, bbfunc.next_pc_out); + auto sw = ir.CreateSwitch(pc, this->invalid_successor_block); + + for (auto e : this->block_def.outgoing_edges) { + auto succ_const = llvm::ConstantInt::get( + llvm::cast(this->address_type), e); + + auto calling_bb = + llvm::BasicBlock::Create(next_mem->getContext(), "", bbfunc.func); + llvm::IRBuilder<> calling_bb_builder(calling_bb); + auto &child_lifter = this->flifter.GetOrCreateBasicBlockLifter(e); + auto retval = child_lifter.ControlFlowCallBasicBlockFunction( + caller, calling_bb_builder, this->state_ptr, bbfunc.stack, next_mem); + if (this->flifter.curr_decl->type->getReturnType()->isVoidTy()) { + calling_bb_builder.CreateRetVoid(); + } else { + calling_bb_builder.CreateRet(retval); + } + sw->addCase(succ_const, calling_bb); + } + + ir.SetInsertPoint(ret_block); + if (this->flifter.curr_decl->type->getReturnType()->isVoidTy()) { + ir.CreateRetVoid(); + } else { + auto retval = anvill::LoadLiftedValue( + block_context->ReturnValue(), options.TypeDictionary(), intrinsics, + options.arch, ir, this->state_ptr, next_mem); + 
ir.CreateRet(retval); + } +} llvm::StructType *BasicBlockLifter::StructTypeFromVars() const { - return this->block_context->StructTypeFromVars(this->llvm_context); + std::vector field_types; + std::transform(decl.in_scope_variables.begin(), decl.in_scope_variables.end(), + std::back_inserter(field_types), + [](auto ¶m) { return param.type; }); + + return llvm::StructType::get(llvm_context, field_types, + "sty_for_basic_block_function"); } // Packs in scope variables into a struct @@ -561,7 +619,7 @@ void BasicBlockLifter::PackLiveValues( for (auto decl : decls) { if (!HasMemLoc(decl.param)) { - auto ptr = into_vars(decl.index); + auto ptr = into_vars(decl.param); auto state_loaded_value = LoadLiftedValue( decl.param, this->type_provider.Dictionary(), this->intrinsics, @@ -587,7 +645,7 @@ void BasicBlockLifter::UnpackLiveValues( for (auto decl : decls) { // is this how we want to do this.... now the value really doesnt live in memory anywhere but the frame. if (!HasMemLoc(decl.param)) { - auto ptr = returned_value(decl.index); + auto ptr = returned_value(decl.param); if (auto insn = llvm::dyn_cast(ptr)) { insn->setMetadata("anvill.type", this->type_specifier.EncodeToMetadata( decl.param.spec_type)); @@ -612,40 +670,34 @@ void BasicBlockLifter::UnpackLiveValues( CHECK(bldr.GetInsertPoint() == blk->end()); } - -void BasicBlockLifter::CallBasicBlockFunction( +// TODO(Ian): dependent on calling context we need fetch the memory and next program counter +// ref either from the args or from the parent func state +llvm::CallInst *BasicBlockLifter::CallBasicBlockFunction( llvm::IRBuilder<> &builder, llvm::Value *parent_state, - const CallableBasicBlockFunction &cbfunc, llvm::Value *parent_stack) const { - + llvm::Value *parent_stack, llvm::Value *memory_pointer) const { - std::vector args(remill::kNumBlockArgs + 1); + std::vector args(remill::kNumBlockArgs); auto out_param_locals = builder.CreateAlloca(this->var_struct_ty); args[0] = parent_stack; args[remill::kPCArgNum] = 
options.program_counter_init_procedure( - builder, this->address_type, cbfunc.GetBlock().addr); - args[remill::kMemoryPointerArgNum] = - remill::LoadMemoryPointer(builder, this->intrinsics); - - args[remill::kNumBlockArgs] = - remill::LoadNextProgramCounterRef(builder.GetInsertBlock()); - - auto bbvars = this->block_context->LiveParamsAtEntryAndExit(); + builder, this->address_type, block_def.addr); + args[remill::kMemoryPointerArgNum] = memory_pointer; AbstractStack stack( builder.getContext(), {{decl.maximum_depth, parent_stack}}, this->options.stack_frame_recovery_options.stack_grows_down, decl.GetPointerDisplacement()); - PointerProvider ptr_provider = [&builder, this, out_param_locals, &bbvars, - &stack](size_t index) -> llvm::Value * { - auto repr_var = bbvars[index]; - LOG(INFO) << "Lifting: " << repr_var.param.name << " for call"; - if (HasMemLoc(repr_var.param)) { + PointerProvider ptr_provider = + [&builder, this, out_param_locals, + &stack](const ParameterDecl &repr_var) -> llvm::Value * { + LOG(INFO) << "Lifting: " << repr_var.name << " for call"; + if (HasMemLoc(repr_var)) { // TODO(Ian): the assumption here since we are able to build a single pointer here into the frame is that // svars are single valuedecl contigous - CHECK(repr_var.param.oredered_locs.size() == 1); + CHECK(repr_var.oredered_locs.size() == 1); auto stack_ptr = stack.PointerToStackMemberFromOffset( - builder, repr_var.param.oredered_locs[0].mem_offset); + builder, repr_var.oredered_locs[0].mem_offset); if (stack_ptr) { return *stack_ptr; } else { @@ -653,57 +705,57 @@ void BasicBlockLifter::CallBasicBlockFunction( << "Unable to create a ptr to the stack, the stack is too small to represent the param."; } } - return this->ProvidePointerFromStruct(builder, out_param_locals, index); + + // ok so this should be provide pointer from args in a way + // stack probably shouldnt be passed at all, if we dont have a loc + // then it's not live + return 
block_context->ProvidePointerFromStruct(builder, var_struct_ty, + out_param_locals, repr_var); }; this->PackLiveValues(builder, parent_state, ptr_provider, this->block_context->LiveBBParamsAtEntry()); - - for (size_t ind = 0; - ind < this->block_context->LiveParamsAtEntryAndExit().size(); ind++) { - auto ptr = ptr_provider(ind); + for (auto ¶m : block_context->GetParams()) { + auto ptr = ptr_provider(param); CHECK(ptr != nullptr); args.push_back(ptr); } - auto new_mem_ptr = builder.CreateCall(cbfunc.GetFunction(), args); + auto retval = builder.CreateCall(bb_func, args); + retval->setTailCall(true); - auto mem_ptr_ref = remill::LoadMemoryPointerRef(builder.GetInsertBlock()); - - builder.CreateStore(new_mem_ptr, mem_ptr_ref); + return retval; +} - this->UnpackLiveValues(builder, ptr_provider, parent_state, - this->block_context->LiveBBParamsAtExit()); -} // namespace BasicBlockLifter::UnpackLiveValues(llvm::IRBuilder<>&bldr,PointerProviderreturned_value,llvm::Value*into_state_ptr,conststd::vector&decls)const +llvm::CallInst *BasicBlockLifter::ControlFlowCallBasicBlockFunction( + llvm::Function *caller, llvm::IRBuilder<> &builder, + llvm::Value *parent_state, llvm::Value *parent_stack, + llvm::Value *memory_pointer) const { + std::vector args; + std::transform(caller->arg_begin(), caller->arg_end(), + std::back_inserter(args), + [](llvm::Argument &arg) -> llvm::Value * { return &arg; }); -void CallableBasicBlockFunction::CallBasicBlockFunction( - llvm::IRBuilder<> &add_to_llvm, llvm::Value *parent_state, - llvm::Value *abstract_stack) const { - this->bb_lifter.CallBasicBlockFunction(add_to_llvm, parent_state, *this, - abstract_stack); -} - -CallableBasicBlockFunction BasicBlockLifter::LiftBasicBlock( - std::unique_ptr block_context, const FunctionDecl &decl, - const CodeBlock &block_def, const LifterOptions &options_, - llvm::Module *semantics_module, const TypeTranslator &type_specifier) { + auto retval = builder.CreateCall(bb_func, args); + 
retval->setTailCall(true); - return BasicBlockLifter(std::move(block_context), decl, block_def, options_, - semantics_module, type_specifier) - .LiftBasicBlockFunction(); + return retval; } BasicBlockLifter::BasicBlockLifter( std::unique_ptr block_context, const FunctionDecl &decl, - const CodeBlock &block_def, const LifterOptions &options_, - llvm::Module *semantics_module, const TypeTranslator &type_specifier) + CodeBlock block_def, const LifterOptions &options_, + llvm::Module *semantics_module, const TypeTranslator &type_specifier, + FunctionLifter &flifter) : CodeLifter(options_, semantics_module, type_specifier), block_context(std::move(block_context)), - block_def(block_def), - decl(decl) { + block_def(std::move(block_def)), + decl(decl), + flifter(flifter) { this->var_struct_ty = this->StructTypeFromVars(); + this->bb_func = this->DeclareBasicBlockFunction(); } CallableBasicBlockFunction::CallableBasicBlockFunction( @@ -721,22 +773,4 @@ llvm::Function *CallableBasicBlockFunction::GetFunction() const { return this->func; } -llvm::Value *BasicBlockLifter::ProvidePointerFromStruct(llvm::IRBuilder<> &ir, - llvm::Value *target_sty, - size_t index) const { - auto i32 = llvm::IntegerType::get(llvm_context, 32); - auto ptr = ir.CreateGEP( - this->var_struct_ty, target_sty, - {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, index)}); - return ptr; -} - -llvm::Value * -BasicBlockLifter::ProvidePointerFromFunctionArgs(llvm::Function *func, - size_t index) const { - return anvill::ProvidePointerFromFunctionArgs(func, index, - *this->block_context); -} - - } // namespace anvill diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 61f40fb92..565b365b5 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -21,12 +22,18 @@ namespace anvill { +enum : size_t { + kNextPCArgNum = remill::kNumBlockArgs, + kShouldReturnArgNum, + 
kNumLiftedBasicBlockArgs +}; struct BasicBlockFunction { llvm::Function *func; llvm::Argument *pc_arg; llvm::Argument *mem_ptr; - llvm::Argument *next_pc_out_param; + llvm::AllocaInst *next_pc_out; + llvm::Argument *stack; }; class CallableBasicBlockFunction; @@ -38,15 +45,8 @@ class CallableBasicBlockFunction; */ class BasicBlockLifter : public CodeLifter { private: - llvm::Value *ProvidePointerFromStruct(llvm::IRBuilder<> &ir, llvm::Value *, - size_t index) const; - - llvm::Value *ProvidePointerFromFunctionArgs(llvm::Function *, - size_t index) const; - - std::unique_ptr block_context; - const CodeBlock &block_def; + CodeBlock block_def; llvm::StructType *var_struct_ty{nullptr}; @@ -57,6 +57,14 @@ class BasicBlockLifter : public CodeLifter { const FunctionDecl &decl; + llvm::Function *bb_func{nullptr}; + + FunctionLifter &flifter; + + llvm::BasicBlock *invalid_successor_block{nullptr}; + + llvm::Function *DeclareBasicBlockFunction(); + llvm::StructType *StructTypeFromVars() const; remill::DecodingContext ApplyContextAssignments( @@ -69,6 +77,10 @@ class BasicBlockLifter : public CodeLifter { BasicBlockFunction CreateBasicBlockFunction(); + void TerminateBasicBlockFunction(llvm::Function *caller, + llvm::IRBuilder<> &ir, llvm::Value *next_mem, + llvm::Value *should_return, + const BasicBlockFunction &bbfunc); bool ApplyInterProceduralControlFlowOverride(const remill::Instruction &insn, llvm::BasicBlock *&block); @@ -96,21 +108,18 @@ class BasicBlockLifter : public CodeLifter { public: BasicBlockLifter(std::unique_ptr block_context, - const FunctionDecl &decl, const CodeBlock &block_def, + const FunctionDecl &decl, CodeBlock block_def, const LifterOptions &options_, llvm::Module *semantics_module, - const TypeTranslator &type_specifier); - static CallableBasicBlockFunction - LiftBasicBlock(std::unique_ptr block_context, - const FunctionDecl &decl, const CodeBlock &block_def, - const LifterOptions &options_, llvm::Module *semantics_module, - const TypeTranslator 
&type_specifier); + const TypeTranslator &type_specifier, + FunctionLifter &flifter); - CallableBasicBlockFunction LiftBasicBlockFunction() &&; + void LiftBasicBlockFunction(); - using PointerProvider = std::function; + using PointerProvider = + std::function; // Packs in scope variables into a struct @@ -124,9 +133,14 @@ class BasicBlockLifter : public CodeLifter { // Calls a basic block function and unpacks the result into the state - void CallBasicBlockFunction(llvm::IRBuilder<> &, llvm::Value *state_ptr, - const CallableBasicBlockFunction &, - llvm::Value *parent_stack) const; + llvm::CallInst *CallBasicBlockFunction(llvm::IRBuilder<> &, + llvm::Value *state_ptr, + llvm::Value *parent_stack, + llvm::Value *memory_pointer) const; + + llvm::CallInst *ControlFlowCallBasicBlockFunction( + llvm::Function *caller, llvm::IRBuilder<> &, llvm::Value *state_ptr, + llvm::Value *parent_stack, llvm::Value *memory_pointer) const; BasicBlockLifter(BasicBlockLifter &&) = default; }; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 9ac700549..0a227d1d0 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -135,38 +135,6 @@ FunctionLifter::FunctionLifter(const LifterOptions &options_, *this->options.module); } - -llvm::BranchInst * -FunctionLifter::BranchToInst(uint64_t from_addr, uint64_t to_addr, - const remill::DecodingContext &mapper, - llvm::BasicBlock *from_block) { - auto br = llvm::BranchInst::Create(GetOrCreateBlock(to_addr), from_block); - AnnotateInstruction(br, pc_annotation_id, pc_annotation); - return br; -} - - -llvm::BasicBlock *FunctionLifter::GetOrCreateBlock(uint64_t baddr) { - auto &block = this->addr_to_block[baddr]; - if (block) { - return block; - } - - std::stringstream ss; - ss << "inst_" << std::hex << baddr; - block = llvm::BasicBlock::Create(llvm_context, ss.str(), lifted_func); - - return block; -} - -llvm::BasicBlock * -FunctionLifter::GetOrCreateTargetBlock(const remill::Instruction 
&from_inst, - uint64_t to_addr, - const remill::DecodingContext &mapper) { - return GetOrCreateBlock(to_addr); -} - - void FunctionLifter::InsertError(llvm::BasicBlock *block) { llvm::IRBuilder<> ir{block}; auto tail = remill::AddTerminatingTailCall( @@ -367,41 +335,42 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { return GetOrDeclareFunction(decl); } -CallableBasicBlockFunction -FunctionLifter::LiftBasicBlockFunction(const CodeBlock &blk) const { +BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(uint64_t addr) { + std::pair key{curr_decl->address, addr}; + auto lifter = this->bb_lifters.find(key); + if (lifter != this->bb_lifters.end()) { + return lifter->second; + } std::unique_ptr context = std::make_unique( - this->curr_decl->GetBlockContext(blk.addr)); + this->curr_decl->GetBlockContext(addr)); - return BasicBlockLifter::LiftBasicBlock( - std::move(context), *this->curr_decl, blk, this->options, - this->semantics_module.get(), this->type_specifier); + CodeBlock defblk = {addr, 0, std::unordered_set(), + std::unordered_map()}; + auto maybe_blk = this->curr_decl->cfg.find(addr); + if (maybe_blk != this->curr_decl->cfg.end()) { + defblk = maybe_blk->second; + } + + auto inserted = this->bb_lifters.emplace( + key, + BasicBlockLifter(std::move(context), *this->curr_decl, std::move(defblk), + this->options, this->semantics_module.get(), + this->type_specifier, *this)); + return inserted.first->second; } +const BasicBlockLifter & +FunctionLifter::LiftBasicBlockFunction(const CodeBlock &blk) { + auto &lifter = this->GetOrCreateBasicBlockLifter(blk.addr); + lifter.LiftBasicBlockFunction(); + return lifter; +} void FunctionLifter::VisitBlock(CodeBlock blk, llvm::Value *lifted_function_state, llvm::Value *abstract_stack) { - auto llvm_blk = this->GetOrCreateBlock(blk.addr); - llvm::IRBuilder<> builder(llvm_blk); - - - auto bbfunc = this->LiftBasicBlockFunction(blk); - - CHECK(!llvm::verifyFunction(*bbfunc.GetFunction(), 
&llvm::errs())); - - bbfunc.CallBasicBlockFunction(builder, lifted_function_state, abstract_stack); - CHECK(anvill::GetBasicBlockAddr(bbfunc.GetFunction()).has_value()); - - auto pc = remill::LoadNextProgramCounter(llvm_blk, this->intrinsics); - - auto sw = builder.CreateSwitch(pc, this->invalid_successor_block); - - for (uint64_t succ : blk.outgoing_edges) { - sw->addCase(llvm::ConstantInt::get( - llvm::cast(this->address_type), succ), - this->GetOrCreateBlock(succ)); - } + LiftBasicBlockFunction(blk); } void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state, @@ -414,15 +383,6 @@ void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state, DLOG(INFO) << "Visiting: " << std::hex << addr; this->VisitBlock(blk, lifted_function_state, abstract_stack); } - - // NOTE(Ian): some blocks may be empty ie. if the CFG communicates a possible transition to some undecodeable - // bytes so here we check for block transfers that got added that we havent initialized and add an error - // if we end up transferring there. - for (auto &blks : this->lifted_func->getBasicBlockList()) { - if (!blks.getTerminator()) { - llvm::BranchInst::Create(this->invalid_successor_block, &blks); - } - } } @@ -495,14 +455,6 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { this->CreateLiftedFunction(native_func->getName().str() + ".lifted"); lifted_func = lifted_func_st.func; - - invalid_successor_block = - llvm::BasicBlock::Create(lifted_func_st.func->getContext(), - "invalid_successor", lifted_func_st.func); - remill::AddTerminatingTailCall(invalid_successor_block, intrinsics.error, - intrinsics); - - const auto pc = lifted_func_st.pc_arg; const auto entry_block = &(lifted_func->getEntryBlock()); pc_reg_ref = @@ -545,8 +497,23 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // // TODO: This could be a thunk, that we are maybe lifting on purpose. // How should control flow redirection behave in this case? 
- auto entry_insn = this->GetOrCreateBlock(this->func_address); - ir.CreateBr(entry_insn); + auto &entry_lifter = this->GetOrCreateBasicBlockLifter(this->func_address); + + auto memptr = remill::LoadMemoryPointer(ir, this->intrinsics); + + auto call_inst = entry_lifter.CallBasicBlockFunction( + ir, lifted_func_st.state_ptr, abstract_stack, memptr); + + if (!call_inst->getType()->isVoidTy()) { + // TODO(Ian): this memptr is not right + // The bad effect that could happen here I guess is that the return read might not be tied to + // this store. + memptr = StoreNativeValue(call_inst, curr_decl->returns, + type_specifier.Dictionary(), intrinsics, ir, + lifted_func_st.state_ptr, memptr); + } + + ir.CreateRet(memptr); AnnotateInstructions(entry_block, pc_annotation_id, GetPCAnnotation(func_address)); @@ -730,25 +697,25 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, const auto target_module = options.module; auto &module_context = target_module->getContext(); - + std::string prefix = "func" + std::to_string(decl.address); if (!func->isDeclaration()) { for (auto &[block_addr, block] : decl.cfg) { - std::string name = "basic_block_func" + std::to_string(block_addr); + std::string name = prefix + "basic_block" + std::to_string(block_addr); auto new_version = target_module->getFunction(name); + auto old_version = semantics_module->getFunction(name); if (!new_version) { - auto old_version = semantics_module->getFunction(name); auto type = llvm::dyn_cast(remill::RecontextualizeType( old_version->getFunctionType(), module_context)); new_version = llvm::Function::Create( type, llvm::GlobalValue::ExternalLinkage, name, target_module); - remill::CloneFunctionInto(old_version, new_version); - new_version->setMetadata( - kBasicBlockMetadata, - this->GetAddrAnnotation(block_addr, module_context)); - CHECK(anvill::GetBasicBlockAddr(new_version).has_value()); } + remill::CloneFunctionInto(old_version, new_version); + new_version->setMetadata( + kBasicBlockMetadata, + 
this->GetAddrAnnotation(block_addr, module_context)); + CHECK(anvill::GetBasicBlockAddr(new_version).has_value()); } } diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 03d649fc8..2fe5ebc5e 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -32,6 +32,19 @@ #include "CodeLifter.h" #include "Lifters/BasicBlockLifter.h" +namespace std { +template +struct hash> { + std::size_t operator()(std::pair const &p) const { + std::size_t seed(0); + llvm::hash_combine(seed, p.first); + llvm::hash_combine(seed, p.second); + + return seed; + } +}; +} // namespace std + namespace llvm { class Constant; class Function; @@ -64,6 +77,8 @@ struct LiftedFunction { // Orchestrates lifting of instructions and control-flow between instructions. class FunctionLifter : public CodeLifter { + friend class BasicBlockLifter; + public: ~FunctionLifter(void); @@ -89,8 +104,9 @@ class FunctionLifter : public CodeLifter { const FunctionDecl &decl, EntityLifterImpl &lifter_context) const; + BasicBlockLifter &GetOrCreateBasicBlockLifter(uint64_t addr); - CallableBasicBlockFunction LiftBasicBlockFunction(const CodeBlock &) const; + const BasicBlockLifter &LiftBasicBlockFunction(const CodeBlock &); llvm::Function *GetBasicBlockFunction(uint64_t address) const; @@ -164,8 +180,9 @@ class FunctionLifter : public CodeLifter { // Maps program counters to lifted functions. std::unordered_map addr_to_func; - - llvm::BasicBlock *invalid_successor_block{nullptr}; + // maps a bbaddr to the lifter for that block + std::unordered_map, BasicBlockLifter> + bb_lifters; // Get the annotation for the program counter `pc`, or `nullptr` if we're // not doing annotations. @@ -175,23 +192,6 @@ class FunctionLifter : public CodeLifter { // returned function is a "high-level" function. 
llvm::Function *GetOrDeclareFunction(const FunctionDecl &decl); - - llvm::BranchInst *BranchToInst(uint64_t from_addr, uint64_t to_addr, - const remill::DecodingContext &mapper, - llvm::BasicBlock *from_block); - - // Helper to get the basic block to contain the instruction at `addr`. This - // function drives a work list, where the first time we ask for the - // instruction at `addr`, we enqueue a bit of work to decode and lift that - // instruction. - llvm::BasicBlock *GetOrCreateBlock(uint64_t addr); - - // Attempts to lookup any redirection of the given address, and then - // calls GetOrCreateBlock - llvm::BasicBlock * - GetOrCreateTargetBlock(const remill::Instruction &from_inst, uint64_t to_addr, - const remill::DecodingContext &mapper); - void InsertError(llvm::BasicBlock *block); diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index d5655f064..03c9144e2 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -53,7 +53,6 @@ #include #include #include -#include // clang-format on #include @@ -63,7 +62,6 @@ #include #include #include -#include #include #include #include @@ -78,7 +76,6 @@ #include #include -#include "anvill/Passes/RemoveAnvillReturns.h" #include "anvill/Passes/SplitStackFrameAtReturnAddress.h" #include "anvill/Specification.h" @@ -221,9 +218,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::InstCombinePass()); fpm.addPass(llvm::VerifierPass()); - fpm.addPass(anvill::ReplaceRemillFunctionReturnsWithAnvillFunctionReturns( - contexts, lifter)); - fpm.addPass(llvm::VerifierPass()); AddSinkSelectionsIntoBranchTargets(fpm); fpm.addPass(llvm::VerifierPass()); AddRemoveUnusedFPClassificationCalls(fpm); @@ -304,9 +298,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, AddTransformRemillJumpIntrinsics(second_fpm, xr); second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); - if 
(options.should_remove_assignments_to_next_pc) { - second_fpm.addPass(anvill::RemoveAssignmentsToNextPC(contexts, lifter)); - } //AddRemoveRemillFunctionReturns(second_fpm, xr); //AddConvertSymbolicReturnAddressToConcreteReturnAddress(second_fpm); AddLowerRemillUndefinedIntrinsics(second_fpm); @@ -343,21 +334,18 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, } } + mpm.run(module, mam); + if (lifter.Options().should_inline_basic_blocks) { llvm::FunctionPassManager inliner; - inliner.addPass(InlineBasicBlockFunctions(contexts, lifter)); + inliner.addPass(InlineBasicBlockFunctions(contexts)); llvm::ModulePassManager mpminliner; mpminliner.addPass( llvm::createModuleToFunctionPassAdaptor(std::move(inliner))); mpminliner.addPass( llvm::createModuleToPostOrderCGSCCPassAdaptor(llvm::InlinerPass())); - llvm::FunctionPassManager rm_returns; - rm_returns.addPass(anvill::RemoveAnvillReturns()); - - mpminliner.addPass( - llvm::createModuleToFunctionPassAdaptor(std::move(rm_returns))); mpminliner.run(module, mam); diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index daaa65012..19bec6289 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -6,6 +6,7 @@ * the LICENSE file found in the root directory of this source tree. 
*/ +#include #include #include #include @@ -568,7 +569,7 @@ bool ConvertPointerArithmeticToGEP::Impl::FoldScaledIndex(llvm::Function &f) { llvm::PreservedAnalyses ConvertPointerArithmeticToGEP::runOnBasicBlockFunction( llvm::Function &function, llvm::FunctionAnalysisManager &fam, - const anvill::BasicBlockContext &) { + const anvill::BasicBlockContext &, const FunctionDecl &) { bool changed = impl->ConvertLoadInt(function); changed |= impl->FoldPtrAdd(function); changed |= impl->FoldScaledIndex(function); diff --git a/lib/Passes/InlineBasicBlockFunctions.cpp b/lib/Passes/InlineBasicBlockFunctions.cpp index 7b2b8095b..8b2674194 100644 --- a/lib/Passes/InlineBasicBlockFunctions.cpp +++ b/lib/Passes/InlineBasicBlockFunctions.cpp @@ -1,7 +1,6 @@ #include "anvill/Passes/InlineBasicBlockFunctions.h" #include -#include #include #include #include @@ -30,7 +29,7 @@ llvm::StringRef InlineBasicBlockFunctions::name(void) { llvm::PreservedAnalyses InlineBasicBlockFunctions::runOnBasicBlockFunction( llvm::Function &F, llvm::FunctionAnalysisManager &AM, - const anvill::BasicBlockContext &cont) { + const anvill::BasicBlockContext &cont, const anvill::FunctionDecl &) { F.removeFnAttr(llvm::Attribute::NoInline); F.addFnAttr(llvm::Attribute::AlwaysInline); return llvm::PreservedAnalyses::all(); diff --git a/lib/Passes/RemoveAnvillReturns.cpp b/lib/Passes/RemoveAnvillReturns.cpp deleted file mode 100644 index 4ad2f6234..000000000 --- a/lib/Passes/RemoveAnvillReturns.cpp +++ /dev/null @@ -1,77 +0,0 @@ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "Utils.h" - -namespace anvill { -llvm::StringRef RemoveAnvillReturns::name(void) { - return "Remove anvill returns"; -} - -llvm::PreservedAnalyses -RemoveAnvillReturns::run(llvm::Function &F, llvm::FunctionAnalysisManager &AM) { - auto intrinsic = F.getParent()->getFunction(anvill::kAnvillBasicBlockReturn); - bool changed = 
false; - - if (intrinsic) { - std::vector calls; - for (auto &insn : llvm::instructions(&F)) { - if (auto cc = llvm::dyn_cast(&insn)) { - if (cc->getCalledFunction() == intrinsic) { - calls.push_back(cc); - } - } - } - - for (auto cc : calls) { - // either it's a void return with no args or there is 1 arg that is the type of the return - if ((F.getReturnType()->isVoidTy() && cc->arg_size() == 0) || - (cc->arg_size() == 1 && - F.getReturnType() == cc->getArgOperand(0)->getType())) { - changed = true; - auto to_block = cc->getParent()->getTerminator(); - // block must be wellformed - CHECK(to_block); - to_block->eraseFromParent(); - - - if (F.getReturnType()->isVoidTy()) { - llvm::ReturnInst::Create(F.getContext(), cc->getParent()); - } else { - llvm::ReturnInst::Create(F.getContext(), cc->getArgOperand(0), - cc->getParent()); - } - - cc->eraseFromParent(); - } else { - - LOG_IF(ERROR, cc->arg_size() == 1) - << "Ret ty: " << remill::LLVMThingToString(F.getReturnType()) - << " arg mismatch: " - << remill::LLVMThingToString(cc->getArgOperand(0)->getType()); - LOG_IF(ERROR, cc->arg_size() == 0) - << "Expected void type for function with type: " - << remill::LLVMThingToString(F.getReturnType()); - } - } - } - - return changed ? 
llvm::PreservedAnalyses::none() - : llvm::PreservedAnalyses::all(); -} -} // namespace anvill \ No newline at end of file diff --git a/lib/Passes/RemoveAssignmentsToNextPC.cpp b/lib/Passes/RemoveAssignmentsToNextPC.cpp deleted file mode 100644 index b1836dc6d..000000000 --- a/lib/Passes/RemoveAssignmentsToNextPC.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "Utils.h" - -namespace anvill { - -llvm::StringRef RemoveAssignmentsToNextPC::name(void) { - return "Replace stack references"; -} - - -namespace { -std::optional -UniqueAssignmentToNextPc(llvm::Function *func) { - auto target_arg = remill::NthArgument(func, remill::kNumBlockArgs); - - if (target_arg->getNumUses() == 1) { - if (auto *user = - llvm::dyn_cast(*target_arg->user_begin())) { - return user; - } - } - - return std::nullopt; -} - - -llvm::Function *GetOrCreateGotoInstrinsic(llvm::Module *mod, - llvm::IntegerType *addr_ty) { - auto fun = mod->getFunction(anvill::kAnvillGoto); - if (fun) { - return fun; - } - auto tgt_type = llvm::FunctionType::get( - llvm::Type::getVoidTy(mod->getContext()), {addr_ty}, false); - return llvm::Function::Create(tgt_type, llvm::GlobalValue::ExternalLinkage, - anvill::kAnvillGoto, mod); -} - - -llvm::BasicBlock *CreateTargetBlock(llvm::Value *mem_val, llvm::Constant *c, - llvm::Function *func, - llvm::Function *intrinsic) { - auto bb = llvm::BasicBlock::Create(func->getContext(), "", func); - - llvm::IRBuilder<> ir(bb); - ir.CreateCall(intrinsic, {c}); - ir.CreateRet(mem_val); - - return bb; -} - -} // namespace - - -namespace pats = llvm::PatternMatch; -llvm::PreservedAnalyses RemoveAssignmentsToNextPC::runOnBasicBlockFunction( - llvm::Function &F, llvm::FunctionAnalysisManager &AM, - const anvill::BasicBlockContext &cont) { - - auto next_pc_assign = UniqueAssignmentToNextPc(&F); - auto maybe_unique_ret 
= UniqueReturn(&F); - if (!next_pc_assign || !maybe_unique_ret) { - return llvm::PreservedAnalyses::all(); - } - - auto unique_ret = *maybe_unique_ret; - - - auto stored = (*next_pc_assign)->getValueOperand(); - // now we have threes cases we can handle: constant in which case terminate with a goto, select on constant, create a terminating if goto, - // non constant (now we could try to recover a jump table here, but instead just switch on the stored pc value) - // TODO(Ian): we may be able to use the jump table analysis here to recover more idiomatic switching.. we are essentially re-doing anvill complete switch here - llvm::Constant *first{nullptr}; - llvm::Constant *second{nullptr}; - llvm::Value *condition{nullptr}; - - auto goto_instrinsic = GetOrCreateGotoInstrinsic( - F.getParent(), this->lifter.Options().arch->AddressType()); - if (pats::match(stored, pats::m_Constant(first))) { - // TODO(Ian): should probably check pc taint - llvm::IRBuilder<> ir(unique_ret); - ir.CreateCall(goto_instrinsic, {first}); - (*next_pc_assign)->eraseFromParent(); - } else if (pats::match(stored, pats::m_Select(pats::m_Value(condition), - pats::m_Constant(first), - pats::m_Constant(second)))) { - auto mem = unique_ret->getReturnValue(); - llvm::IRBuilder<> ir(unique_ret->getParent()); - unique_ret->eraseFromParent(); - auto f = CreateTargetBlock(mem, first, &F, goto_instrinsic); - auto s = CreateTargetBlock(mem, second, &F, goto_instrinsic); - ir.CreateCondBr(condition, f, s); - (*next_pc_assign)->eraseFromParent(); - } else { - // not supported yet - return llvm::PreservedAnalyses::all(); - } - - CHECK(!llvm::verifyFunction(F, &llvm::errs())); - - return llvm::PreservedAnalyses::none(); -} - -} // namespace anvill \ No newline at end of file diff --git a/lib/Passes/RemoveRemillFunctionReturns.cpp b/lib/Passes/RemoveRemillFunctionReturns.cpp deleted file mode 100644 index 0607a37d2..000000000 --- a/lib/Passes/RemoveRemillFunctionReturns.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/* - * 
Copyright (c) 2019-present, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "Utils.h" - -namespace anvill { -namespace { - -// Remove a single case of a call to `__remill_function_return` where the return -// addresses reaches the `pc` argument of the call. -static void FoldReturnAddressMatch(llvm::CallBase *call) { - auto module = call->getModule(); - auto ret_addr = - llvm::dyn_cast(call->getArgOperand(remill::kPCArgNum)); - auto mem_ptr = call->getArgOperand(remill::kMemoryPointerArgNum); - CopyMetadataTo(call, mem_ptr); - call->replaceAllUsesWith(mem_ptr); - call->eraseFromParent(); - - // Work up the use list of casts back to the source of this return - // address, eliminating as many of those values as possible. - while (ret_addr && ret_addr->use_empty()) { - - // Cast of `llvm.returnaddress`. - if (auto cast_inst = llvm::dyn_cast(ret_addr)) { - auto next_ret_addr = - llvm::dyn_cast(cast_inst->getOperand(0)); - ret_addr->eraseFromParent(); - ret_addr = next_ret_addr; - - // Call to `llvm.returnaddress`. - } else if (IsReturnAddress(module, ret_addr)) { - ret_addr->eraseFromParent(); - break; - - // Who knows?! - } else { - LOG(ERROR) - << "Encountered unexpected instruction when removing return address: " - << remill::LLVMThingToString(ret_addr); - break; - } - } -} - -// Override the return address in the function `func` with values from -// `fixups`. -static void OverwriteReturnAddress( - llvm::Function &func, llvm::Function *addr_of_ret_addr_func, - std::vector> &fixups) { - - // Get the address of our return address. 
- const auto addr_of_ret_addr = llvm::CallInst::Create( - addr_of_ret_addr_func, {}, llvm::None, llvm::Twine::createNull(), - &(func.getEntryBlock().front())); - - for (auto &[call, ret_addr] : fixups) { - // Store the return address. - llvm::IRBuilder<> ir(call); - auto *bit_cast = ir.CreateBitCast( - addr_of_ret_addr, llvm::PointerType::get(ir.getContext(), 0)); - CopyMetadataTo(call, bit_cast); - auto *store = ir.CreateStore(ret_addr, bit_cast); - CopyMetadataTo(call, store); - - // Get rid of the `__remill_function_return`. - auto *mem_ptr = call->getArgOperand(remill::kMemoryPointerArgNum); - CopyMetadataTo(call, mem_ptr); - call->replaceAllUsesWith(mem_ptr); - call->eraseFromParent(); - } -} - -} // namespace - -llvm::StringRef RemoveRemillFunctionReturns::name(void) { - return "RemoveRemillFunctionReturns"; -} - -// Try to identify the patterns of `__remill_function_call` that we can -// remove. -llvm::PreservedAnalyses -RemoveRemillFunctionReturns::run(llvm::Function &func, - llvm::FunctionAnalysisManager &AM) { - const auto module = func.getParent(); - CrossReferenceFolder xref_folder(xref_resolver, module->getDataLayout()); - StackPointerResolver sp_resolver(module, {}); - - std::vector matches_pattern; - std::vector> fixups; - - for (auto &inst : llvm::instructions(func)) { - if (auto call = llvm::dyn_cast(&inst)) { - if (auto func = call->getCalledFunction(); - func && func->getName() == "__remill_function_return") { - auto ret_addr = call->getArgOperand(remill::kPCArgNum) - ->stripPointerCastsAndAliases(); - switch ( - QueryReturnAddress(xref_folder, sp_resolver, module, ret_addr)) { - case kFoundReturnAddress: matches_pattern.push_back(call); break; - - // Do nothing if it's a symbolic stack pointer load; we're probably - // running this pass too early. - case kFoundSymbolicStackPointerLoad: break; - - // Here we'll do an arch-specific fixup. 
- case kUnclassifiableReturnAddress: - fixups.emplace_back(call, ret_addr); - break; - } - } - } - } - - auto ret = false; - - // Go remove all the matches that we can. - for (auto call : matches_pattern) { - FoldReturnAddressMatch(call); - ret = true; - } - - // Go use the `llvm.addressofreturnaddress` to store replace the return - // address. - if (!fixups.empty()) { - if (auto addr_of_ret_addr_func = AddressOfReturnAddressFunction(module)) { - OverwriteReturnAddress(func, addr_of_ret_addr_func, fixups); - ret = true; - } - } - - return ConvertBoolToPreserved(ret); -} - -// Returns `true` if `val` is a return address. -ReturnAddressResult RemoveRemillFunctionReturns::QueryReturnAddress( - const CrossReferenceFolder &xref_folder, - const StackPointerResolver &sp_resolver, llvm::Module *module, - llvm::Value *val) const { - - if (IsReturnAddress(module, val)) { - return kFoundReturnAddress; - } - - if (auto call = llvm::dyn_cast(val)) { - if (auto func = call->getCalledFunction()) { - if (func->getName().startswith("__remill_read_memory_")) { - auto addr = call->getArgOperand(1); // Address - if (IsRelatedToStackPointer(module, addr)) { - return kFoundSymbolicStackPointerLoad; - } else { - return kUnclassifiableReturnAddress; - } - } - } - return kUnclassifiableReturnAddress; - - } else if (auto li = llvm::dyn_cast(val)) { - if (IsRelatedToStackPointer(module, li->getPointerOperand())) { - return kFoundSymbolicStackPointerLoad; - } else { - return kUnclassifiableReturnAddress; - } - - } else if (auto pti = llvm::dyn_cast(val)) { - return QueryReturnAddress(xref_folder, sp_resolver, module, - pti->getOperand(0)); - - } else if (auto cast = llvm::dyn_cast(val)) { - return QueryReturnAddress(xref_folder, sp_resolver, module, - cast->getOperand(0)); - - } else if (IsRelatedToStackPointer(module, val)) { - return kFoundSymbolicStackPointerLoad; - - // Sometimes optimizations result in really crazy looking constant expressions - // related to `__anvill_ra`, full of 
shifts, zexts, etc. We try to detect - // this situation by initializing a "magic" address associated with - // `__anvill_ra`, and then if we find this magic value on something that - // references `__anvill_ra`, then we conclude that all those manipulations - // in the constant expression are actually not important. - } else if (auto xr = xref_folder.TryResolveReferenceWithClearedCache(val); - xr.is_valid && xr.references_return_address && - xr.u.address == xref_folder.MagicReturnAddressValue()) { - return kFoundReturnAddress; - - } else { - return kUnclassifiableReturnAddress; - } -} - -// Transforms the bitcode to eliminate calls to `__remill_function_return`, -// where appropriate. This will not succeed for all architectures, but is -// likely to always succeed for x86(-64) and aarch64, due to their support -// for the `llvm.addressofreturnaddress` intrinsic. -// -// When we lift bitcode, we represent the control-flow transfer semantics of -// function returns with calls to `__remill_function_return`. This is another -// three-argument Remill function, where the second argument is the program -// counter. We're particularly interested in observing this program counter -// value, as it can tell us if this function respects normal return conventions -// (i.e. returns to its return address) or not. The way we try to observe this -// is by inspecting the program counter argument, and seeing if it is -// `__anvill_ra` or the (casted) value returned from the `llvm.returnaddress` -// intrinsic. -// -// When we match the expected pattern, we can eliminate calls to -// `__remill_function_return`. If we don't match the pattern, then it suggests -// that it is possible that the function alters its return address, or that -// something is preventing our analysis from deducing that the return address -// reaches the `__remill_function_return` call's program counter argument. 
-// -// On x86(-64) and AArch64, we can use the `llvm.addressofreturnaddress` to -// update the return address in place when we fail to match the pattern, -// thereby letting us eliminate the call to `__remill_function_return`. -// -// NOTE(pag): This pass should be applied as late as possible, as the call to -// `__remill_function_return` depends upon the memory pointer. -void AddRemoveRemillFunctionReturns( - llvm::FunctionPassManager &fpm, - const CrossReferenceResolver &xref_resolver) { - fpm.addPass(RemoveRemillFunctionReturns(xref_resolver)); -} - -} // namespace anvill diff --git a/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp b/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp deleted file mode 100644 index cea542ede..000000000 --- a/lib/Passes/ReplaceRemillFunctionReturnsWithAnvillFunctionReturns.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "Utils.h" -#include "anvill/Declarations.h" - -namespace anvill { -llvm::StringRef -ReplaceRemillFunctionReturnsWithAnvillFunctionReturns::name(void) { - return "ReplaceRemillFunctionReturnsWithAnvillFunctionReturns"; -} - - -llvm::PreservedAnalyses -ReplaceRemillFunctionReturnsWithAnvillFunctionReturns::runOnBasicBlockFunction( - llvm::Function &F, llvm::FunctionAnalysisManager &AM, - const anvill::BasicBlockContext &bbcont) { - - std::vector to_replace; - for (auto &insn : llvm::instructions(F)) { - if (llvm::CallBase *call = llvm::dyn_cast(&insn)) { - if (call->getCalledFunction() && - call->getCalledFunction()->getName().startswith( - "__remill_function_return")) { - - to_replace.push_back(call); - } - } - } - - - auto unique_ret = UniqueReturn(&F); - - ValueDecl ret_decl = bbcont.ReturnValue(); - remill::IntrinsicTable intrinsics(F.getParent()); - auto pres_analyses = llvm::PreservedAnalyses::all(); - for (auto rep : to_replace) { - auto state = 
rep->getArgOperand(0); - auto mem = rep->getArgOperand(2); - llvm::IRBuilder<> ir(rep); - ir.SetInsertPoint(rep); - // TODO(Ian): assumes the block is terminated by a ret... what about conditional returns - if (unique_ret && to_replace.size() == 1) { - ir.SetInsertPoint(*unique_ret); - } - - std::vector args; - - if (ret_decl.oredered_locs.size() != 0 && !ret_decl.type->isVoidTy()) { - args.push_back(anvill::LoadLiftedValue( - ret_decl, this->lifter.Options().TypeDictionary(), intrinsics, - this->lifter.Options().arch, ir, state, mem)); - } - - - auto tgt = GetOrCreateAnvillReturnFunc(F.getParent()); - ir.CreateCall(tgt, args); - - rep->replaceAllUsesWith(mem); - rep->eraseFromParent(); - pres_analyses = llvm::PreservedAnalyses::none(); - } - - CHECK(!llvm::verifyFunction(F, &llvm::errs())); - - return pres_analyses; -} -} // namespace anvill \ No newline at end of file diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 218e7e864..f8b3d377f 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -248,7 +248,7 @@ class StackModel { llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( llvm::Function &F, llvm::FunctionAnalysisManager &AM, - const BasicBlockContext &cont) { + const BasicBlockContext &cont, const FunctionDecl &fdecl) { size_t overrunsz = cont.GetMaxStackSize() - cont.GetStackSize(); llvm::IRBuilder<> ent_insert(&F.getEntryBlock(), F.getEntryBlock().begin()); auto overrunptr = ent_insert.CreateAlloca( @@ -265,10 +265,8 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( lifter.Options().stack_frame_recovery_options.stack_grows_down, cont.GetPointerDisplacement()); - StackModel smodel(cont, this->lifter.Options().arch, stk); - NullCrossReferenceResolver resolver; StackCrossReferenceResolver folder(resolver, this->lifter.DataLayout(), stk); @@ -293,8 +291,8 @@ llvm::PreservedAnalyses 
ReplaceStackReferences::runOnBasicBlockFunction( //TODO(Ian) handle nonzero offset if (referenced_variable.has_value()) { - auto g = anvill::ProvidePointerFromFunctionArgs( - &F, referenced_variable->decl.index, cont); + auto g = cont.ProvidePointerFromFunctionArgs( + &F, referenced_variable->decl.decl); auto ptr = GetPtrToOffsetInto(ent_insert, this->lifter.DataLayout(), referenced_variable->decl.decl.type, g, referenced_variable->offset); @@ -369,8 +367,8 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( // TODO(Ian): this isnt sound if the resolved stack pointer then has further manipulation causing it to land inside a variable anvill::GetBasicBlockStackPtr(&F)->addAttr(noalias); - for (auto lives : cont.LiveParamsAtEntryAndExit()) { - ProvidePointerFromFunctionArgs(&F, lives.index, cont)->addAttr(noalias); + for (auto ¶m : cont.GetParams()) { + cont.ProvidePointerFromFunctionArgs(&F, param)->addAttr(noalias); } } diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index dd6964a68..a816ed56a 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -540,6 +540,16 @@ Result ProtobufTranslator::DecodeFunction( decl.maximum_depth = decl.GetPointerDisplacement() + frame.max_frame_depth(); + for (auto &var : function.in_scope_vars()) { + auto maybe_res = DecodeParameter(var); + if (!maybe_res.Succeeded()) { + LOG(ERROR) << "Couldn't decode live variable: " << var.name() + << " " + maybe_res.TakeError(); + } else { + decl.in_scope_variables.push_back(maybe_res.TakeValue()); + } + } + if (decl.maximum_depth < decl.stack_depth) { LOG(ERROR) << "Analyzed max depth is smaller than the initial depth overriding"; diff --git a/lib/Specification.cpp b/lib/Specification.cpp index a6db53794..da790b8ae 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -417,8 +417,9 @@ void Specification::ForEachSymbol( } SpecBlockContexts::SpecBlockContexts(const Specification &spec) { - spec.ForEachFunction([this](auto decl) { + 
spec.ForEachFunction([this](std::shared_ptr decl) { decl->AddBBContexts(this->contexts); + funcs[decl->address] = decl; return true; }); } @@ -434,6 +435,11 @@ SpecBlockContexts::GetBasicBlockContextForAddr(uint64_t addr) const { std::cref(cont->second)}; } +const FunctionDecl & +SpecBlockContexts::GetFunctionAtAddress(uint64_t addr) const { + return *funcs.at(addr); +} + // Call `cb` on each function in the spec, until `cb` returns `false`. void Specification::ForEachFunction( std::function)> cb) const { diff --git a/lib/Type.cpp b/lib/Type.cpp index 9aa8d54ce..a6722dcf4 100644 --- a/lib/Type.cpp +++ b/lib/Type.cpp @@ -34,6 +34,28 @@ namespace anvill { +bool operator==(std::shared_ptr a, + std::shared_ptr b) { + return *a == *b; +} + +bool operator==(std::shared_ptr a, std::shared_ptr b) { + return *a == *b; +} + +bool operator==(std::shared_ptr a, std::shared_ptr b) { + return *a == *b; +} + +bool operator==(std::shared_ptr a, std::shared_ptr b) { + return *a == *b; +} + +bool operator==(std::shared_ptr a, + std::shared_ptr b) { + return *a == *b; +} + class TypeSpecifierImpl { public: llvm::LLVMContext &context; diff --git a/lib/Utils.cpp b/lib/Utils.cpp index cb7d6f305..9dea6c049 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -982,14 +982,6 @@ llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func) { return func->getArg(0); } -llvm::Argument * -ProvidePointerFromFunctionArgs(llvm::Function *func, size_t index, - const anvill::BasicBlockContext &context) { - CHECK(remill::kNumBlockArgs + 1 + context.LiveParamsAtEntryAndExit().size() == - func->arg_size()); - return func->getArg(index + remill::kNumBlockArgs + 1); -} - bool HasMemLoc(const ValueDecl &v) { return std::any_of(v.oredered_locs.begin(), v.oredered_locs.end(), [](const LowLoc &loc) -> bool { return loc.mem_reg; }); From d6605c74bb0ce0d16f4588a859f14a944122d019 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 29 Mar 2023 18:55:33 -0400 Subject: [PATCH 127/163] Ian/fix hash for typespecs (#367) * 
add hash defs * fix more pointer hashes --- include/anvill/Type.h | 120 +++++++++++++++++++++++++++++++++++++++++ include/anvill/Utils.h | 1 + lib/Declarations.cpp | 12 ----- 3 files changed, 121 insertions(+), 12 deletions(-) diff --git a/include/anvill/Type.h b/include/anvill/Type.h index bedd57265..4a239a19f 100644 --- a/include/anvill/Type.h +++ b/include/anvill/Type.h @@ -19,6 +19,13 @@ #include "Result.h" +template +inline void hash_combine(std::size_t &seed, const T &v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + + namespace llvm { class DataLayout; class IntegerType; @@ -109,6 +116,7 @@ bool operator==(std::shared_ptr, std::shared_ptr); bool operator==(std::shared_ptr, std::shared_ptr); bool operator==(std::shared_ptr, std::shared_ptr); + struct PointerType { template PointerType(T &&pointee, bool is_const) @@ -273,3 +281,115 @@ class TypeTranslator { }; } // namespace anvill + + +namespace std { +template <> +struct hash { + size_t operator()(const anvill::UnknownType &unk) const { + return std::hash()(unk.size); + } +}; + +template <> +struct hash { + size_t operator()(const anvill::PointerType &unk) const { + std::size_t result = 0; + + hash_combine(result, unk.is_const); + hash_combine(result, unk.pointee); + + return result; + } +}; + + +template <> +struct hash> { + size_t operator()(const std::shared_ptr &unk) const { + + return std::hash()(*unk); + } +}; + +template <> +struct hash { + size_t operator()(const anvill::VectorType &unk) const { + + std::size_t result = 0; + + hash_combine(result, unk.size); + hash_combine(result, unk.base); + + return result; + } +}; +template <> +struct hash> { + size_t operator()(const std::shared_ptr &unk) const { + return std::hash()(*unk); + } +}; +template <> +struct hash { + size_t operator()(const anvill::ArrayType &unk) const { + std::size_t result = 0; + + hash_combine(result, unk.size); + hash_combine(result, unk.base); + + return result; + } +}; +template <> 
+struct hash { + size_t operator()(const anvill::StructType &unk) const { + std::size_t result = 0; + + + for (auto ty : unk.members) { + hash_combine(result, ty); + } + + return result; + } +}; +template <> +struct hash { + size_t operator()(const anvill::FunctionType &unk) const { + std::size_t result = 0; + + + for (auto ty : unk.arguments) { + hash_combine(result, ty); + } + + hash_combine(result, unk.is_variadic); + hash_combine(result, unk.return_type); + + return result; + } +}; + +template <> +struct hash> { + size_t operator()(const std::shared_ptr &unk) const { + return std::hash()(*unk); + } +}; + +template <> +struct hash> { + size_t operator()(const std::shared_ptr &unk) const { + return std::hash()(*unk); + } +}; + +template <> +struct hash> { + size_t operator()(const std::shared_ptr &unk) const { + return std::hash()(*unk); + } +}; + +} // namespace std \ No newline at end of file diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 1b76b62d1..edb8aa7a9 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -20,6 +20,7 @@ #include "anvill/Declarations.h" #include "anvill/Lifters.h" + namespace llvm { class BasicBlock; class Instruction; diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 71d681f8c..167813d0c 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -42,18 +42,6 @@ #include "Protobuf.h" #include "anvill/Specification.h" - -namespace { - -template -inline void hash_combine(std::size_t &seed, const T &v) { - std::hash hasher; - seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); -} - -} // namespace - - namespace std { template <> struct std::hash { From 9fad6d24534b0f2c58b893c90327843675274d63 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 3 Apr 2023 12:52:00 -0400 Subject: [PATCH 128/163] make registers no capture (#368) --- lib/Lifters/BasicBlockLifter.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp 
b/lib/Lifters/BasicBlockLifter.cpp index 8d011896a..575de6e6b 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -409,9 +409,11 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { if (std::all_of(var.oredered_locs.begin(), var.oredered_locs.end(), [](const LowLoc &loc) -> bool { return loc.reg; })) { - // Registers should not have aliases + // Registers should not have aliases, or be captured arg->addAttr(llvm::Attribute::get(llvm_context, llvm::Attribute::AttrKind::NoAlias)); + arg->addAttr(llvm::Attribute::get(llvm_context, + llvm::Attribute::AttrKind::NoCapture)); } start_ind += 1; From a52d812c93f3bc45b9d5aae34d3e5e194bb17cbf Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 7 Apr 2023 19:37:28 -0400 Subject: [PATCH 129/163] Keep memory consistent through basic block calls and returns (#370) * use memory pointer for calling sub basic blocks * more complete mem passing --- lib/Lifters/BasicBlockLifter.cpp | 15 ++++++++++++++- lib/Lifters/FunctionLifter.cpp | 6 +++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 575de6e6b..4f6826569 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -424,6 +425,10 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto pc = remill::NthArgument(func, remill::kPCArgNum); memory->setName("memory"); + memory->addAttr( + llvm::Attribute::get(llvm_context, llvm::Attribute::AttrKind::NoAlias)); + memory->addAttr( + llvm::Attribute::get(llvm_context, llvm::Attribute::AttrKind::NoCapture)); pc->setName("program_counter"); state->setName("stack"); @@ -460,7 +465,12 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto should_return = ir.CreateAlloca(llvm::IntegerType::getInt1Ty(context), nullptr, "should_return"); 
ir.CreateStore(llvm::ConstantInt::getFalse(context), should_return); - ir.CreateStore(memory, ir.CreateAlloca(memory->getType(), nullptr, "MEMORY")); + auto lded_mem = + ir.CreateLoad(llvm::PointerType::get(this->llvm_context, 0), memory); + + ir.CreateStore(lded_mem, + ir.CreateAlloca(llvm::PointerType::get(this->llvm_context, 0), + nullptr, "MEMORY")); this->state_ptr = this->AllocateAndInitializeStateStructure(&blk, options.arch); @@ -546,6 +556,9 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { BasicBlockFunction bbf{func, pc_arg, mem_arg, next_pc, state}; + + ir.CreateStore(ret_mem, memory); + ir.CreateStore(ret_mem, remill::LoadMemoryPointerRef(ir.GetInsertBlock())); TerminateBasicBlockFunction(func, ir, ret_mem, should_return, bbf); return bbf; diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 0a227d1d0..9e666e117 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -499,10 +499,10 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // How should control flow redirection behave in this case? 
auto &entry_lifter = this->GetOrCreateBasicBlockLifter(this->func_address); - auto memptr = remill::LoadMemoryPointer(ir, this->intrinsics); - auto call_inst = entry_lifter.CallBasicBlockFunction( - ir, lifted_func_st.state_ptr, abstract_stack, memptr); + ir, lifted_func_st.state_ptr, abstract_stack, this->mem_ptr_ref); + + auto memptr = remill::LoadMemoryPointer(ir, this->intrinsics); if (!call_inst->getType()->isVoidTy()) { // TODO(Ian): this memptr is not right From 6a68180b4069f50f47b0e800d1a5411e43e79fb1 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 4 May 2023 09:20:19 -0400 Subject: [PATCH 130/163] use irene ghidra install (#374) --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d0fadf222..a02a2bb9d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -166,10 +166,10 @@ jobs: echo ~/.cargo/bin >>$GITHUB_PATH - name: "Setup Ghidra" + working-directory: ${{ steps.build_paths.outputs.REL_SOURCE }}/irene3 run: | - wget https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_10.1.5_build/ghidra_10.1.5_PUBLIC_20220726.zip --output-document=ghidra.zip - unzip ghidra.zip - echo "GHIDRA_INSTALL_DIR=$(pwd)/ghidra_10.1.5_PUBLIC" >> $GITHUB_ENV + just install-ghidra + echo "GHIDRA_INSTALL_DIR=$(pwd)/deps/ghidra" >> $GITHUB_ENV - name: Install Ghidra Spec Generation shell: bash From 5c16d8ed93fd23e30d94fa518fb4eddf31b12313 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 4 May 2023 13:51:58 -0400 Subject: [PATCH 131/163] Update remill for anvill (#371) * bump remill * bump remill * try bump ci to 22.04 * bump dockerfile * bump cxx common * install venv --- .github/workflows/build.yml | 24 ++++++++++++------------ Dockerfile | 10 +++++----- remill | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a02a2bb9d..0dadb7988 100644 
--- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,7 +26,7 @@ on: jobs: cleanup_stale_workflows: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Clone the anvill repository @@ -51,9 +51,9 @@ jobs: fail-fast: false matrix: image: - - { name: "ubuntu", tag: "20.04" } + - { name: "ubuntu", tag: "22.04" } llvm: ["15"] - cxxcommon_version: ["v0.2.22"] + cxxcommon_version: ["v0.2.24"] runs-on: labels: gha-ubuntu-32 @@ -420,7 +420,7 @@ jobs: matrix: os: ["macos-11"] llvm: ["15"] - cxxcommon_version: ["v0.2.22"] + cxxcommon_version: ["v0.2.24"] runs-on: macos-12 @@ -675,7 +675,7 @@ jobs: passes_ci: needs: build_linux - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 if: always() steps: - name: Successful linux build @@ -688,7 +688,7 @@ jobs: release_packages: # Do not run the release procedure if any of the builds has failed needs: [build_linux, build_macos] - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') steps: @@ -723,13 +723,13 @@ jobs: - name: Group the packages by platform run: | - zip -r9 anvill_ubuntu-20.04_packages.zip \ - ubuntu-20.04* + zip -r9 anvill_ubuntu-22.04_packages.zip \ + ubuntu-22.04* zip -r9 anvill_macos-11_packages.zip \ macos-11* - - name: Upload the Ubuntu 20.04 packages + - name: Upload the Ubuntu 22.04 packages uses: actions/upload-release-asset@v1 env: @@ -737,8 +737,8 @@ jobs: with: upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: anvill_ubuntu-20.04_packages.zip - asset_name: anvill_ubuntu-20.04_packages.zip + asset_path: anvill_ubuntu-22.04_packages.zip + asset_name: anvill_ubuntu-22.04_packages.zip asset_content_type: application/gzip - name: Upload the macOS 11 packages @@ -761,7 +761,7 @@ jobs: strategy: matrix: llvm: ["15"] - ubuntu: ["20.04"] + ubuntu: ["22.04"] steps: - uses: actions/checkout@v3 with: diff --git a/Dockerfile b/Dockerfile index 7718a962b..9ba3740dd 100644 --- a/Dockerfile +++ 
b/Dockerfile @@ -1,7 +1,7 @@ ARG LLVM_VERSION=15 ARG ARCH=amd64 -ARG UBUNTU_VERSION=20.04 -ARG CXX_COMMON_VERSION=0.2.16 +ARG UBUNTU_VERSION=22.04 +ARG CXX_COMMON_VERSION=0.2.24 ARG DISTRO_BASE=ubuntu${UBUNTU_VERSION} ARG BUILD_BASE=ubuntu:${UBUNTU_VERSION} ARG LIBRARIES=/opt/trailofbits @@ -15,7 +15,7 @@ ARG LLVM_VERSION ARG CXX_COMMON_VERSION ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ - apt-get install -qqy --no-install-recommends git libdbus-1-3 curl unzip python3 python3-pip python3.8 python3.8-venv python3-setuptools xz-utils cmake && \ + apt-get install -qqy --no-install-recommends git libdbus-1-3 curl unzip python3 python3-pip python3-setuptools xz-utils cmake && \ rm -rf /var/lib/apt/lists/* #### NOTE #### @@ -32,7 +32,7 @@ ARG CXX_COMMON_VERSION ARG LIBRARIES RUN apt-get update && \ - apt-get install -qqy xz-utils python3.8-venv make rpm && \ + apt-get install -qqy xz-utils python3 python3.10-venv make rpm && \ rm -rf /var/lib/apt/lists/* # Build dependencies @@ -59,7 +59,7 @@ ENV VIRTUAL_ENV=/opt/trailofbits/venv ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" # create a virtualenv in /opt/trailofbits/venv -RUN python3.8 -m venv ${VIRTUAL_ENV} +RUN python3 -m venv ${VIRTUAL_ENV} # Needed for sourcing venv SHELL ["/bin/bash", "-c"] diff --git a/remill b/remill index b26e8ebe0..a709d0ac5 160000 --- a/remill +++ b/remill @@ -1 +1 @@ -Subproject commit b26e8ebe0e443622f3b3606bb184256a59e02bde +Subproject commit a709d0ac58852bce021b21b4aa1879a7c6ed7930 From 1364c44b7306d9ca3fa99eede0256a0f419198e1 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Thu, 4 May 2023 13:52:20 -0400 Subject: [PATCH 132/163] Swap AArch64 to use Sleigh Semantics (#377) * bump remill * bump remill * swap to sleigh for aarch64 --- lib/Specification.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Specification.cpp b/lib/Specification.cpp index da790b8ae..084b14bb0 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -283,7 +283,7 @@ 
GetArch(llvm::LLVMContext &context, arch_name = remill::kArchAMD64_AVX512; break; case ::specification::ARCH_AARCH64: - arch_name = remill::kArchAArch64LittleEndian; + arch_name = remill::kArchAArch64LittleEndian_SLEIGH; break; case ::specification::ARCH_AARCH32: arch_name = remill::kArchAArch32LittleEndian; From 9f5518ce75377a34691d5b65e4dc2b203c56b6d9 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Thu, 4 May 2023 21:32:48 +0200 Subject: [PATCH 133/163] Split symvals into equalities at entry and exit (#375) * Split symvals into equalities at entry and exit * Ignore assignments active at entry and exit * Factor out some code * More refactoring * Only pack live values at exit if not constant --------- Co-authored-by: 2over12 --- data_specifications/specification.proto | 7 +-- include/anvill/Declarations.h | 37 +++++++++++---- lib/Declarations.cpp | 62 ++++++++++++++++++++----- lib/Lifters/BasicBlockLifter.cpp | 4 +- lib/Protobuf.cpp | 25 +++++++--- 5 files changed, 102 insertions(+), 33 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 5847a04f2..150558603 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -259,10 +259,11 @@ message BlockContext { // Affine equalities between values // and high symbols at entry to // the block - repeated ValueMapping symvals = 1; + repeated ValueMapping symvals_at_entry = 1; + repeated ValueMapping symvals_at_exit = 2; - repeated Parameter live_at_entries = 2; - repeated Parameter live_at_exits = 3; + repeated Parameter live_at_entries = 3; + repeated Parameter live_at_exits = 4; } diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 5d2f4b892..a6bdfe713 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -137,6 +137,8 @@ struct ConstantDomain { ValueDecl target_value; std::uint64_t value; bool should_taint_by_pc; + + bool operator==(const 
ConstantDomain &) const = default; }; struct SpecStackOffsets { @@ -239,9 +241,11 @@ class BasicBlockContext { virtual ~BasicBlockContext() = default; - virtual const SpecStackOffsets &GetStackOffsets() const = 0; + virtual const SpecStackOffsets &GetStackOffsetsAtEntry() const = 0; + virtual const SpecStackOffsets &GetStackOffsetsAtExit() const = 0; - virtual const std::vector &GetConstants() const = 0; + virtual const std::vector &GetConstantsAtEntry() const = 0; + virtual const std::vector &GetConstantsAtExit() const = 0; virtual size_t GetStackSize() const = 0; @@ -320,21 +324,29 @@ struct FunctionDecl; class SpecBlockContext : public BasicBlockContext { private: const FunctionDecl &decl; - SpecStackOffsets offsets; - std::vector constants; + SpecStackOffsets offsets_at_entry; + SpecStackOffsets offsets_at_exit; + std::vector constants_at_entry; + std::vector constants_at_exit; std::vector live_params_at_entry; std::vector live_params_at_exit; std::vector params; public: - SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets, - std::vector constants, + SpecBlockContext(const FunctionDecl &decl, SpecStackOffsets offsets_at_entry, + SpecStackOffsets offsets_at_exit, + std::vector constants_at_entry, + std::vector constants_at_exit, std::vector live_params_at_entry, std::vector live_params_at_exit); - virtual const SpecStackOffsets &GetStackOffsets() const override; + virtual const SpecStackOffsets &GetStackOffsetsAtEntry() const override; + virtual const SpecStackOffsets &GetStackOffsetsAtExit() const override; - virtual const std::vector &GetConstants() const override; + virtual const std::vector & + GetConstantsAtEntry() const override; + virtual const std::vector & + GetConstantsAtExit() const override; virtual ValueDecl ReturnValue() const override; @@ -387,7 +399,9 @@ struct FunctionDecl : public CallableDecl { std::unordered_map locals; - std::unordered_map stack_offsets; + std::unordered_map stack_offsets_at_entry; + + std::unordered_map 
stack_offsets_at_exit; std::unordered_map> live_regs_at_entry; @@ -396,7 +410,10 @@ struct FunctionDecl : public CallableDecl { live_regs_at_exit; std::unordered_map> - constant_values; + constant_values_at_entry; + + std::unordered_map> + constant_values_at_exit; std::uint64_t stack_depth; diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 167813d0c..8b163b878 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -162,7 +162,29 @@ std::vector BasicBlockContext::LiveBBParamsAtExit() const { std::vector res; std::copy_if( alllive.begin(), alllive.end(), std::back_inserter(res), - [](const BasicBlockVariable &bbvar) { return bbvar.live_at_exit; }); + [&](const BasicBlockVariable &bbvar) { + if (!bbvar.live_at_exit) { + return false; + } + auto &consts_at_exit = GetConstantsAtExit(); + if (std::find_if(consts_at_exit.begin(), consts_at_exit.end(), + [&](const ConstantDomain &cdomain) { + return cdomain.target_value == bbvar.param; + }) != consts_at_exit.end()) { + return false; + } + + auto &offset_at_exit = GetStackOffsetsAtExit(); + if (std::find_if(offset_at_exit.affine_equalities.begin(), + offset_at_exit.affine_equalities.end(), + [&](const OffsetDomain &odomain) { + return odomain.target_value == bbvar.param; + }) != offset_at_exit.affine_equalities.end()) { + return false; + } + + return true; + }); return res; } @@ -247,13 +269,17 @@ size_t SpecBlockContext::GetMaxStackSize() const { SpecBlockContext::SpecBlockContext( - const FunctionDecl &decl, SpecStackOffsets offsets, - std::vector constants, + const FunctionDecl &decl, SpecStackOffsets offsets_at_entry, + SpecStackOffsets offsets_at_exit, + std::vector constants_at_entry, + std::vector constants_at_exit, std::vector live_params_at_entry, std::vector live_params_at_exit) : decl(decl), - offsets(std::move(offsets)), - constants(std::move(constants)), + offsets_at_entry(std::move(offsets_at_entry)), + offsets_at_exit(std::move(offsets_at_exit)), + 
constants_at_entry(std::move(constants_at_entry)), + constants_at_exit(std::move(constants_at_exit)), live_params_at_entry(std::move(live_params_at_entry)), live_params_at_exit(std::move(live_params_at_exit)), params(decl.in_scope_variables) {} @@ -270,12 +296,22 @@ const std::vector &SpecBlockContext::LiveParamsAtEntry() const { return this->live_params_at_entry; } -const SpecStackOffsets &SpecBlockContext::GetStackOffsets() const { - return this->offsets; +const SpecStackOffsets &SpecBlockContext::GetStackOffsetsAtEntry() const { + return this->offsets_at_entry; +} + +const SpecStackOffsets &SpecBlockContext::GetStackOffsetsAtExit() const { + return this->offsets_at_exit; +} + +const std::vector & +SpecBlockContext::GetConstantsAtEntry() const { + return this->constants_at_entry; } -const std::vector &SpecBlockContext::GetConstants() const { - return this->constants; +const std::vector & +SpecBlockContext::GetConstantsAtExit() const { + return this->constants_at_exit; } const std::vector &SpecBlockContext::GetParams() const { @@ -454,8 +490,12 @@ size_t FunctionDecl::GetPointerDisplacement() const { SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { return SpecBlockContext( - *this, GetWithDef(addr, this->stack_offsets, SpecStackOffsets()), - GetWithDef(addr, this->constant_values, std::vector()), + *this, GetWithDef(addr, this->stack_offsets_at_entry, SpecStackOffsets()), + GetWithDef(addr, this->stack_offsets_at_exit, SpecStackOffsets()), + GetWithDef(addr, this->constant_values_at_entry, + std::vector()), + GetWithDef(addr, this->constant_values_at_exit, + std::vector()), GetWithDef(addr, this->live_regs_at_entry, std::vector()), GetWithDef(addr, this->live_regs_at_exit, std::vector())); } diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 4f6826569..43938d857 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -484,7 +484,7 @@ BasicBlockFunction 
BasicBlockLifter::CreateBasicBlockFunction() { // Initialize the stack pointer. ir.CreateStore(sp_value, sp_ptr); - auto stack_offsets = this->block_context->GetStackOffsets(); + auto stack_offsets = this->block_context->GetStackOffsetsAtEntry(); for (auto &reg_off : stack_offsets.affine_equalities) { auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( ir, this->sp_reg, this->block_def.addr, reg_off.stack_offset); @@ -504,7 +504,7 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { this->UnpackLiveValues(ir, ptr_provider, this->state_ptr, this->block_context->LiveBBParamsAtEntry()); - for (auto &reg_const : this->block_context->GetConstants()) { + for (auto &reg_const : block_context->GetConstantsAtEntry()) { llvm::Value *new_value = nullptr; llvm::Type *target_type = reg_const.target_value.type; if (reg_const.should_taint_by_pc) { diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index a816ed56a..5684a52b0 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -622,10 +622,13 @@ void ProtobufTranslator::ParseCFGIntoFunction( for (auto &[blk_addr, ctx] : obj.block_context()) { - std::vector stack_offsets; - std::vector constant_values; + std::vector stack_offsets_at_entry, stack_offsets_at_exit; + std::vector constant_values_at_entry, + constant_values_at_exit; auto blk = decl.cfg[blk_addr]; - for (auto &symval : ctx.symvals()) { + auto symval_to_domains = [&](const specification::ValueMapping &symval, + std::vector &stack_offsets, + std::vector &constant_values) { if (!symval.has_target_value()) { LOG(FATAL) << "All equalities must have a target"; } @@ -643,7 +646,7 @@ void ProtobufTranslator::ParseCFGIntoFunction( if (!target_vdecl.Succeeded()) { LOG(ERROR) << "Failed to lift value " << target_vdecl.TakeError(); - continue; + return; } if (!symval.has_curr_val()) { @@ -673,11 +676,19 @@ void ProtobufTranslator::ParseCFGIntoFunction( LOG(FATAL) << symval.curr_val().GetTypeName() << " is unimplemented for affine relations"; } + }; + + for
(auto &symval : ctx.symvals_at_entry()) { + symval_to_domains(symval, + decl.stack_offsets_at_entry[blk_addr].affine_equalities, + decl.constant_values_at_entry[blk_addr]); } - SpecStackOffsets off = {stack_offsets}; - decl.stack_offsets.emplace(blk_addr, std::move(off)); - decl.constant_values.emplace(blk_addr, std::move(constant_values)); + for (auto &symval : ctx.symvals_at_exit()) { + symval_to_domains(symval, + decl.stack_offsets_at_exit[blk_addr].affine_equalities, + decl.constant_values_at_exit[blk_addr]); + } this->AddLiveValuesToBB(decl.live_regs_at_entry, blk_addr, ctx.live_at_entries()); From 046702ee9f32a52d0322349a29b1cc81311c96e4 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 10 May 2023 07:35:17 -0600 Subject: [PATCH 134/163] bump remill again --- remill | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remill b/remill index a709d0ac5..22b3d4b23 160000 --- a/remill +++ b/remill @@ -1 +1 @@ -Subproject commit a709d0ac58852bce021b21b4aa1879a7c6ed7930 +Subproject commit 22b3d4b2318c6ba84cb4e66abb2d37a39a5a33bf From 5741f825b9726a04693b9271ac5a7053f1436408 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 11 May 2023 00:49:19 +1000 Subject: [PATCH 135/163] Performance fixes (#348) * Performance fixes * Begin verifying modules move the DSE pass again * bump remill * Fix Remill submodule * Only verify LLVM functions and modules in debug mode --------- Co-authored-by: 2over12 --- lib/Arch/AArch64_C.cpp | 2 +- lib/Arch/PPC_SysV.cpp | 2 +- lib/Declarations.cpp | 16 +++++++-------- lib/Lifters/BasicBlockLifter.cpp | 22 ++++++++++----------- lib/Lifters/CodeLifter.cpp | 8 ++------ lib/Lifters/FunctionLifter.cpp | 4 ++-- lib/Passes/RemoveCallIntrinsics.cpp | 14 +++++++------- lib/Passes/ReplaceStackReferences.cpp | 28 +++++++++++++-------------- 8 files changed, 46 insertions(+), 50 deletions(-) diff --git a/lib/Arch/AArch64_C.cpp b/lib/Arch/AArch64_C.cpp index 2778a65a9..426b5524c 100644 --- a/lib/Arch/AArch64_C.cpp +++ 
b/lib/Arch/AArch64_C.cpp @@ -206,7 +206,7 @@ AArch64_C::BindReturnValues(llvm::Function &function, bool &injected_sret, std::vector &ret_values) { llvm::Type *ret_type = function.getReturnType(); - LOG(INFO) << "Binding on return " << remill::LLVMThingToString(ret_type); + DLOG(INFO) << "Binding on return " << remill::LLVMThingToString(ret_type); injected_sret = false; // If there is an sret parameter then it is a special case. diff --git a/lib/Arch/PPC_SysV.cpp b/lib/Arch/PPC_SysV.cpp index 85f453ccf..3e4fd511c 100644 --- a/lib/Arch/PPC_SysV.cpp +++ b/lib/Arch/PPC_SysV.cpp @@ -136,7 +136,7 @@ PPC_SysV::BindReturnValues(llvm::Function &function, std::vector &ret_values) { auto ret_type = function.getReturnType(); - LOG(INFO) << "Binding on return " << remill::LLVMThingToString(ret_type); + DLOG(INFO) << "Binding on return " << remill::LLVMThingToString(ret_type); // If there is an sret parameter then it is a special case. if (function.hasStructRetAttr()) { diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 8b163b878..c66dd6e71 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -342,8 +342,8 @@ llvm::Value *CallableDecl::CallFromLiftedBlock( // the function, which will be based off of the register state // on entry to the function. 
auto new_sp_base = return_stack_pointer->AddressOf(state_ptr, ir); - LOG(INFO) << "Modifying ret stack pointer by: " - << return_stack_pointer_offset; + DLOG(INFO) << "Modifying ret stack pointer by: " + << return_stack_pointer_offset; // TODO(Ian): this could go in the wrong direction if stack option is set to go up const auto sp_val_on_exit = ir.CreateAdd( @@ -505,9 +505,9 @@ AbstractStack::StackOffsetFromStackPointer(std::int64_t stack_off) const { if (this->stack_grows_down) { auto displaced_offset = stack_off - static_cast(this->pointer_displacement); - LOG(INFO) << this->total_size; - LOG(INFO) << "disp: " << this->pointer_displacement; - LOG(INFO) << "Displaced offset: " << displaced_offset; + DLOG(INFO) << this->total_size; + DLOG(INFO) << "disp: " << this->pointer_displacement; + DLOG(INFO) << "Displaced offset: " << displaced_offset; if (!(static_cast(this->total_size) >= llabs(displaced_offset))) { return std::nullopt; @@ -550,13 +550,13 @@ AbstractStack::PointerToStackMemberFromOffset(llvm::IRBuilder<> &ir, } auto i32 = llvm::IntegerType::getInt32Ty(this->context); - LOG(INFO) << "Looking for offset" << *off; + DLOG(INFO) << "Looking for offset" << *off; auto curr_off = 0; auto curr_ind = 0; for (auto [sz, ptr] : this->components) { if (off < curr_off + sz) { - LOG(INFO) << "Found for " << remill::LLVMThingToString(ptr); - LOG(INFO) << curr_off << " " << sz; + DLOG(INFO) << "Found for " << remill::LLVMThingToString(ptr); + DLOG(INFO) << curr_off << " " << sz; return ir.CreateGEP(this->stack_types[curr_ind], ptr, {llvm::ConstantInt::get(i32, 0), llvm::ConstantInt::get(i32, *off - curr_off)}); diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 43938d857..508f01837 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -34,8 +34,8 @@ namespace anvill { void BasicBlockLifter::LiftBasicBlockFunction() { auto bbfunc = this->CreateBasicBlockFunction(); 
this->LiftInstructionsIntoLiftedFunction(); - CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); - CHECK(!llvm::verifyFunction(*bbfunc.func, &llvm::errs())); + DCHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); + DCHECK(!llvm::verifyFunction(*bbfunc.func, &llvm::errs())); this->RecursivelyInlineFunctionCallees(bbfunc.func); } @@ -151,8 +151,8 @@ BasicBlockLifter::LoadFunctionReturnAddress(const remill::Instruction &inst, // TODO(pag, kumarak): Does a zero value in `enc.u.imm22` imply a no-return // function? Try this on Compiler Explorer! if (!enc.u.op && !enc.u.op2) { - LOG(INFO) << "Found structure return of size " << enc.u.imm22 << " to " - << std::hex << pc << " at " << inst.pc << std::dec; + DLOG(INFO) << "Found structure return of size " << enc.u.imm22 << " to " + << std::hex << pc << " at " << inst.pc << std::dec; llvm::IRBuilder<> ir(block); return {pc + 4u, @@ -314,13 +314,13 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { auto init_context = this->CreateDecodingContext(this->block_def); - LOG(INFO) << "Decoding block at addr: " << std::hex << this->block_def.addr - << " with size " << this->block_def.size; + DLOG(INFO) << "Decoding block at addr: " << std::hex << this->block_def.addr + << " with size " << this->block_def.size; bool ended_on_terminal = false; while (reached_addr < this->block_def.addr + this->block_def.size && !ended_on_terminal) { auto addr = reached_addr; - LOG(INFO) << "Decoding at addr " << std::hex << addr; + DLOG(INFO) << "Decoding at addr " << std::hex << addr; auto res = this->DecodeInstructionInto(addr, false, &inst, init_context); if (!res) { remill::AddTerminatingTailCall(bb, this->intrinsics.error, @@ -342,7 +342,7 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { ended_on_terminal = !this->ApplyInterProceduralControlFlowOverride(inst, bb); - LOG_IF(INFO, ended_on_terminal) + DLOG_IF(INFO, ended_on_terminal) << "On terminal at addr: " << std::hex << addr; } @@ -499,8 
+499,8 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { return this->block_context->ProvidePointerFromFunctionArgs(func, param); }; - LOG(INFO) << "Live values at entry to function " - << this->block_context->LiveBBParamsAtEntry().size(); + DLOG(INFO) << "Live values at entry to function " + << this->block_context->LiveBBParamsAtEntry().size(); this->UnpackLiveValues(ir, ptr_provider, this->state_ptr, this->block_context->LiveBBParamsAtEntry()); @@ -706,7 +706,7 @@ llvm::CallInst *BasicBlockLifter::CallBasicBlockFunction( PointerProvider ptr_provider = [&builder, this, out_param_locals, &stack](const ParameterDecl &repr_var) -> llvm::Value * { - LOG(INFO) << "Lifting: " << repr_var.name << " for call"; + DLOG(INFO) << "Lifting: " << repr_var.name << " for call"; if (HasMemLoc(repr_var)) { // TODO(Ian): the assumption here since we are able to build a single pointer here into the frame is that // svars are single valuedecl contigous diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index c97e133c0..d91b873f1 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -277,11 +277,7 @@ void CodeLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { // Initialize cleanup optimizations - if (llvm::verifyFunction(*inf, &llvm::errs())) { - - LOG(FATAL) << "Function verification failed: " << inf->getName().str() - << " " << remill::LLVMThingToString(inf->getType()); - } + DCHECK(!llvm::verifyFunction(*inf, &llvm::errs())); llvm::legacy::FunctionPassManager fpm(inf->getParent()); fpm.add(llvm::createCFGSimplificationPass()); @@ -299,4 +295,4 @@ void CodeLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { ClearVariableNames(inf); } -} // namespace anvill \ No newline at end of file +} // namespace anvill diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 9e666e117..6d74702ee 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -319,7
+319,7 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( // `__attribute__((flatten))`, i.e. recursively inline as much as possible, so // that all semantics and helpers are completely inlined. void FunctionLifter::RecursivelyInlineLiftedFunctionIntoNativeFunction(void) { - CHECK(!llvm::verifyModule(*this->native_func->getParent(), &llvm::errs())); + DCHECK(!llvm::verifyModule(*this->native_func->getParent(), &llvm::errs())); this->RecursivelyInlineFunctionCallees(this->native_func); } @@ -522,7 +522,7 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // Go lift all instructions! VisitBlocks(lifted_func_st.state_ptr, abstract_stack); - CHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); + DCHECK(!llvm::verifyFunction(*this->lifted_func, &llvm::errs())); // Fill up `native_func` with a basic block and make it call `lifted_func`. // This creates things like the stack-allocated `State` structure. diff --git a/lib/Passes/RemoveCallIntrinsics.cpp b/lib/Passes/RemoveCallIntrinsics.cpp index be4ae8a68..8a58943a1 100644 --- a/lib/Passes/RemoveCallIntrinsics.cpp +++ b/lib/Passes/RemoveCallIntrinsics.cpp @@ -37,7 +37,7 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, remillFunctionCall->getFunction()->getParent()->getDataLayout()); auto ra = xref_folder.TryResolveReferenceWithClearedCache(target_func); auto f = remillFunctionCall->getFunction(); - CHECK(!llvm::verifyFunction(*f, &llvm::errs())); + DCHECK(!llvm::verifyFunction(*f, &llvm::errs())); if (ra.references_entity || // Related to an existing lifted entity. ra.references_global_value || // Related to a global var/func. 
@@ -53,10 +53,10 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, const remill::IntrinsicTable table( remillFunctionCall->getFunction()->getParent()); - LOG(INFO) << "Replacing call from: " - << remill::LLVMThingToString(remillFunctionCall) - << " with call to " << std::hex << ra.u.address - << " d has: " << std::string(entity->getName()); + DLOG(INFO) << "Replacing call from: " + << remill::LLVMThingToString(remillFunctionCall) + << " with call to " << std::hex << ra.u.address + << " d has: " << std::string(entity->getName()); auto new_mem = fdecl->CallFromLiftedBlock(entity, lifter.Options().TypeDictionary(), table, ir, state_ptr, mem_ptr); @@ -67,7 +67,7 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, } } - CHECK(!llvm::verifyFunction(*f, &llvm::errs())); + DCHECK(!llvm::verifyFunction(*f, &llvm::errs())); return prev; } @@ -82,4 +82,4 @@ bool RemoveCallIntrinsics::isTargetInstrinsic(const llvm::CallInst *callinsn) { callinsn->getCalledFunction()->getName().startswith( "__remill_function_call"); } -} // namespace anvill \ No newline at end of file +} // namespace anvill diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index f8b3d377f..3d9bfbfa7 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -61,7 +61,7 @@ class StackCrossReferenceResolver : public CrossReferenceFolder { protected: virtual std::optional ResolveValueCallback(llvm::Value *v) const override { - LOG(INFO) << "Looking at: " << remill::LLVMThingToString(v); + DLOG(INFO) << "Looking at: " << remill::LLVMThingToString(v); auto stack_ref = abs_stack.StackPointerFromStackCompreference(v); if (stack_ref) { return this->StackPtrToXref(*stack_ref); @@ -202,17 +202,17 @@ class StackModel { return std::nullopt; } - LOG(INFO) << "value found lte offset: " - << vlte->decl.oredered_locs[0].mem_offset << " " << off; + DLOG(INFO) << "value found lte offset: " + << 
vlte->decl.oredered_locs[0].mem_offset << " " << off; auto offset_into_var = off - vlte->decl.oredered_locs[0].mem_offset; if (offset_into_var < static_cast(GetParamDeclSize(vlte->decl))) { return {{offset_into_var, *vlte}}; } - LOG(INFO) << "Looking for off " << off << " but not fitting " - << offset_into_var << " got off " - << vlte->decl.oredered_locs[0].mem_offset; + DLOG(INFO) << "Looking for off " << off << " but not fitting " + << offset_into_var << " got off " + << vlte->decl.oredered_locs[0].mem_offset; return std::nullopt; } @@ -254,10 +254,10 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( auto overrunptr = ent_insert.CreateAlloca( AbstractStack::StackTypeFromSize(F.getContext(), overrunsz)); - LOG(INFO) << "Replacing stack vars in bb: " << std::hex - << *anvill::GetBasicBlockAddr(&F); - LOG(INFO) << "Stack size " << cont.GetStackSize(); - LOG(INFO) << "Max stack size " << cont.GetMaxStackSize(); + DLOG(INFO) << "Replacing stack vars in bb: " << std::hex + << *anvill::GetBasicBlockAddr(&F); + DLOG(INFO) << "Stack size " << cont.GetStackSize(); + DLOG(INFO) << "Max stack size " << cont.GetMaxStackSize(); AbstractStack stk( F.getContext(), {{cont.GetStackSize(), anvill::GetBasicBlockStackPtr(&F)}, @@ -307,8 +307,8 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( collision = true; } - LOG(INFO) << "Escaping stack access " << stack_offset << " " - << remill::LLVMThingToString(use->get()); + DLOG(INFO) << "Escaping stack access " << stack_offset << " " + << remill::LLVMThingToString(use->get()); // otherwise we are going to escape the abstract stack to_replace_vars.push_back({use, stack_offset}); @@ -353,7 +353,7 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( } } - CHECK(!llvm::verifyFunction(F, &llvm::errs())); + DCHECK(!llvm::verifyFunction(F, &llvm::errs())); // This isnt a sound check at all we could still derive a pointer to a variable from another variable. 
Essentially need to check that all @@ -375,4 +375,4 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( return to_replace_vars.empty() ? llvm::PreservedAnalyses::all() : llvm::PreservedAnalyses::none(); } -} // namespace anvill \ No newline at end of file +} // namespace anvill From 942646725db7b3c60b39fdaa24c6466455b322be Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Sat, 20 May 2023 06:49:09 +1000 Subject: [PATCH 136/163] Get Anvill building with LLVM 16 (#376) * Get Anvill building with LLVM 16 * Update scripts and CI to use LLVM 16 * Bump cxx-common versions * Fix scripts to work with new `cxx-common` * Put code to add SROA pass behind a helper * Bump Remill * Bump Remill now that LLVM 16 support has been merged into `master` * Bump `cxx-common` to v0.3.2 * Remove LLVM 15 support --- .github/workflows/build.yml | 22 ++++++++-------- Dockerfile | 4 +-- README.md | 4 +-- include/anvill/Utils.h | 2 ++ lib/Optimize.cpp | 10 +++---- lib/Passes/SliceManager.cpp | 26 +++++++++---------- lib/Passes/SplitStackFrameAtReturnAddress.cpp | 4 +-- lib/Passes/TransformRemillJumpIntrinsics.cpp | 8 +++--- lib/Passes/Utils.cpp | 4 +-- lib/Utils.cpp | 3 ++- scripts/build.sh | 23 +++++++++------- scripts/run-on-anghabench.sh | 2 +- 12 files changed, 58 insertions(+), 54 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0dadb7988..040caa05c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -52,8 +52,8 @@ jobs: matrix: image: - { name: "ubuntu", tag: "22.04" } - llvm: ["15"] - cxxcommon_version: ["v0.2.24"] + llvm: ["16"] + cxxcommon_version: ["v0.3.2"] runs-on: labels: gha-ubuntu-32 @@ -418,9 +418,9 @@ jobs: strategy: fail-fast: false matrix: - os: ["macos-11"] - llvm: ["15"] - cxxcommon_version: ["v0.2.24"] + os: ["macos-12"] + llvm: ["16"] + cxxcommon_version: ["v0.3.2"] runs-on: macos-12 @@ -497,7 +497,7 @@ jobs: id: cxxcommon_installer working-directory: ${{ 
steps.build_paths.outputs.DOWNLOADS }} run: | - folder_name="vcpkg_${{ matrix.os }}_llvm-${{ matrix.llvm }}_xcode-13.0_amd64" + folder_name="vcpkg_${{ matrix.os }}_llvm-${{ matrix.llvm }}_xcode-14.2_amd64" archive_name="${folder_name}.tar.xz" url="https://github.com/lifting-bits/cxx-common/releases/download/${{ matrix.cxxcommon_version}}/${archive_name}" @@ -726,8 +726,8 @@ jobs: zip -r9 anvill_ubuntu-22.04_packages.zip \ ubuntu-22.04* - zip -r9 anvill_macos-11_packages.zip \ - macos-11* + zip -r9 anvill_macos-12_packages.zip \ + macos-12* - name: Upload the Ubuntu 22.04 packages uses: actions/upload-release-asset@v1 @@ -749,8 +749,8 @@ jobs: with: upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: anvill_macos-11_packages.zip - asset_name: anvill_macos-11_packages.zip + asset_path: anvill_macos-12_packages.zip + asset_name: anvill_macos-12_packages.zip asset_content_type: application/gzip Docker_Linux: @@ -760,7 +760,7 @@ jobs: strategy: matrix: - llvm: ["15"] + llvm: ["16"] ubuntu: ["22.04"] steps: - uses: actions/checkout@v3 diff --git a/Dockerfile b/Dockerfile index 9ba3740dd..16a3f2714 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ -ARG LLVM_VERSION=15 +ARG LLVM_VERSION=16 ARG ARCH=amd64 ARG UBUNTU_VERSION=22.04 -ARG CXX_COMMON_VERSION=0.2.24 +ARG CXX_COMMON_VERSION=0.3.2 ARG DISTRO_BASE=ubuntu${UBUNTU_VERSION} ARG BUILD_BASE=ubuntu:${UBUNTU_VERSION} ARG LIBRARIES=/opt/trailofbits diff --git a/README.md b/README.md index dd097a7d8..63fb13086 100644 --- a/README.md +++ b/README.md @@ -103,10 +103,10 @@ Or you can tell CMake where to find the remill installation prefix by passing `- ### Docker image -To build via Docker run, specify the architecture, base Ubuntu image and LLVM version. For example, to build Anvill linking against LLVM 15 on Ubuntu 20.04 on AMD64 do: +To build via Docker run, specify the architecture, base Ubuntu image and LLVM version. 
For example, to build Anvill linking against LLVM 16 on Ubuntu 20.04 on AMD64 do: ```shell -ARCH=amd64; UBUNTU_VERSION=20.04; LLVM=15; \ +ARCH=amd64; UBUNTU_VERSION=20.04; LLVM=16; \ docker build . \ -t anvill-llvm${LLVM}-ubuntu${UBUNTU_VERSION}-${ARCH} \ -f Dockerfile \ diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index edb8aa7a9..ef27f3a85 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -140,4 +141,5 @@ llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func); bool HasMemLoc(const ValueDecl &v); bool HasRegLoc(const ValueDecl &v); + } // namespace anvill diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 03c9144e2..e204405fa 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -204,7 +204,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::DSEPass()); fpm.addPass(llvm::VerifierPass()); - fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::EarlyCSEPass(true)); fpm.addPass(llvm::VerifierPass()); @@ -239,7 +239,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::DCEPass()); fpm.addPass(llvm::VerifierPass()); - fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); fpm.addPass(llvm::VerifierPass()); // Sometimes we observe patterns where PC- and SP-related offsets are @@ -261,10 +261,10 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, fpm.addPass(llvm::VerifierPass()); //AddRecoverBasicStackFrame(fpm, options.stack_frame_recovery_options); //AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options); - fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); 
//fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); fpm.addPass(llvm::VerifierPass()); - fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); fpm.addPass(llvm::VerifierPass()); AddCombineAdjacentShifts(fpm); @@ -279,7 +279,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, fpm.addPass(llvm::VerifierPass()); fpm.addPass(anvill::RemoveCallIntrinsics(xr, spec, lifter)); fpm.addPass(llvm::VerifierPass()); - fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); fpm.addPass(llvm::VerifierPass()); AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); diff --git a/lib/Passes/SliceManager.cpp b/lib/Passes/SliceManager.cpp index 46fdcc8bd..86f464c5f 100644 --- a/lib/Passes/SliceManager.cpp +++ b/lib/Passes/SliceManager.cpp @@ -6,17 +6,17 @@ * the LICENSE file found in the root directory of this source tree. */ +#include #include - #include #include #include #include #include #include + #include #include -#include namespace anvill { @@ -57,14 +57,13 @@ void SliceManager::insertClonedSliceIntoFunction( auto bb = llvm::BasicBlock::Create(this->mod.get()->getContext(), "slicebasicblock." 
+ std::to_string(id.id), targetFunc); + llvm::IRBuilder<> builder(bb); - std::for_each(slice.begin(), slice.end(), [bb](llvm::Instruction *insn) { - bb->getInstList().push_back(insn); - }); + std::for_each(slice.begin(), slice.end(), + [&builder](llvm::Instruction *insn) { builder.Insert(insn); }); - llvm::ReturnInst::Create(this->mod.get()->getContext(), newReturn, - bb); + llvm::ReturnInst::Create(this->mod.get()->getContext(), newReturn, bb); return; } @@ -177,7 +176,6 @@ SliceManager::addSlice(llvm::ArrayRef slice, std::for_each(cloned.begin(), cloned.end(), [&mapper](llvm::Instruction *insn) { llvm::RemapInstruction(insn, mapper); - }); @@ -187,28 +185,30 @@ SliceManager::addSlice(llvm::ArrayRef slice, this->insertClonedSliceIntoFunction(id, slice_repr, new_ret, cloned); // Remove anvill pc to make interpretable - if (auto anvill_pc = this->mod.get()->getGlobalVariable(::anvill::kSymbolicPCName)) { + if (auto anvill_pc = + this->mod.get()->getGlobalVariable(::anvill::kSymbolicPCName)) { remill::ReplaceAllUsesOfConstant( - anvill_pc, llvm::Constant::getNullValue(anvill_pc->getType()), this->mod.get()); + anvill_pc, llvm::Constant::getNullValue(anvill_pc->getType()), + this->mod.get()); } if (!this->replaceAllGVConstantsWithInterpretableValue(cloned)) { slice_repr->eraseFromParent(); return std::nullopt; } - + assert(remill::VerifyModule(this->mod.get())); return {id}; } -InterpreterBuilder SliceManager::IntoInterpreterBuilder(SliceManager&& x) { +InterpreterBuilder SliceManager::IntoInterpreterBuilder(SliceManager &&x) { return InterpreterBuilder(std::move(x.mod)); } InterpreterBuilder::Slice InterpreterBuilder::getSlice(SliceID i) const { auto repr = this->mod->getFunction(SliceManager::getFunctionName(i)); - return InterpreterBuilder::Slice(repr,i); + return InterpreterBuilder::Slice(repr, i); } SliceInterpreter InterpreterBuilder::getInterp() const { diff --git a/lib/Passes/SplitStackFrameAtReturnAddress.cpp b/lib/Passes/SplitStackFrameAtReturnAddress.cpp 
index d9fd8cf56..72b99642a 100644 --- a/lib/Passes/SplitStackFrameAtReturnAddress.cpp +++ b/lib/Passes/SplitStackFrameAtReturnAddress.cpp @@ -562,8 +562,8 @@ static void SplitStackFrameAround(llvm::AllocaInst *frame_alloca, } if (!below.empty()) { - auto frame_size = - dl.getTypeAllocSize(frame_alloca->getAllocatedType()).getKnownMinSize(); + auto frame_size = dl.getTypeAllocSize(frame_alloca->getAllocatedType()) + .getKnownMinValue(); auto num_slots = ((frame_size - end_of_ra) + (addr_size - 1u)) / addr_size; make_subframe(std::move(below), "locals", "parameters", num_slots); } diff --git a/lib/Passes/TransformRemillJumpIntrinsics.cpp b/lib/Passes/TransformRemillJumpIntrinsics.cpp index a444b9b73..1e470fa25 100644 --- a/lib/Passes/TransformRemillJumpIntrinsics.cpp +++ b/lib/Passes/TransformRemillJumpIntrinsics.cpp @@ -6,10 +6,9 @@ * the LICENSE file found in the root directory of this source tree. */ -#include - #include #include +#include #include #include #include @@ -85,8 +84,7 @@ std::vector FindFunctionCalls(llvm::Function &func, T pred) { // Returns `true` if `val` is a possible return address -ReturnAddressResult -TransformRemillJumpIntrinsics::QueryReturnAddress( +ReturnAddressResult TransformRemillJumpIntrinsics::QueryReturnAddress( const CrossReferenceFolder &xref_folder, llvm::Module *module, llvm::Value *val) const { @@ -182,7 +180,7 @@ TransformRemillJumpIntrinsics::run(llvm::Function &func, llvm::FunctionPassManager fpm; fpm.addPass(llvm::DCEPass()); - fpm.addPass(llvm::SROAPass()); + fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); fpm.addPass(llvm::SimplifyCFGPass()); fpm.addPass(llvm::InstCombinePass()); fpm.run(func, fam); diff --git a/lib/Passes/Utils.cpp b/lib/Passes/Utils.cpp index 05197a2ac..972daa38f 100644 --- a/lib/Passes/Utils.cpp +++ b/lib/Passes/Utils.cpp @@ -65,7 +65,7 @@ llvm::Value *ConvertConstantToPointer(llvm::IRBuilder<> &ir, // Cast an integer to a pointer type. 
} else if (auto int_ty = llvm::dyn_cast(type)) { const auto pointer_width = dl.getPointerTypeSizeInBits(dest_ptr_ty); - if (int_ty->getPrimitiveSizeInBits().getKnownMinSize() < pointer_width) { + if (int_ty->getPrimitiveSizeInBits().getKnownMinValue() < pointer_width) { int_ty = llvm::Type::getIntNTy(val_to_convert->getContext(), pointer_width); val_to_convert = llvm::ConstantExpr::getZExt(val_to_convert, int_ty); @@ -113,7 +113,7 @@ llvm::Value *ConvertValueToPointer(llvm::IRBuilder<> &ir, // Cast an integer to a pointer type. } else if (auto int_ty = llvm::dyn_cast(type)) { const auto pointer_width = dl.getPointerTypeSizeInBits(dest_ptr_ty); - if (int_ty->getPrimitiveSizeInBits().getKnownMinSize() < pointer_width) { + if (int_ty->getPrimitiveSizeInBits().getKnownMinValue() < pointer_width) { int_ty = llvm::Type::getIntNTy(val_to_convert->getContext(), pointer_width); auto dest = ir.CreateZExt(val_to_convert, int_ty); diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 9dea6c049..047712fb4 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -33,10 +33,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -992,5 +994,4 @@ bool HasRegLoc(const ValueDecl &v) { [](const LowLoc &loc) -> bool { return loc.reg; }); } - } // namespace anvill diff --git a/scripts/build.sh b/scripts/build.sh index d7c37904e..56c8862e4 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -25,8 +25,8 @@ CURR_DIR=$( pwd ) BUILD_DIR="${CURR_DIR}/anvill-build" REMILL_BUILD_DIR="${CURR_DIR}/remill-build" INSTALL_DIR=/usr/local -LLVM_VERSION=llvm-15 -CXX_COMMON_VERSION="0.2.12" +LLVM_VERSION=llvm-16 +CXX_COMMON_VERSION="0.3.2" OS_VERSION=unknown ARCH_VERSION=unknown BUILD_FLAGS= @@ -175,11 +175,14 @@ function DownloadLibraries #BUILD_FLAGS="${BUILD_FLAGS} -DCMAKE_OSX_SYSROOT=${sdk_root}" # Min version supported - OS_VERSION="macos-11" - XCODE_VERSION="13.0" - if [[ "$(sw_vers -productVersion)" == "11."* ]]; then - echo "Found MacOS Big Sur" - 
OS_VERSION="macos-11" + OS_VERSION="macos-12" + XCODE_VERSION="14.2" + if [[ "${SYSTEM_VERSION}" == "13.*" ]]; then + echo "Found MacOS Ventura" + OS_VERSION="macos-12" + elif [[ "${SYSTEM_VERSION}" == "12.*" ]]; then + echo "Found MacOS Monterey" + OS_VERSION="macos-12" else echo "WARNING: ****Likely unsupported MacOS Version****" echo "WARNING: ****Using ${OS_VERSION}****" @@ -338,8 +341,8 @@ function Package function GetLLVMVersion { case ${1} in - 15) - LLVM_VERSION=llvm-15 + 16) + LLVM_VERSION=llvm-16 return 0 ;; *) @@ -357,7 +360,7 @@ function Help echo "" echo "Options:" echo " --prefix Change the default (${INSTALL_DIR}) installation prefix." - echo " --llvm-version Change the default (15) LLVM version." + echo " --llvm-version Change the default (16) LLVM version." echo " --build-dir Change the default (${BUILD_DIR}) build directory." echo " --debug Build with Debug symbols." echo " --extra-cmake-args Extra CMake arguments to build with." diff --git a/scripts/run-on-anghabench.sh b/scripts/run-on-anghabench.sh index 375c10150..b2917155b 100644 --- a/scripts/run-on-anghabench.sh +++ b/scripts/run-on-anghabench.sh @@ -7,7 +7,7 @@ export BINJA_DECODE_KEY=__BINJA_DECODE_KEY__ export BINJA_CHANNEL=__BINJA_CHANNEL__ export BINJA_VERSION=__BINJA_VERSION__ -export LLVM_VERSION=15 +export LLVM_VERSION=16 export CC=clang-13 CXX=clang++-13 dpkg --add-architecture i386 From 8ebf5633db2676946314c36e904b4c32843d2b72 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 19 May 2023 22:50:24 +0200 Subject: [PATCH 137/163] Pointer lifter (#373) * Set metadata for live values * Add type decoding from metadata --- include/anvill/Type.h | 1 + lib/Lifters/BasicBlockLifter.cpp | 7 ++-- lib/Type.cpp | 72 ++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/include/anvill/Type.h b/include/anvill/Type.h index 4a239a19f..f0ac573c8 100644 --- a/include/anvill/Type.h +++ b/include/anvill/Type.h @@ -275,6 +275,7 @@ class 
TypeTranslator { EncodingFormat alphanum = EncodingFormat::kDefault) const; llvm::MDNode *EncodeToMetadata(TypeSpec spec) const; + TypeSpec DecodeFromMetadata(llvm::MDNode *md) const; Result DecodeFromSpec(TypeSpec spec) const; diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 508f01837..d690a175a 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -661,12 +661,11 @@ void BasicBlockLifter::UnpackLiveValues( // is this how we want to do this.... now the value really doesnt live in memory anywhere but the frame. if (!HasMemLoc(decl.param)) { auto ptr = returned_value(decl.param); - if (auto insn = llvm::dyn_cast(ptr)) { - insn->setMetadata("anvill.type", this->type_specifier.EncodeToMetadata( - decl.param.spec_type)); - } auto loaded_var_val = bldr.CreateLoad(decl.param.type, ptr, decl.param.name); + loaded_var_val->setMetadata( + "anvill.type", + this->type_specifier.EncodeToMetadata(decl.param.spec_type)); auto mem_ptr = remill::LoadMemoryPointer(bldr, this->intrinsics); auto new_mem_ptr = StoreNativeValue( diff --git a/lib/Type.cpp b/lib/Type.cpp index a6722dcf4..e197d59f8 100644 --- a/lib/Type.cpp +++ b/lib/Type.cpp @@ -64,6 +64,7 @@ class TypeSpecifierImpl { std::unordered_map type_to_id; std::vector id_to_type; std::unordered_map type_to_md; + std::unordered_map md_to_type; inline TypeSpecifierImpl(const TypeDictionary &type_dict_, const llvm::DataLayout &dl_) @@ -611,6 +612,77 @@ llvm::MDNode *TypeTranslator::EncodeToMetadata(TypeSpec spec) const { return std::visit([this](auto &&t) { return impl->TypeToMetadata(t); }, spec); } +TypeSpec TypeTranslator::DecodeFromMetadata(llvm::MDNode *md) const { + if (impl->md_to_type.count(md)) { + return impl->md_to_type[md]; + } + + auto &res = impl->md_to_type[md]; + auto kind = llvm::cast(md->getOperand(0).get()); + if (kind->getString().equals("BaseType")) { + auto const_value = + llvm::cast(md->getOperand(1).get()) + ->getValue(); + auto 
const_int = llvm::cast(const_value); + res = static_cast(const_int->getZExtValue()); + } else if (kind->getString().equals("PointerType")) { + auto ptrtype = std::make_shared(UnknownType{}, false); + res = ptrtype; + auto pointee = llvm::cast(md->getOperand(1).get()); + ptrtype->pointee = DecodeFromMetadata(pointee); + } else if (kind->getString().equals("VectorType")) { + auto vectype = std::make_shared(UnknownType{}, 0); + res = vectype; + auto base = llvm::cast(md->getOperand(1).get()); + auto const_value = + llvm::cast(md->getOperand(2).get()) + ->getValue(); + auto const_int = llvm::cast(const_value); + vectype->base = DecodeFromMetadata(base); + vectype->size = const_int->getZExtValue(); + } else if (kind->getString().equals("ArrayType")) { + auto arrtype = std::make_shared(UnknownType{}, 0); + res = arrtype; + auto base = llvm::cast(md->getOperand(1).get()); + auto const_value = + llvm::cast(md->getOperand(2).get()) + ->getValue(); + auto const_int = llvm::cast(const_value); + arrtype->base = DecodeFromMetadata(base); + arrtype->size = const_int->getZExtValue(); + } else if (kind->getString().equals("StructType")) { + auto strcttype = std::make_shared(); + res = strcttype; + for (size_t i = 1; i < md->getNumOperands(); ++i) { + strcttype->members.push_back(DecodeFromMetadata( + llvm::cast(md->getOperand(i).get()))); + } + } else if (kind->getString().equals("FunctionType")) { + auto functype = std::make_shared( + UnknownType{}, std::vector{}, false); + res = functype; + auto const_value = + llvm::cast(md->getOperand(1).get()) + ->getValue(); + auto const_int = llvm::cast(const_value); + functype->is_variadic = const_int->getZExtValue(); + functype->return_type = + DecodeFromMetadata(llvm::cast(md->getOperand(2).get())); + for (size_t i = 3; i < md->getNumOperands(); ++i) { + functype->arguments.push_back(DecodeFromMetadata( + llvm::cast(md->getOperand(i).get()))); + } + } else if (kind->getString().equals("UnknownType")) { + auto const_value = + 
llvm::cast(md->getOperand(1).get()) + ->getValue(); + auto const_int = llvm::cast(const_value); + res = UnknownType{static_cast(const_int->getZExtValue())}; + } + + return res; +} + // Parse an encoded type string into its represented type. Result TypeTranslator::DecodeFromSpec(TypeSpec spec) const { From 09cef2b388a7b0c500fb9d52d7d63c8d6a55cf49 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Thu, 1 Jun 2023 13:08:49 -0700 Subject: [PATCH 138/163] Add image base spec (#379) * add image base to spec * add image name to spec * add image base/name to anvill spec * use std::uint64_t * Use const ref where appropriate * fix dcheck * maintain backwards compat --------- Co-authored-by: Alex Cameron Co-authored-by: 2over12 --- data_specifications/specification.proto | 2 ++ include/anvill/Specification.h | 6 ++++++ lib/Lifters/CodeLifter.cpp | 2 +- lib/Specification.cpp | 27 ++++++++++++++++++++++--- lib/Specification.h | 6 +++++- 5 files changed, 38 insertions(+), 5 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 150558603..195a872cd 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -385,4 +385,6 @@ message Specification { repeated MemoryRange memory_ranges = 7; ControlFlowOverrides overrides = 8; map type_aliases = 9; + string image_name = 10; + uint64 image_base = 11; } diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index 562c6d61a..f92160ebd 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -124,6 +124,12 @@ class Specification { // Return the architecture used by this specification. std::shared_ptr Arch(void) const; + // Return the image name used by this specification. + const std::string &ImageName(void) const; + + // Return the image base address used by this specification. 
+ std::uint64_t ImageBase(void) const; + // Return the type dictionary used by this specification. const ::anvill::TypeDictionary &TypeDictionary(void) const; diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index d91b873f1..bc1005158 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -277,7 +277,7 @@ void CodeLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { // Initialize cleanup optimizations - DCHECK(llvm::verifyFunction(*inf, &llvm::errs())); + DCHECK(!llvm::verifyFunction(*inf, &llvm::errs())); llvm::legacy::FunctionPassManager fpm(inf->getParent()); fpm.add(llvm::createCFGSimplificationPass()); diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 084b14bb0..7af758027 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -32,8 +32,12 @@ namespace anvill { SpecificationImpl::~SpecificationImpl(void) {} -SpecificationImpl::SpecificationImpl(std::unique_ptr arch_) +SpecificationImpl::SpecificationImpl(std::unique_ptr arch_, + const std::string &image_name_, + std::uint64_t image_base_) : arch(std::move(arch_)), + image_name(image_name_), + image_base(image_base_), type_dictionary(*(arch->context)), type_translator(type_dictionary, arch.get()) {} @@ -249,6 +253,16 @@ std::shared_ptr Specification::Arch(void) const { return std::shared_ptr(impl, impl->arch.get()); } +// Return the architecture used by this specification. +const std::string &Specification::ImageName(void) const { + return impl->image_name; +} + +// Return the architecture used by this specification. +std::uint64_t Specification::ImageBase(void) const { + return impl->image_base; +} + // Return the type dictionary used by this specification. 
const ::anvill::TypeDictionary &Specification::TypeDictionary(void) const { return impl->type_dictionary; @@ -326,8 +340,11 @@ Specification::DecodeFromPB(llvm::LLVMContext &context, const std::string &pb) { return arch.Error(); } + const auto &image_name = spec.image_name(); + auto image_base = spec.image_base(); + std::shared_ptr pimpl( - new SpecificationImpl(arch.TakeValue())); + new SpecificationImpl(arch.TakeValue(), image_name, image_base)); auto maybe_warnings = pimpl->ParseSpecification(spec); @@ -355,8 +372,12 @@ Specification::DecodeFromPB(llvm::LLVMContext &context, std::istream &pb) { return arch.Error(); } + const auto &image_name = spec.image_name(); + auto image_base = spec.image_base(); + + std::shared_ptr pimpl( - new SpecificationImpl(arch.TakeValue())); + new SpecificationImpl(arch.TakeValue(), image_name, image_base)); auto maybe_warnings = pimpl->ParseSpecification(spec); diff --git a/lib/Specification.h b/lib/Specification.h index 17db6ac26..327967308 100644 --- a/lib/Specification.h +++ b/lib/Specification.h @@ -34,7 +34,8 @@ class SpecificationImpl friend class Specification; SpecificationImpl(void) = delete; - SpecificationImpl(std::unique_ptr arch_); + SpecificationImpl(std::unique_ptr arch_, + const std::string &image_name_, std::uint64_t image_base_); Result, std::string> ParseSpecification(const ::specification::Specification &obj); @@ -45,6 +46,9 @@ class SpecificationImpl // Architecture used by all of the function and global variable declarations. 
const std::unique_ptr arch; + std::string image_name; + std::uint64_t image_base; + const TypeDictionary type_dictionary; const TypeTranslator type_translator; From ba87273ff5b6513bc77926f8e052a4b1e7c2904e Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Wed, 5 Jul 2023 21:14:21 +1000 Subject: [PATCH 139/163] Add missing `<utility>` header for `std::exchange` (#384) --- include/anvill/Result.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/anvill/Result.h b/include/anvill/Result.h index ea7e97dc6..ba6c6eec1 100644 --- a/include/anvill/Result.h +++ b/include/anvill/Result.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include namespace anvill { @@ -44,10 +45,10 @@ class Result final { const ValueType *operator->(void) const; Result(const ValueType &value); - Result(ValueType &&value): destroyed(false), data(std::move(value)) {} + Result(ValueType &&value) : destroyed(false), data(std::move(value)) {} Result(const ErrorType &error); - Result(ErrorType &&error): destroyed(false), data(std::move(error)) {} + Result(ErrorType &&error) : destroyed(false), data(std::move(error)) {} Result(Result &&other) noexcept; Result &operator=(Result &&other) noexcept; From 2772efc3402e48ec0245001d1c0fcc58ef13d974 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 7 Jul 2023 07:06:33 -0400 Subject: [PATCH 140/163] reconstruct floats during a load (#385) --- lib/Utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 047712fb4..5f33dced1 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -546,7 +546,7 @@ GetSubcomponentType(const LowLoc &loc, uint64_t offset, llvm::Type *target_type, llvm::DataLayout &data) { // there's two situations here, either we have a primitive target type in which case the loc must // indicate the size for each component, otherwise we decompose the target - if (auto itype = llvm::isa_and_nonnull(target_type)) { + if (target_type->isIntegerTy() || 
target_type->isFloatingPointTy()) { return llvm::IntegerType::get(target_type->getContext(), loc.Size() * 8); } else { llvm::Type *ty = target_type; From 0a9dd3fe7b2c8216f3e291b6f082f44299be4279 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 10 Jul 2023 17:34:23 +0200 Subject: [PATCH 141/163] Add `required_globals` to spec (#386) * Add `required_globals` to spec * Make `required_globals` a set --- data_specifications/specification.proto | 1 + include/anvill/Specification.h | 2 ++ lib/Specification.cpp | 8 +++++++- lib/Specification.h | 3 +++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 195a872cd..9f7a6d963 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -387,4 +387,5 @@ message Specification { map type_aliases = 9; string image_name = 10; uint64 image_base = 11; + repeated string required_globals = 12; } diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index f92160ebd..18d7fed65 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -199,6 +199,8 @@ class Specification { SpecBlockContexts GetBlockContexts() const { return SpecBlockContexts(*this); } + + const std::unordered_set &GetRequiredGlobals() const; }; } // namespace anvill diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 7af758027..dbf8fc5c1 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -238,7 +238,8 @@ SpecificationImpl::ParseSpecification( std::sort(misc_overrides.begin(), misc_overrides.end(), [](const auto &a, const auto &b) { return a.address < b.address; }); - // TODO(frabert): Parse everything else + required_globals = {spec.required_globals().begin(), + spec.required_globals().end()}; return dec_err; } @@ -539,4 +540,9 @@ void Specification::ForEachMiscOverride( } } +const std::unordered_set & +Specification::GetRequiredGlobals() 
const { + return impl->required_globals; +} + } // namespace anvill diff --git a/lib/Specification.h b/lib/Specification.h index 327967308..2be2f1ab2 100644 --- a/lib/Specification.h +++ b/lib/Specification.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "anvill/Passes/BasicBlockPass.h" @@ -92,6 +93,8 @@ class SpecificationImpl std::vector misc_overrides; std::unordered_map control_flow_overrides; + + std::unordered_set required_globals; }; } // namespace anvill From 9f3e12353d4dc7e3e07e74a67882736f404396f9 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Tue, 11 Jul 2023 14:02:27 +1000 Subject: [PATCH 142/163] Add `-j` flag to `test-amp-challenge-bins.sh` and update ignore list (#380) --- .github/workflows/build.yml | 3 +- ci/challenge_bins_test_settings.json | 85 +++++++++++++++++++++++++--- libraries/lifting-tools-ci | 2 +- scripts/test-amp-challenge-bins.sh | 30 +++++++--- 4 files changed, 100 insertions(+), 20 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 040caa05c..3c2f2ba2c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -366,7 +366,8 @@ jobs: python3 -m pip install -r libraries/lifting-tools-ci/requirements.txt scripts/test-amp-challenge-bins.sh \ --ghidra-install-dir $GHIDRA_INSTALL_DIR \ - --decompile-cmd "anvill-decompile-spec" + --decompile-cmd "anvill-decompile-spec" \ + --jobs 8 env: TOB_AMP_PASSPHRASE: ${{secrets.TOB_AMP_PASSPHRASE}} - name: Tar and Compress logs diff --git a/ci/challenge_bins_test_settings.json b/ci/challenge_bins_test_settings.json index be82bc69b..0eedb5720 100644 --- a/ci/challenge_bins_test_settings.json +++ b/ci/challenge_bins_test_settings.json @@ -1,16 +1,83 @@ { "timeout.seconds": "800", "tests.ignore": [ - "challenge-3_arm64_program_go_patched", - "challenge-3_arm64_program_go", - "challenge-3_armv7_program_go_patched", - "challenge-3_armv7_program_go", - "challenge-3_amd64_program_go_patched", - 
"challenge-3_amd64_program_go", - "challenge-3_x86_program_go_patched", - "challenge-3_x86_program_go", + "challenge-6-armv7-program_c.clang-10.patched-nodebug", + "challenge-7-armv7-program_c.clang-10.vuln-nodebug", + "challenge-6-amd64-program_c.clang-10.vuln-nodebug", + "challenge-6-x86-program_c.clang-10.patched-nodebug", + "challenge-7-amd64-program_c.clang-10.vuln", + "challenge-5-armv7-program_c.clang.patched", + "challenge-7-armv7-program_c.clang-10.vuln", + "challenge-8-x86-program_c-nodebug", + "ppc-linflexd_uart_mpc5744p.elf", + "challenge-7-amd64-program_c.clang-10.vuln-nodebug", + "challenge-6-x86-program_c.clang-10.vuln-nodebug", + "challenge-3-arm64-program_go_patched", + "challenge-8-amd64-program_c", + "challenge-9-amd64-program_c.clang-10.vuln", + "challenge-8-armv7-program_c-nodebug", + "challenge-9-armv7-program_c.clang-10.vuln-nodebug", + "challenge-8-x86-program_c", + "challenge-6-armv7-program_c.clang-10.vuln", + "challenge-9-amd64-program_c.clang-10.vuln-nodebug", + "challenge-5-amd64-program_c.clang.vuln", + "challenge-6-amd64-program_c.clang-10.patched", + "challenge-6-x86-program_c.clang-10.patched", + "challenge-6-amd64-program_c.clang-10.patched-nodebug", + "challenge-5-amd64-program_c.clang.vuln-nodebug", + "ppc-tsens_mpc5744p.elf", + "challenge-3-arm64-program_go", + "challenge-5-amd64-program_c.clang.patched-nodebug", + "challenge-9-armv7-program_c.clang-10.vuln", + "challenge-7-x86-program_c.clang-10.vuln-nodebug", + "challenge-5-armv7-program_c.clang.vuln", + "challenge-6-amd64-program_c.clang-10.vuln", + "challenge-5-x86-program_c.clang.vuln", + "challenge-5-amd64-program_c.clang.patched", + "ppc_vle_booke_example.elf", + "challenge-5-x86-program_c.clang.vuln-nodebug", + "challenge-7-x86-program_c.clang-10.vuln", + "challenge-6-armv7-program_c.clang-10.vuln-nodebug", + "challenge-8-amd64-program_c-nodebug", + "challenge-6-x86-program_c.clang-10.vuln", + "challenge-9-x86-program_c.clang-10.vuln-nodebug", + 
"challenge-5-x86-program_c.clang.patched", + "challenge-5-x86-program_c.clang.patched-nodebug", + "challenge-5-armv7-program_c.clang.patched-nodebug", + "challenge-5-armv7-program_c.clang.vuln-nodebug", + "challenge-6-armv7-program_c.clang-10.patched", + "challenge-8-armv7-program_c", + "challenge-9-x86-program_c.clang-10.vuln", + "challenge-9-arm64-program_c.clang-10.vuln-nodebug", + "challenge-9-arm64-program_c.clang-10.vuln", + "challenge-6-arm64-program_c.clang-10.patched", + "challenge-3-amd64-program_go_patched-nodebug", + "challenge-3-amd64-program_go_patched", + "challenge-3-x86-program_go", + "challenge-5-arm64-program_c.clang.vuln", + "challenge-8-arm64-program_c-nodebug", + "challenge-6-arm64-program_c.clang-10.patched-nodebug", + "challenge-7-arm64-program_c.clang-10.vuln", + "challenge-3-x86-program_go-nodebug", + "challenge-5-arm64-program_c.clang.vuln-nodebug", + "challenge-3-arm64-program_go_patched-nodebug", + "challenge-6-arm64-program_c.clang-10.vuln", + "challenge-3-amd64-program_go", + "challenge-3-amd64-program_go-nodebug", + "challenge-3-x86-program_go_patched", + "challenge-8-arm64-program_c", + "challenge-5-arm64-program_c.clang.patched", + "challenge-7-arm64-program_c.clang-10.vuln-nodebug", + "challenge-6-arm64-program_c.clang-10.vuln-nodebug", + "challenge-5-arm64-program_c.clang.patched-nodebug", + "challenge-3-x86-program_go_patched-nodebug", + "challenge-3-arm64-program_go-nodebug", "challenge-10-ppc-program_c.elf", - "challenge-10-ppc-vle-program_c.elf" + "challenge-10-ppc-vle-program_c.elf", + "challenge-3-armv7-program_go", + "challenge-3-armv7-program_go-nodebug", + "challenge-3-armv7-program_go_patched", + "challenge-3-armv7-program_go_patched-nodebug" ], "language_id_overrides": { "ppc-adc_mpc5744p.elf": "PowerPC:BE:64:VLE-32addr", diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 2f5ae380e..f2e75cd43 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 
2f5ae380e78f2288ead5f4c1c6aef30c68c9d721 +Subproject commit f2e75cd4346ccb55c5aafaa59814d6ff8c82efa7 diff --git a/scripts/test-amp-challenge-bins.sh b/scripts/test-amp-challenge-bins.sh index 5611ec921..29339c55f 100755 --- a/scripts/test-amp-challenge-bins.sh +++ b/scripts/test-amp-challenge-bins.sh @@ -11,6 +11,7 @@ function Help echo "Options:" echo " --ghidra-install-dir The ghidra install dir. Default ${GHIDRA_INSTALL_DIR}" echo " --decompile-cmd The anvill decompile command to invoke. Default ${ANVILL_DECOMPILE}" + echo " --jobs The number of jobs that can run concurrently. Defaults to system's CPU count" echo " -h --help Print help." } @@ -72,6 +73,12 @@ while [[ $# -gt 0 ]] ; do shift # past argument ;; + # How many concurrent jobs + --jobs) + NUM_JOBS=${2} + shift # past argument + ;; + *) # unknown option echo "[x] Unknown option: ${key}" @@ -90,7 +97,7 @@ then fi if ! ${ANVILL_DECOMPILE} --version &>/dev/null; -then +then echo "[!] Could not execute anvill decompile cmd: ${ANVILL_DECOMPILE}" exit 1 fi @@ -112,15 +119,20 @@ FAILED="no" for dir in binaries do echo "[+] Testing ${dir}" - ${SRC_DIR}/libraries/lifting-tools-ci/tool_run_scripts/anvill.py \ - --ghidra-install-dir "${GHIDRA_INSTALL_DIR}" \ - --anvill-decompile "${ANVILL_DECOMPILE}" \ - --input-dir "$(pwd)/${dir}" \ - --output-dir "$(pwd)/results/${dir}" \ - --run-name "anvill-live-ci-amp-bins" \ - --test-options "${SRC_DIR}/ci/challenge_bins_test_settings.json" \ - --dump-stats \ + args=( + --ghidra-install-dir "${GHIDRA_INSTALL_DIR}" + --anvill-decompile "${ANVILL_DECOMPILE}" + --input-dir "$(pwd)/${dir}" + --output-dir "$(pwd)/results/${dir}" + --run-name "anvill-live-ci-amp-bins" + --test-options "${SRC_DIR}/ci/challenge_bins_test_settings.json" + --dump-stats --dump-benchmark + ) + if [[ -v NUM_JOBS ]]; then + args+=(--jobs "${NUM_JOBS}") + fi + ${SRC_DIR}/libraries/lifting-tools-ci/tool_run_scripts/anvill.py "${args[@]}" if ! 
check_test "$(pwd)/results/${dir}/python/stats.json" From 465ecdc4e3b0c28149b90a1efeb3d38857f8bcd5 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Wed, 2 Aug 2023 09:28:55 -0400 Subject: [PATCH 143/163] Debug utils (#369) * add flags to print llvm ir before/after each llvm pass * set llvm debug flag earlier * python script to print diff between llvm opts * add note on usage * add comment * use new pass manager, otherwise debug diff won't work * don't enable debug logging unless a debug flag is specified * clang format --- bin/Decompile/Main.cpp | 23 ++++++++++++--- include/anvill/Lifters.h | 7 ++++- lib/Lifters/CodeLifter.cpp | 59 +++++++++++++++++++++++++++++--------- lib/Lifters/CodeLifter.h | 3 +- scripts/build.sh | 2 +- 5 files changed, 73 insertions(+), 21 deletions(-) diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index b74245233..e79ceba68 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include @@ -45,6 +47,10 @@ DEFINE_bool(add_breakpoints, false, DEFINE_bool(add_names, false, "Try to apply symbol names to lifted entities."); DEFINE_bool(disable_opt, false, "Dont apply optimization passes"); DEFINE_bool(llvm_debug, false, "Enable LLVM debug flag"); +DEFINE_bool(llvm_print_changed_diff, false, + "Print IR diff. NOTE: LLVM must be compiled as Debug"); +DEFINE_bool(llvm_print_changed_color_diff, false, + "Print IR colored diff. 
NOTE: LLVM must be compiled as Debug"); DEFINE_bool(inline_basic_blocks, false, "Enables inlining of basic blocks for high level output"); @@ -89,6 +95,16 @@ int main(int argc, char *argv[]) { google::ParseCommandLineFlags(&argc, &argv, true); google::InitGoogleLogging(argv[0]); + if (FLAGS_llvm_debug) { + llvm::DebugFlag = true; + } + + if (FLAGS_llvm_print_changed_diff) { + llvm::PrintChanged = llvm::ChangePrinter::DiffVerbose; + } else if (FLAGS_llvm_print_changed_color_diff) { + llvm::PrintChanged = llvm::ChangePrinter::ColourDiffVerbose; + } + if (FLAGS_spec.empty()) { std::cerr << "Please specify a path to a Protobuf specification file in '--spec'" @@ -176,6 +192,9 @@ int main(int argc, char *argv[]) { options.stack_frame_recovery_options.stack_offset_metadata_name = "stack_offset"; + options.debug_pm = FLAGS_llvm_debug || FLAGS_llvm_print_changed_diff || + FLAGS_llvm_print_changed_color_diff; + anvill::EntityLifter lifter(options); std::unordered_map names; @@ -245,10 +264,6 @@ int main(int argc, char *argv[]) { llvm::EnableStatistics(); } - if (FLAGS_llvm_debug) { - llvm::DebugFlag = true; - } - if (!FLAGS_disable_opt) { anvill::OptimizeModule(lifter, module, spec.GetBlockContexts(), spec); } diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index 6107def57..67b2408fe 100644 --- a/include/anvill/Lifters.h +++ b/include/anvill/Lifters.h @@ -219,7 +219,8 @@ class LifterOptions { //TODO(ian): This should be initialized by an OS + arch pair stack_pointer_is_signed(false), should_remove_anvill_pc(true), - should_inline_basic_blocks(false) { + should_inline_basic_blocks(false), + debug_pm(false) { CheckModuleContextMatchesArch(); } @@ -293,6 +294,10 @@ class LifterOptions { bool should_inline_basic_blocks : 1; + // enable pass manager debug printout + bool debug_pm; + + private: LifterOptions(void) = delete; diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index bc1005158..372913cfe 100644 --- a/lib/Lifters/CodeLifter.cpp +++ 
b/lib/Lifters/CodeLifter.cpp @@ -3,13 +3,20 @@ #include #include #include -#include #include #include +#include +#include #include #include +#include +#include +#include +#include +#include #include #include +#include #include #include #include @@ -279,18 +286,44 @@ void CodeLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { DCHECK(!llvm::verifyFunction(*inf, &llvm::errs())); - llvm::legacy::FunctionPassManager fpm(inf->getParent()); - fpm.add(llvm::createCFGSimplificationPass()); - fpm.add(llvm::createPromoteMemoryToRegisterPass()); - fpm.add(llvm::createReassociatePass()); - fpm.add(llvm::createDeadStoreEliminationPass()); - fpm.add(llvm::createDeadCodeEliminationPass()); - fpm.add(llvm::createSROAPass()); - fpm.add(llvm::createDeadCodeEliminationPass()); - fpm.add(llvm::createInstructionCombiningPass()); - fpm.doInitialization(); - fpm.run(*inf); - fpm.doFinalization(); + llvm::ModuleAnalysisManager mam; + llvm::FunctionAnalysisManager fam; + llvm::LoopAnalysisManager lam; + llvm::CGSCCAnalysisManager cam; + + llvm::ModulePassManager mpm; + llvm::FunctionPassManager fpm; + + llvm::PassInstrumentationCallbacks pic; + llvm::StandardInstrumentations si(inf->getContext(), + /*DebugLogging=*/options.debug_pm, + /*VerifyEach=*/options.debug_pm); + si.registerCallbacks(pic, &fam); + + llvm::PassBuilder pb(nullptr, llvm::PipelineTuningOptions(), std::nullopt, + &pic); + pb.registerModuleAnalyses(mam); + pb.registerFunctionAnalyses(fam); + pb.registerLoopAnalyses(lam); + pb.registerCGSCCAnalyses(cam); + pb.crossRegisterProxies(lam, fam, cam, mam); + + fpm.addPass(llvm::SimplifyCFGPass()); + fpm.addPass(llvm::PromotePass()); + fpm.addPass(llvm::ReassociatePass()); + fpm.addPass(llvm::DSEPass()); + fpm.addPass(llvm::DCEPass()); + fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); + fpm.addPass(llvm::DCEPass()); + fpm.addPass(llvm::InstCombinePass()); + + mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(fpm))); + mpm.run(*inf->getParent(), 
mam); + + mam.clear(); + fam.clear(); + lam.clear(); + cam.clear(); ClearVariableNames(inf); } diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index 3bf1479a2..9f28d1276 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -29,7 +29,6 @@ class CodeLifter { remill::OperandLifter::OpLifterPtr op_lifter; - // Are we lifting SPARC code? This affects whether or not we need to do // double checking on function return addresses; const bool is_sparc; @@ -84,4 +83,4 @@ class CodeLifter { CodeLifter(CodeLifter &&) = default; }; -} // namespace anvill \ No newline at end of file +} // namespace anvill diff --git a/scripts/build.sh b/scripts/build.sh index 56c8862e4..03b44a2bd 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -364,7 +364,7 @@ function Help echo " --build-dir Change the default (${BUILD_DIR}) build directory." echo " --debug Build with Debug symbols." echo " --extra-cmake-args Extra CMake arguments to build with." - echo " --install Just install Rellic, do not package it." + echo " --install Just install Anvill, do not package it." echo " -h --help Print help." 
} From f7f498dd76d6f443c47a8409cd74c0ef367762da Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Wed, 9 Aug 2023 03:12:50 +1000 Subject: [PATCH 144/163] Remove challenge testing (#389) --- .github/workflows/build.yml | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3c2f2ba2c..7c02b969a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -359,39 +359,6 @@ jobs: run: | dpkg -i ${{ steps.package_names.outputs.DEB_PACKAGE_PATH }} - - name: Run Integration Tests (AMP Challenge Binaries) - shell: bash - working-directory: ${{ steps.build_paths.outputs.REL_SOURCE }}/anvill - run: | - python3 -m pip install -r libraries/lifting-tools-ci/requirements.txt - scripts/test-amp-challenge-bins.sh \ - --ghidra-install-dir $GHIDRA_INSTALL_DIR \ - --decompile-cmd "anvill-decompile-spec" \ - --jobs 8 - env: - TOB_AMP_PASSPHRASE: ${{secrets.TOB_AMP_PASSPHRASE}} - - name: Tar and Compress logs - if: failure() - run: | - shopt -s globstar - tar -cf test-errs.tar.xz ${{ steps.build_paths.outputs.REL_SOURCE }}/anvill/amp-challenge-bins/**/std* - shell: bash - - name: Upload stderr/stdout logs on error - if: failure() - uses: actions/upload-artifact@v3 - with: - name: AMP Challenge Binaries logs - path: test-errs.tar.xz - - # - name: Run Integration Tests (AnghaBench 1K) - # shell: bash - # working-directory: ${{ steps.build_paths.outputs.REL_SOURCE }}/anvill - # run: | - # python3 -m pip install -r libraries/lifting-tools-ci/requirements.txt - # scripts/test-angha-1k.sh \ - # --python-cmd "python3 -m anvill" \ - # --decompile-cmd "anvill-decompile-json" - - name: Store the DEB package uses: actions/upload-artifact@v3 with: From 66bb39d02851ab45696c140b86d745b4f4754528 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Wed, 16 Aug 2023 14:02:43 +1000 Subject: [PATCH 145/163] Use encoded type for constant value mappings (#388) --- lib/Protobuf.cpp | 13 +++++++++---- 1 
file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 5684a52b0..2d41d1d32 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -638,11 +638,16 @@ void ProtobufTranslator::ParseCFGIntoFunction( LOG(FATAL) << "No stack ptr"; } - auto stackptr_type_spec = SizeToType(stackptr->size * 8); + auto target_type_spec = DecodeType(symval.target_value().type()); + if (!target_type_spec.Succeeded()) { + LOG(ERROR) << "Failed to lift target type " + << target_type_spec.TakeError(); + return; + } - auto target_vdecl = - DecodeValueDecl(symval.target_value().values(), stackptr_type_spec, - "Unable to get value decl for stack offset relation"); + auto target_vdecl = DecodeValueDecl( + symval.target_value().values(), target_type_spec.TakeValue(), + "Unable to get value decl for target"); if (!target_vdecl.Succeeded()) { LOG(ERROR) << "Failed to lift value " << target_vdecl.TakeError(); From e04ff9d688f9eaa6f137bcfe1dea736acbf1c98e Mon Sep 17 00:00:00 2001 From: 2over12 Date: Mon, 18 Sep 2023 16:04:47 -0400 Subject: [PATCH 146/163] Ian/allow for typehints in spec (#390) * add support for type hints after instructions * fix annotations to use intrinsics * remove spurious header * remove unrequired import * remove unused import --- data_specifications/specification.proto | 10 +++++ include/anvill/Declarations.h | 8 ++++ lib/Lifters/BasicBlockLifter.cpp | 43 ++++++++++++++++++++ lib/Lifters/BasicBlockLifter.h | 3 ++ lib/Lifters/CodeLifter.cpp | 24 ++++++++++- lib/Lifters/CodeLifter.h | 3 ++ lib/Optimize.cpp | 1 - lib/Passes/ConvertPointerArithmeticToGEP.cpp | 25 ++++++++++++ lib/Protobuf.cpp | 21 ++++++++++ 9 files changed, 135 insertions(+), 3 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 9f7a6d963..34692f52d 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -305,6 +305,11 @@ message StackFrame { int64 
parameter_offset = 5; } +message TypeHint { + uint64 target_addr = 1; + Variable target_var = 2; +} + message Function { uint64 entry_address = 1; FunctionLinkage func_linkage = 3; @@ -319,6 +324,11 @@ message Function { StackFrame frame = 9; repeated Parameter in_scope_vars = 10; + + // an instruction can have a set of typehints that says this loc is known + // to have this type after this instruction, these will be translated into + // a low lifting of that location with spec type metadata + repeated TypeHint type_hints = 11; } message GlobalVariable { diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index a6bdfe713..7977fff7a 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -365,6 +365,11 @@ class SpecBlockContext : public BasicBlockContext { virtual const std::vector &LiveParamsAtExit() const override; }; + +struct TypeHint { + uint64_t target_addr; + ValueDecl hint; +}; // A function decl, as represented at a "near ABI" level. To be specific, // not all C, and most C++ decls, as written would be directly translatable // to this. 
This ought nearly represent how LLVM represents a C/C++ function @@ -415,6 +420,9 @@ struct FunctionDecl : public CallableDecl { std::unordered_map> constant_values_at_exit; + // sorted vector of hints + std::vector type_hints; + std::uint64_t stack_depth; std::uint64_t maximum_depth; diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index d690a175a..e55748679 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -298,6 +298,33 @@ bool BasicBlockLifter::DecodeInstructionInto(const uint64_t addr, } +void BasicBlockLifter::ApplyTypeHint(llvm::IRBuilder<> &bldr, + const ValueDecl &type_hint) { + + auto ty_hint = this->GetTypeHintFunction(); + auto state_ptr_internal = + this->lifted_func->getArg(remill::kStatePointerArgNum); + auto mem_ptr = + remill::LoadMemoryPointer(bldr.GetInsertBlock(), this->intrinsics); + auto curr_value = + anvill::LoadLiftedValue(type_hint, options.TypeDictionary(), intrinsics, + options.arch, bldr, state_ptr_internal, mem_ptr); + + if (curr_value->getType()->isPointerTy()) { + auto call = bldr.CreateCall(ty_hint, {curr_value}); + call->setMetadata("anvill.type", this->type_specifier.EncodeToMetadata( + type_hint.spec_type)); + curr_value = call; + } + + auto new_mem_ptr = + StoreNativeValue(curr_value, type_hint, options.TypeDictionary(), + intrinsics, bldr, state_ptr_internal, mem_ptr); + bldr.CreateStore(new_mem_ptr, + remill::LoadMemoryPointerRef(bldr.GetInsertBlock())); +} + + void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { auto entry_block = &this->lifted_func->getEntryBlock(); @@ -340,6 +367,22 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { inst, bb, this->lifted_func->getArg(remill::kStatePointerArgNum), false /* is_delayed */); + llvm::IRBuilder<> builder(bb); + + auto start = + std::lower_bound(decl.type_hints.begin(), decl.type_hints.end(), + inst.pc, [](const TypeHint &hint_rhs, uint64_t addr) { + return hint_rhs.target_addr < addr; + 
}); + auto end = + std::upper_bound(decl.type_hints.begin(), decl.type_hints.end(), + inst.pc, [](uint64_t addr, const TypeHint &hint_rhs) { + return addr < hint_rhs.target_addr; + }); + for (; start != end; start++) { + this->ApplyTypeHint(builder, start->hint); + } + ended_on_terminal = !this->ApplyInterProceduralControlFlowOverride(inst, bb); DLOG_IF(INFO, ended_on_terminal) diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index 565b365b5..e6c065a97 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -73,6 +73,9 @@ class BasicBlockLifter : public CodeLifter { remill::DecodingContext CreateDecodingContext(const CodeBlock &blk); + + void ApplyTypeHint(llvm::IRBuilder<> &bldr, const ValueDecl &type_hint); + void LiftInstructionsIntoLiftedFunction(); BasicBlockFunction CreateBasicBlockFunction(); diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index 372913cfe..6454142bb 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -1,6 +1,9 @@ #include "CodeLifter.h" +#include +#include #include +#include #include #include #include @@ -24,8 +27,6 @@ #include -#include "anvill/Type.h" - namespace anvill { namespace { // Clear out LLVM variable names. They're usually not helpful. 
@@ -170,6 +171,25 @@ void CodeLifter::InitializeStateStructureFromGlobalRegisterVariables( }); } +llvm::Function *CodeLifter::GetTypeHintFunction() { + const auto &func_name = kTypeHintFunctionPrefix; + + auto func = semantics_module->getFunction(func_name); + if (func != nullptr) { + return func; + } + + auto ptr = llvm::PointerType::get(this->semantics_module->getContext(), 0); + llvm::Type *func_parameters[] = {ptr}; + + auto func_type = llvm::FunctionType::get(ptr, func_parameters, false); + + func = llvm::Function::Create(func_type, llvm::GlobalValue::ExternalLinkage, + func_name, this->semantics_module); + + return func; +} + llvm::MDNode *CodeLifter::GetAddrAnnotation(uint64_t addr, llvm::LLVMContext &context) const { auto pc_val = llvm::ConstantInt::get( diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index 9f28d1276..cc7f10438 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -72,6 +72,9 @@ class CodeLifter { unsigned pc_annotation_id; + + llvm::Function *GetTypeHintFunction(); + llvm::MDNode *GetAddrAnnotation(uint64_t addr, llvm::LLVMContext &context) const; diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index e204405fa..e3b1db9ca 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -262,7 +262,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, //AddRecoverBasicStackFrame(fpm, options.stack_frame_recovery_options); //AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options); fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); - //fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter)); fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); fpm.addPass(llvm::VerifierPass()); diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index 19bec6289..8a0f949fb 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -6,6 
+6,7 @@ * the LICENSE file found in the root directory of this source tree. */ +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +73,8 @@ struct ConvertPointerArithmeticToGEP::Impl { llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, UnknownType t); llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, TypeSpec type); + + bool ConvertTypeHints(llvm::Function &f); bool ConvertLoadInt(llvm::Function &f); bool FoldPtrAdd(llvm::Function &f); bool FoldScaledIndex(llvm::Function &f); @@ -330,6 +334,26 @@ llvm::StringRef ConvertPointerArithmeticToGEP::name() { return "ConvertPointerArithmeticToGEP"; } +bool ConvertPointerArithmeticToGEP::Impl::ConvertTypeHints(llvm::Function &f) { + std::vector calls; + for (auto &insn : llvm::instructions(f)) { + if (auto *call = llvm::dyn_cast(&insn)) { + if (call->getCalledFunction() && + call->getCalledFunction()->getName() == kTypeHintFunctionPrefix) { + calls.push_back(call); + } + } + } + + for (auto call : calls) { + auto arg = call->getArgOperand(0); + call->replaceAllUsesWith(arg); + call->eraseFromParent(); + } + + return !calls.empty(); +} + // Finds `(load i64, P)` and converts it to `(ptrtoint (load ptr, P))` bool ConvertPointerArithmeticToGEP::Impl::ConvertLoadInt(llvm::Function &f) { using namespace llvm::PatternMatch; @@ -573,6 +597,7 @@ llvm::PreservedAnalyses ConvertPointerArithmeticToGEP::runOnBasicBlockFunction( bool changed = impl->ConvertLoadInt(function); changed |= impl->FoldPtrAdd(function); changed |= impl->FoldScaledIndex(function); + changed |= impl->ConvertTypeHints(function); return changed ? 
llvm::PreservedAnalyses::none() : llvm::PreservedAnalyses::all(); } diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 2d41d1d32..6c72c0444 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -558,6 +558,27 @@ Result ProtobufTranslator::DecodeFunction( this->ParseCFGIntoFunction(function, decl); + + for (auto &ty_hint : function.type_hints()) { + auto maybe_type = DecodeType(ty_hint.target_var().type()); + if (maybe_type.Succeeded()) { + auto maybe_var = + DecodeValueDecl(ty_hint.target_var().values(), maybe_type.TakeValue(), + "attempting to decode type hint value"); + if (maybe_var.Succeeded()) { + decl.type_hints.push_back( + {ty_hint.target_addr(), maybe_var.TakeValue()}); + } + } else { + LOG(ERROR) << "Failed to decode type for type hint"; + } + } + + std::sort(decl.type_hints.begin(), decl.type_hints.end(), + [](const TypeHint &hint_lhs, const TypeHint &hint_rhs) { + return hint_lhs.target_addr < hint_rhs.target_addr; + }); + auto link = function.func_linkage(); if (link == specification::FUNCTION_LINKAGE_DECL) { From 4413bfb13ac2d44853abdce53b592c99e004617c Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 21 Sep 2023 21:19:54 +1000 Subject: [PATCH 147/163] Revert "Debug utils (#369)" (#391) This reverts commit 465ecdc4e3b0c28149b90a1efeb3d38857f8bcd5. 
--- bin/Decompile/Main.cpp | 23 +++------------ include/anvill/Lifters.h | 7 +---- lib/Lifters/CodeLifter.cpp | 59 +++++++++----------------------------- lib/Lifters/CodeLifter.h | 3 +- scripts/build.sh | 2 +- 5 files changed, 21 insertions(+), 73 deletions(-) diff --git a/bin/Decompile/Main.cpp b/bin/Decompile/Main.cpp index e79ceba68..b74245233 100644 --- a/bin/Decompile/Main.cpp +++ b/bin/Decompile/Main.cpp @@ -17,8 +17,6 @@ #include #include #include -#include -#include #include #include #include @@ -47,10 +45,6 @@ DEFINE_bool(add_breakpoints, false, DEFINE_bool(add_names, false, "Try to apply symbol names to lifted entities."); DEFINE_bool(disable_opt, false, "Dont apply optimization passes"); DEFINE_bool(llvm_debug, false, "Enable LLVM debug flag"); -DEFINE_bool(llvm_print_changed_diff, false, - "Print IR diff. NOTE: LLVM must be compiled as Debug"); -DEFINE_bool(llvm_print_changed_color_diff, false, - "Print IR colored diff. NOTE: LLVM must be compiled as Debug"); DEFINE_bool(inline_basic_blocks, false, "Enables inlining of basic blocks for high level output"); @@ -95,16 +89,6 @@ int main(int argc, char *argv[]) { google::ParseCommandLineFlags(&argc, &argv, true); google::InitGoogleLogging(argv[0]); - if (FLAGS_llvm_debug) { - llvm::DebugFlag = true; - } - - if (FLAGS_llvm_print_changed_diff) { - llvm::PrintChanged = llvm::ChangePrinter::DiffVerbose; - } else if (FLAGS_llvm_print_changed_color_diff) { - llvm::PrintChanged = llvm::ChangePrinter::ColourDiffVerbose; - } - if (FLAGS_spec.empty()) { std::cerr << "Please specify a path to a Protobuf specification file in '--spec'" @@ -192,9 +176,6 @@ int main(int argc, char *argv[]) { options.stack_frame_recovery_options.stack_offset_metadata_name = "stack_offset"; - options.debug_pm = FLAGS_llvm_debug || FLAGS_llvm_print_changed_diff || - FLAGS_llvm_print_changed_color_diff; - anvill::EntityLifter lifter(options); std::unordered_map names; @@ -264,6 +245,10 @@ int main(int argc, char *argv[]) { 
llvm::EnableStatistics(); } + if (FLAGS_llvm_debug) { + llvm::DebugFlag = true; + } + if (!FLAGS_disable_opt) { anvill::OptimizeModule(lifter, module, spec.GetBlockContexts(), spec); } diff --git a/include/anvill/Lifters.h b/include/anvill/Lifters.h index 67b2408fe..6107def57 100644 --- a/include/anvill/Lifters.h +++ b/include/anvill/Lifters.h @@ -219,8 +219,7 @@ class LifterOptions { //TODO(ian): This should be initialized by an OS + arch pair stack_pointer_is_signed(false), should_remove_anvill_pc(true), - should_inline_basic_blocks(false), - debug_pm(false) { + should_inline_basic_blocks(false) { CheckModuleContextMatchesArch(); } @@ -294,10 +293,6 @@ class LifterOptions { bool should_inline_basic_blocks : 1; - // enable pass manager debug printout - bool debug_pm; - - private: LifterOptions(void) = delete; diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index 6454142bb..2b8e921d8 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -6,20 +6,13 @@ #include #include #include +#include #include #include -#include -#include #include #include -#include -#include -#include -#include -#include #include #include -#include #include #include #include @@ -306,44 +299,18 @@ void CodeLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { DCHECK(!llvm::verifyFunction(*inf, &llvm::errs())); - llvm::ModuleAnalysisManager mam; - llvm::FunctionAnalysisManager fam; - llvm::LoopAnalysisManager lam; - llvm::CGSCCAnalysisManager cam; - - llvm::ModulePassManager mpm; - llvm::FunctionPassManager fpm; - - llvm::PassInstrumentationCallbacks pic; - llvm::StandardInstrumentations si(inf->getContext(), - /*DebugLogging=*/options.debug_pm, - /*VerifyEach=*/options.debug_pm); - si.registerCallbacks(pic, &fam); - - llvm::PassBuilder pb(nullptr, llvm::PipelineTuningOptions(), std::nullopt, - &pic); - pb.registerModuleAnalyses(mam); - pb.registerFunctionAnalyses(fam); - pb.registerLoopAnalyses(lam); - pb.registerCGSCCAnalyses(cam); - 
pb.crossRegisterProxies(lam, fam, cam, mam); - - fpm.addPass(llvm::SimplifyCFGPass()); - fpm.addPass(llvm::PromotePass()); - fpm.addPass(llvm::ReassociatePass()); - fpm.addPass(llvm::DSEPass()); - fpm.addPass(llvm::DCEPass()); - fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); - fpm.addPass(llvm::DCEPass()); - fpm.addPass(llvm::InstCombinePass()); - - mpm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(fpm))); - mpm.run(*inf->getParent(), mam); - - mam.clear(); - fam.clear(); - lam.clear(); - cam.clear(); + llvm::legacy::FunctionPassManager fpm(inf->getParent()); + fpm.add(llvm::createCFGSimplificationPass()); + fpm.add(llvm::createPromoteMemoryToRegisterPass()); + fpm.add(llvm::createReassociatePass()); + fpm.add(llvm::createDeadStoreEliminationPass()); + fpm.add(llvm::createDeadCodeEliminationPass()); + fpm.add(llvm::createSROAPass()); + fpm.add(llvm::createDeadCodeEliminationPass()); + fpm.add(llvm::createInstructionCombiningPass()); + fpm.doInitialization(); + fpm.run(*inf); + fpm.doFinalization(); ClearVariableNames(inf); } diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index cc7f10438..195815eeb 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -29,6 +29,7 @@ class CodeLifter { remill::OperandLifter::OpLifterPtr op_lifter; + // Are we lifting SPARC code? This affects whether or not we need to do // double checking on function return addresses; const bool is_sparc; @@ -86,4 +87,4 @@ class CodeLifter { CodeLifter(CodeLifter &&) = default; }; -} // namespace anvill +} // namespace anvill \ No newline at end of file diff --git a/scripts/build.sh b/scripts/build.sh index 03b44a2bd..56c8862e4 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -364,7 +364,7 @@ function Help echo " --build-dir Change the default (${BUILD_DIR}) build directory." echo " --debug Build with Debug symbols." echo " --extra-cmake-args Extra CMake arguments to build with." 
- echo " --install Just install Anvill, do not package it." + echo " --install Just install Rellic, do not package it." echo " -h --help Print help." } From 875fcd55078dc3261301ab4fb74618f37024d1ac Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Tue, 26 Sep 2023 00:46:00 +1000 Subject: [PATCH 148/163] Bump `lifting-tools-ci` (#394) --- libraries/lifting-tools-ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index f2e75cd43..95b0aa262 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit f2e75cd4346ccb55c5aafaa59814d6ff8c82efa7 +Subproject commit 95b0aa2621908df4982a18b02ef50fcb94d1044c From c3f8be49fe5b7e64b1e47a27627b2d65ccfff2ad Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Wed, 27 Sep 2023 01:12:36 +1000 Subject: [PATCH 149/163] Support lifting x87 80-bit float constants (#393) --- lib/Lifters/ValueLifter.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/Lifters/ValueLifter.cpp b/lib/Lifters/ValueLifter.cpp index abb550223..f68d4c0bb 100644 --- a/lib/Lifters/ValueLifter.cpp +++ b/lib/Lifters/ValueLifter.cpp @@ -352,6 +352,13 @@ ValueLifterImpl::Lift(llvm::ArrayRef data, llvm::Type *type, return llvm::ConstantFP::get(type, val.bitsToDouble()); } + case llvm::Type::X86_FP80TyID: { + const auto size = static_cast(dl.getTypeStoreSize(type)); + auto val = ConsumeBytesAsInt(data, size); + const llvm::APFloat float_val(llvm::APFloat::x87DoubleExtended(), val); + return llvm::ConstantFP::get(type, float_val); + } + default: LOG(FATAL) << "Cannot initialize constant of unhandled LLVM type " << remill::LLVMThingToString(type) << " at " << std::hex From 9722e02b1d9dd6c88218bea6158c23e0fe39ce9b Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Wed, 4 Oct 2023 19:59:26 -0400 Subject: [PATCH 150/163] Callsite override (#398) * implement call site override * simplify override logic * 
address c++ nits * add missing const * a bb context should always exist given a valid bb address --- include/anvill/Providers.h | 41 ------------- include/anvill/Specification.h | 4 ++ include/anvill/Utils.h | 8 +++ lib/Lifters/BasicBlockLifter.cpp | 2 + lib/Lifters/FunctionLifter.cpp | 12 ---- lib/Lifters/FunctionLifter.h | 7 --- lib/Passes/RemoveCallIntrinsics.cpp | 38 +++++++----- lib/Providers/TypeProvider.cpp | 95 ----------------------------- lib/Specification.cpp | 13 +++- lib/Utils.cpp | 28 +++++++++ 10 files changed, 78 insertions(+), 170 deletions(-) diff --git a/include/anvill/Providers.h b/include/anvill/Providers.h index 58edb809a..b8251f021 100644 --- a/include/anvill/Providers.h +++ b/include/anvill/Providers.h @@ -45,11 +45,6 @@ class TypeProvider { std::optional TryGetFunctionTypeOrDefault(uint64_t address) const; - std::optional - TryGetCalledFunctionTypeOrDefault(uint64_t function_address, - const remill::Instruction &from_inst, - uint64_t to_address) const; - std::optional TryGetVariableTypeOrDefault(uint64_t address, llvm::Type *hinted_value_type = nullptr) const; @@ -60,19 +55,6 @@ class TypeProvider { virtual std::optional TryGetFunctionType(uint64_t address) const = 0; - // Try to return the type of a function that has been called from `from_isnt`. - virtual std::optional - TryGetCalledFunctionType(uint64_t function_address, - const remill::Instruction &from_inst) const; - - // Try to return the type of a function starting at address `to_address`. This - // type is the prototype of the function. The type can be call site specific, - // where the call site is `from_inst`. 
- virtual std::optional - TryGetCalledFunctionType(uint64_t function_address, - const remill::Instruction &from_inst, - uint64_t to_address) const; - // Try to return the variable at given address or containing the address virtual std::optional TryGetVariableType(uint64_t address, @@ -153,19 +135,6 @@ class ProxyTypeProvider : public TypeProvider { std::optional TryGetFunctionType(uint64_t address) const override; - // Try to return the type of a function that has been called from `from_isnt`. - std::optional - TryGetCalledFunctionType(uint64_t function_address, - const remill::Instruction &from_inst) const override; - - // Try to return the type of a function starting at address `to_address`. This - // type is the prototype of the function. The type can be call site specific, - // where the call site is `from_inst`. - std::optional - TryGetCalledFunctionType(uint64_t function_address, - const remill::Instruction &from_inst, - uint64_t to_address) const override; - // Try to return the variable at given address or containing the address std::optional TryGetVariableType(uint64_t address, @@ -206,11 +175,6 @@ class DefaultCallableTypeProvider : public ProxyTypeProvider { // Set `decl` to the default callable type for `arch`. void SetDefault(remill::ArchName arch, CallableDecl decl); - // Try to return the type of a function that has been called from `from_isnt`. - std::optional - TryGetCalledFunctionType(uint64_t function_address, - const remill::Instruction &from_inst) const override; - std::optional TryGetFunctionType(uint64_t address) const override; }; @@ -225,11 +189,6 @@ class SpecificationTypeProvider : public BaseTypeProvider { explicit SpecificationTypeProvider(const Specification &spec); - // Try to return the type of a function that has been called from `from_isnt`. 
- std::optional - TryGetCalledFunctionType(uint64_t function_address, - const remill::Instruction &from_inst) const override; - // Try to return the type of a function starting at address `address`. This // type is the prototype of the function. std::optional diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index 18d7fed65..393ac196b 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -146,6 +146,10 @@ class Specification { static anvill::Result DecodeFromPB(llvm::LLVMContext &context, std::istream &pb); + // Return the call site at a given function address, instruction address pair, or an empty `shared_ptr`. + std::shared_ptr + CallSiteAt(const std::pair &loc) const; + // Return the function beginning at `address`, or an empty `shared_ptr`. std::shared_ptr FunctionAt(std::uint64_t address) const; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index ef27f3a85..05b54ad0e 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -49,6 +49,14 @@ std::string CreateFunctionName(std::uint64_t addr); // Creates a `data_
` name from an address std::string CreateVariableName(std::uint64_t addr); +// Get metadata for an instruction +std::optional GetMetadata(llvm::StringRef tag, + const llvm::Instruction &instr); + +// Set metadata for an instruction +void SetMetadata(llvm::StringRef tag, llvm::Instruction &insn, + std::uint64_t pc_val); + // Looks for any constant expressions in the operands of `inst` and unfolds // them into other instructions in the same block. void UnfoldConstantExpressions(llvm::Instruction *inst); diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index e55748679..e3b433898 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -28,6 +28,7 @@ #include "Lifters/FunctionLifter.h" #include "anvill/Declarations.h" #include "anvill/Optimize.h" +#include "anvill/Utils.h" namespace anvill { @@ -183,6 +184,7 @@ bool BasicBlockLifter::DoInterProceduralControlFlow( call = this->AddCallFromBasicBlockFunctionToLifted( block, this->intrinsics.function_call, this->intrinsics); } + SetMetadata(options.pc_metadata_name, *call, insn.pc); if (!cc.stop) { auto [_, raddr] = this->LoadFunctionReturnAddress(insn, block); auto npc = remill::LoadNextProgramCounterRef(block); diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 6d74702ee..673149617 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -143,18 +143,6 @@ void FunctionLifter::InsertError(llvm::BasicBlock *block) { AnnotateInstruction(tail, pc_annotation_id, pc_annotation); } - -std::optional -FunctionLifter::TryGetTargetFunctionType(const remill::Instruction &from_inst, - std::uint64_t address) { - std::optional opt_callable_decl = - type_provider.TryGetCalledFunctionTypeOrDefault(func_address, from_inst, - address); - - return opt_callable_decl; -} - - // Get the annotation for the program counter `pc`, or `nullptr` if we're // not doing annotations. 
llvm::MDNode *FunctionLifter::GetPCAnnotation(uint64_t pc) const { diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 2fe5ebc5e..9a2670f0e 100644 --- a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -222,13 +222,6 @@ class FunctionLifter : public CodeLifter { bool CallFunction(const remill::Instruction &inst, llvm::BasicBlock *block, std::optional target_pc); - // A wrapper around the type provider's TryGetFunctionType that makes use - // of the control flow provider to handle control flow redirections for - // thunks - std::optional - TryGetTargetFunctionType(const remill::Instruction &inst, - std::uint64_t address); - // Visit a direct function call control-flow instruction. The target is known // at decode time, and its realized address is stored in // `inst.branch_taken_pc`. In practice, what we do in this situation is try diff --git a/lib/Passes/RemoveCallIntrinsics.cpp b/lib/Passes/RemoveCallIntrinsics.cpp index 8a58943a1..ee73227dc 100644 --- a/lib/Passes/RemoveCallIntrinsics.cpp +++ b/lib/Passes/RemoveCallIntrinsics.cpp @@ -22,11 +22,6 @@ llvm::PreservedAnalyses RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, llvm::FunctionAnalysisManager &am, llvm::PreservedAnalyses prev) { - // remillFunctionCall->getFunction()->dump(); - // if (remillFunctionCall->getFunction()->getName().endswith( - // "basic_block_func4201200")) { - // LOG(FATAL) << "done"; - // } CHECK(remillFunctionCall->getNumOperands() == 4); auto target_func = remillFunctionCall->getArgOperand(1); auto state_ptr = remillFunctionCall->getArgOperand(0); @@ -43,23 +38,38 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, ra.references_global_value || // Related to a global var/func. ra.references_program_counter) { // Related to `__anvill_pc`. 
- // TODO(Ian): ignoring callsite decls for now - auto fdecl = spec.FunctionAt(ra.u.address); - auto entity = this->xref_resolver.EntityAtAddress(ra.u.address); - if (fdecl && entity) { + std::shared_ptr callable_decl = + spec.FunctionAt(ra.u.address); + + if (auto pc_val = + GetMetadata(lifter.Options().pc_metadata_name, *remillFunctionCall); + pc_val.has_value()) { + if (auto bb_addr = GetBasicBlockAddr(f); bb_addr.has_value()) { + auto block_contexts = spec.GetBlockContexts(); + const auto &bb_ctx = block_contexts.GetBasicBlockContextForAddr(*bb_addr)->get(); + auto func = bb_ctx.GetParentFunctionAddress(); + if (auto override_decl = spec.CallSiteAt({func, *pc_val})) { + DLOG(INFO) << "Overriding call site at " << std::hex << *pc_val + << " in " << std::hex << func; + callable_decl = std::move(override_decl); + } + } + } + + auto *entity = this->xref_resolver.EntityAtAddress(ra.u.address); + if (callable_decl && entity) { llvm::IRBuilder<> ir(remillFunctionCall->getParent()); ir.SetInsertPoint(remillFunctionCall); - const remill::IntrinsicTable table( - remillFunctionCall->getFunction()->getParent()); + const remill::IntrinsicTable table(f->getParent()); DLOG(INFO) << "Replacing call from: " << remill::LLVMThingToString(remillFunctionCall) << " with call to " << std::hex << ra.u.address << " d has: " << std::string(entity->getName()); - auto new_mem = - fdecl->CallFromLiftedBlock(entity, lifter.Options().TypeDictionary(), - table, ir, state_ptr, mem_ptr); + auto *new_mem = callable_decl->CallFromLiftedBlock( + entity, lifter.Options().TypeDictionary(), table, ir, state_ptr, + mem_ptr); remillFunctionCall->replaceAllUsesWith(new_mem); remillFunctionCall->eraseFromParent(); diff --git a/lib/Providers/TypeProvider.cpp b/lib/Providers/TypeProvider.cpp index 739958be6..ebe51be6f 100644 --- a/lib/Providers/TypeProvider.cpp +++ b/lib/Providers/TypeProvider.cpp @@ -56,29 +56,6 @@ NullTypeProvider::TryGetVariableType(uint64_t, llvm::Type *) const { return std::nullopt; } 
-// Try to return the type of a function starting at address `to_address`. This -// type is the prototype of the function. The type can be call site specific, -// where the call site is `from_inst`. -std::optional -TypeProvider::TryGetCalledFunctionType(uint64_t function_address, - const remill::Instruction &from_inst, - uint64_t to_address) const { - if (auto decl = TryGetCalledFunctionType(function_address, from_inst)) { - return decl; - } else if (auto func_decl = TryGetFunctionType(to_address)) { - return static_cast(func_decl.value()); - } else { - return std::nullopt; - } -} - -// Try to return the type of a function that has been called from `from_isnt`. -std::optional -TypeProvider::TryGetCalledFunctionType(uint64_t function_address, - const remill::Instruction &) const { - return std::nullopt; -} - BaseTypeProvider::~BaseTypeProvider() {} const ::anvill::TypeDictionary &BaseTypeProvider::Dictionary(void) const { @@ -104,19 +81,6 @@ SpecificationTypeProvider::SpecificationTypeProvider(const Specification &spec) : BaseTypeProvider(spec.impl->type_translator), impl(spec.impl) {} -// Try to return the type of a function that has been called from `from_isnt`. -std::optional SpecificationTypeProvider::TryGetCalledFunctionType( - uint64_t function_address, const remill::Instruction &from_inst) const { - std::pair loc{function_address, from_inst.pc}; - - auto cs_it = impl->loc_to_call_site.find(loc); - if (cs_it == impl->loc_to_call_site.end()) { - return std::nullopt; - } else { - return *(cs_it->second); - } -} - // Try to return the type of a function starting at address `address`. This // type is the prototype of the function. std::optional @@ -140,36 +104,6 @@ SpecificationTypeProvider::TryGetVariableType(uint64_t address, } } -// Try to return the type of a function that has been called from `from_isnt`. 
-std::optional -DefaultCallableTypeProvider::TryGetCalledFunctionType( - uint64_t function_address, const remill::Instruction &from_inst) const { - auto maybe_res = - ProxyTypeProvider::TryGetCalledFunctionType(function_address, from_inst); - if (maybe_res.has_value()) { - return maybe_res; - } - - - auto maybe_func_type = - ProxyTypeProvider::TryGetFunctionType(function_address); - if (maybe_func_type.has_value()) { - return maybe_func_type; - } - - if (auto arch_decl = impl->TryGetDeclForArch(from_inst.arch_name)) { - return *arch_decl; - } - - if (from_inst.arch_name != from_inst.sub_arch_name) { - if (auto sub_arch_decl = impl->TryGetDeclForArch(from_inst.sub_arch_name)) { - return *sub_arch_decl; - } - } - - return std::nullopt; -} - std::optional DefaultCallableTypeProvider::TryGetFunctionType(uint64_t address) const { auto maybe_res = ProxyTypeProvider::TryGetFunctionType(address); @@ -211,22 +145,6 @@ ProxyTypeProvider::TryGetFunctionType(uint64_t address) const { return this->deleg.TryGetFunctionType(address); } -// Try to return the type of a function that has been called from `from_isnt`. -std::optional ProxyTypeProvider::TryGetCalledFunctionType( - uint64_t function_address, const remill::Instruction &from_inst) const { - return this->deleg.TryGetCalledFunctionType(function_address, from_inst); -} - -// Try to return the type of a function starting at address `to_address`. This -// type is the prototype of the function. The type can be call site specific, -// where the call site is `from_inst`. 
-std::optional ProxyTypeProvider::TryGetCalledFunctionType( - uint64_t function_address, const remill::Instruction &from_inst, - uint64_t to_address) const { - return this->deleg.TryGetCalledFunctionType(function_address, from_inst, - to_address); -} - // Try to return the variable at given address or containing the address std::optional ProxyTypeProvider::TryGetVariableType(uint64_t address, @@ -274,19 +192,6 @@ TypeProvider::TryGetFunctionTypeOrDefault(uint64_t address) const { return this->GetDefaultFunctionType(address); } - -std::optional TypeProvider::TryGetCalledFunctionTypeOrDefault( - uint64_t function_address, const remill::Instruction &from_inst, - uint64_t to_address) const { - auto res = - this->TryGetCalledFunctionType(function_address, from_inst, to_address); - if (res.has_value()) { - return res; - } - - return this->GetDefaultFunctionType(to_address); -} - std::optional TypeProvider::TryGetVariableTypeOrDefault(uint64_t address, llvm::Type *hinted_value_type) const { diff --git a/lib/Specification.cpp b/lib/Specification.cpp index dbf8fc5c1..b4d20fe15 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -84,7 +84,8 @@ SpecificationImpl::ParseSpecification( continue; } auto cs_obj = maybe_cs.Value(); - std::pair loc{cs_obj.function_address, cs_obj.address}; + std::pair loc{cs_obj.function_address, + cs_obj.address}; if (loc_to_call_site.count(loc)) { std::stringstream ss; @@ -394,6 +395,16 @@ Specification::DecodeFromPB(llvm::LLVMContext &context, std::istream &pb) { return Specification(std::move(pimpl)); } +// Return the call site at a given function address, instruction address pair, or an empty `shared_ptr`. +std::shared_ptr Specification::CallSiteAt( + const std::pair &loc) const { + auto it = impl->loc_to_call_site.find(loc); + if (it != impl->loc_to_call_site.end()) { + return {impl, it->second}; + } + return {}; +} + // Return the function beginning at `address`, or an empty `shared_ptr`. 
std::shared_ptr Specification::FunctionAt(std::uint64_t address) const { diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 5f33dced1..b36993afd 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -244,6 +244,34 @@ std::string CreateVariableName(std::uint64_t addr) { return ss.str(); } +std::optional GetMetadata(llvm::StringRef tag, + const llvm::Instruction &instr) { + if (auto *metadata = instr.getMetadata(tag)) { + for (const auto &op : metadata->operands()) { + if (auto *md = dyn_cast(op.get())) { + if (auto c = dyn_cast(md->getValue())) { + auto pc_val = c->getValue().getZExtValue(); + return pc_val; + } + } + } + } + + return {}; +} + +void SetMetadata(llvm::StringRef tag, llvm::Instruction &insn, + std::uint64_t pc_val) { + auto &context = insn.getContext(); + auto &dl = insn.getModule()->getDataLayout(); + auto *address_type = + llvm::Type::getIntNTy(context, dl.getPointerSizeInBits(0)); + auto *cam = llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(address_type, pc_val)); + auto *node = llvm::MDNode::get(insn.getContext(), cam); + insn.setMetadata(tag, node); +} + void CopyMetadataTo(llvm::Value *src, llvm::Value *dst) { if (src == dst) { return; From 3a506a5fd5e67311dc82ed3c1ce6e28aec422dd9 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 11 Oct 2023 13:17:01 -0400 Subject: [PATCH 151/163] check if index is in range (#397) --- lib/Passes/ConvertPointerArithmeticToGEP.cpp | 98 ++++++++++++-------- 1 file changed, 58 insertions(+), 40 deletions(-) diff --git a/lib/Passes/ConvertPointerArithmeticToGEP.cpp b/lib/Passes/ConvertPointerArithmeticToGEP.cpp index 8a0f949fb..961173f69 100644 --- a/lib/Passes/ConvertPointerArithmeticToGEP.cpp +++ b/lib/Passes/ConvertPointerArithmeticToGEP.cpp @@ -418,6 +418,63 @@ bool ConvertPointerArithmeticToGEP::Impl::ConvertLoadInt(llvm::Function &f) { return false; } +namespace { +void BuildIndices(uint64_t &offset, TypeSpec &cur_spec, llvm::Type *&cur_type, + std::vector &indices, const llvm::DataLayout &dl) { + while 
(offset != 0) { + if (std::holds_alternative>(cur_spec)) { + auto struct_spec = std::get>(cur_spec); + llvm::StructType *struct_type = llvm::cast(cur_type); + + auto layout = dl.getStructLayout(struct_type); + if (offset >= layout->getSizeInBytes()) { + return; + } + + auto index = layout->getElementContainingOffset(offset); + indices.push_back(index); + + cur_spec = struct_spec->members[index]; + cur_type = struct_type->getElementType(index); + offset -= layout->getElementOffset(index); + } else if (std::holds_alternative>(cur_spec)) { + auto arr_spec = std::get>(cur_spec); + auto arr_type = llvm::cast(cur_type); + + auto elem_size = + dl.getTypeSizeInBits(arr_type->getArrayElementType()) / 8; + auto index = offset / elem_size; + + if (index >= arr_type->getNumElements()) { + return; + } + + indices.push_back(index); + + cur_spec = arr_spec->base; + cur_type = arr_type->getArrayElementType(); + offset -= index * elem_size; + } else if (std::holds_alternative>(cur_spec)) { + auto vec_spec = std::get>(cur_spec); + auto vec_type = llvm::cast(cur_type); + + auto elem_size = dl.getTypeSizeInBits(vec_type->getElementType()) / 8; + auto index = offset / elem_size; + if (index >= vec_type->getElementCount().getKnownMinValue()) { + return; + } + indices.push_back(index); + + cur_spec = vec_spec->base; + cur_type = vec_type->getElementType(); + offset -= index * elem_size; + } else { + return; + } + } +} +} // namespace + // Finds `(add (ptrtoint P), A)` and tries to convert to `(ptrtoint (gep ...))` bool ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { using namespace llvm::PatternMatch; @@ -460,47 +517,8 @@ bool ConvertPointerArithmeticToGEP::Impl::FoldPtrAdd(llvm::Function &f) { indices.push_back(index); offset = offset % cur_size; } - while (offset != 0) { - if (std::holds_alternative>(cur_spec)) { - auto struct_spec = std::get>(cur_spec); - auto struct_type = llvm::cast(cur_type); - - auto layout = dl.getStructLayout(struct_type); - auto index = 
layout->getElementContainingOffset(offset); - indices.push_back(index); - - cur_spec = struct_spec->members[index]; - cur_type = struct_type->getElementType(index); - offset -= layout->getElementOffset(index); - } else if (std::holds_alternative>(cur_spec)) { - auto arr_spec = std::get>(cur_spec); - auto arr_type = llvm::cast(cur_type); - - auto elem_size = - dl.getTypeSizeInBits(arr_type->getArrayElementType()) / 8; - auto index = offset / elem_size; - indices.push_back(index); - - cur_spec = arr_spec->base; - cur_type = arr_type->getArrayElementType(); - offset -= index * elem_size; - } else if (std::holds_alternative>( - cur_spec)) { - auto vec_spec = std::get>(cur_spec); - auto vec_type = llvm::cast(cur_type); - - auto elem_size = dl.getTypeSizeInBits(vec_type->getElementType()) / 8; - auto index = offset / elem_size; - indices.push_back(index); - - cur_spec = vec_spec->base; - cur_type = vec_type->getElementType(); - offset -= index * elem_size; - } else { - break; - } - } + BuildIndices(offset, cur_spec, cur_type, indices, dl); if (offset != 0) { continue; } From 91c71622d5df1838d0beb633f3f763b454c35696 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 12 Oct 2023 14:12:05 +1100 Subject: [PATCH 152/163] Add Angha50 test to CI (#392) --- .github/workflows/build.yml | 24 +++++++++++++++ ...tings.json => angha_50_test_settings.json} | 0 libraries/lifting-tools-ci | 2 +- .../{test-angha-1k.sh => test-angha-50.sh} | 29 ++++++++++++++----- 4 files changed, 46 insertions(+), 9 deletions(-) rename ci/{angha_1k_test_settings.json => angha_50_test_settings.json} (100%) rename scripts/{test-angha-1k.sh => test-angha-50.sh} (85%) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7c02b969a..002fdbe8f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -359,6 +359,30 @@ jobs: run: | dpkg -i ${{ steps.package_names.outputs.DEB_PACKAGE_PATH }} + - name: Run Integration Tests (AnghaBench 50) + shell: bash + 
working-directory: ${{ steps.build_paths.outputs.REL_SOURCE }}/anvill + run: | + python3 -m pip install -r libraries/lifting-tools-ci/requirements.txt + scripts/test-angha-50.sh \ + --ghidra-install-dir $GHIDRA_INSTALL_DIR \ + --decompile-cmd "anvill-decompile-spec" \ + --jobs 8 + + - name: Tar and Compress logs + if: failure() + run: | + shopt -s globstar + tar -cf test-errs.tar.xz ${{ steps.build_paths.outputs.REL_SOURCE }}/anvill/angha-test-50/**/std* + shell: bash + + - name: Upload stderr/stdout logs on error + if: failure() + uses: actions/upload-artifact@v3 + with: + name: AnghaBench 50 logs + path: test-errs.tar.xz + - name: Store the DEB package uses: actions/upload-artifact@v3 with: diff --git a/ci/angha_1k_test_settings.json b/ci/angha_50_test_settings.json similarity index 100% rename from ci/angha_1k_test_settings.json rename to ci/angha_50_test_settings.json diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 95b0aa262..7f02cf509 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 95b0aa2621908df4982a18b02ef50fcb94d1044c +Subproject commit 7f02cf50924fe99751ead9952885ce4231e93cdc diff --git a/scripts/test-angha-1k.sh b/scripts/test-angha-50.sh similarity index 85% rename from scripts/test-angha-1k.sh rename to scripts/test-angha-50.sh index 2b7a9b54f..c88a81a0c 100755 --- a/scripts/test-angha-1k.sh +++ b/scripts/test-angha-50.sh @@ -5,11 +5,12 @@ GHIDRA_INSTALL_DIR="~/ghidra_10.1.5_PUBLIC/" ANVILL_DECOMPILE="anvill-decompile-spec" function Help { - echo "Run Anvill on AnghaBech-1K" + echo "Run Anvill on AnghaBench-50" echo "" echo "Options:" echo " --ghidra-install-dir The ghidra install dir. Default ${GHIDRA_INSTALL_DIR}" echo " --decompile-cmd The anvill decompile command to invoke. Default ${ANVILL_DECOMPILE}" + echo " --jobs The number of jobs that can run concurrently. Defaults to system's CPU count" echo " -h --help Print help." 
} @@ -69,6 +70,12 @@ while [[ $# -gt 0 ]] ; do --decompile-cmd) ANVILL_DECOMPILE=${2} shift # past argument + ;; + + # How many concurrent jobs + --jobs) + NUM_JOBS=${2} + shift # past argument ;; *) @@ -90,17 +97,17 @@ fi if ! ${ANVILL_DECOMPILE} --version &>/dev/null; -then +then echo "[!] Could not execute anvill decompile cmd: ${ANVILL_DECOMPILE}" exit 1 fi # create a working directory -mkdir -p angha-test-1k -pushd angha-test-1k +mkdir -p angha-test-50 +pushd angha-test-50 -# fetch the test set: 1K binaries (per arch) -${SRC_DIR}/libraries/lifting-tools-ci/datasets/fetch_anghabench.sh --run-size 1k --binaries +# fetch the test set: 50 binaries (per arch) +${SRC_DIR}/libraries/lifting-tools-ci/datasets/fetch_anghabench.sh --run-size 50 --binaries # extract it for tarfile in *.tar.xz do @@ -111,14 +118,20 @@ FAILED="no" for arch in $(ls -1 binaries/) do echo "[+] Testing architecture ${arch}" - ${SRC_DIR}/libraries/lifting-tools-ci/tool_run_scripts/anvill.py \ + args=( --ghidra-install-dir "${GHIDRA_INSTALL_DIR}" \ --anvill-decompile "${ANVILL_DECOMPILE}" \ --input-dir "$(pwd)/binaries/${arch}" \ --output-dir "$(pwd)/results/${arch}" \ --run-name "anvill-live-ci-${arch}" \ - --test-options "${SRC_DIR}/ci/angha_1k_test_settings.json" \ + --test-options "${SRC_DIR}/ci/angha_50_test_settings.json" \ --dump-stats + ) + if [[ -v NUM_JOBS ]]; then + args+=(--jobs "${NUM_JOBS}") + fi + ${SRC_DIR}/libraries/lifting-tools-ci/tool_run_scripts/anvill.py "${args[@]}" + if ! check_test "$(pwd)/results/${arch}/python/stats.json" From 610205ab331291c252d537d535f45a9f7b970afa Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Thu, 12 Oct 2023 15:43:52 -0400 Subject: [PATCH 153/163] UID codeblocks refactor (#396) * Use UID for codeblocks * Fix BasicBlockContext lookups Fixes the missing literal_struct_2 issue * Test CI * Strong Uid typing * Cleanup and support parsing Json spec * Revert "Test CI" This reverts commit 4cfe2d955b1b70f2e3d0fd984cfd1e43b23a19a3. 
* Remove basic block address metadata Look up address using UID * Fix review comments and add entry_uid to function spec * Keep track of UID to CodeBlock mapping Useful for getting CodeBlock function addresses without a tracked function * point to compatible irene --------- Co-authored-by: 2over12 --- .github/workflows/build.yml | 1 + data_specifications/specification.proto | 9 ++++-- include/anvill/ABI.h | 2 +- include/anvill/Declarations.h | 39 ++++++++++++++++++------- include/anvill/Passes/BasicBlockPass.h | 8 ++--- include/anvill/Specification.h | 7 +++-- include/anvill/Utils.h | 2 +- lib/ABI.cpp | 2 +- lib/Declarations.cpp | 26 ++++++++--------- lib/Lifters/BasicBlockLifter.cpp | 23 ++++++++------- lib/Lifters/BasicBlockLifter.h | 2 +- lib/Lifters/CodeLifter.cpp | 11 +++++++ lib/Lifters/CodeLifter.h | 4 +++ lib/Lifters/FunctionLifter.cpp | 35 +++++++++++----------- lib/Lifters/FunctionLifter.h | 6 ++-- lib/Optimize.cpp | 2 +- lib/Passes/RemoveCallIntrinsics.cpp | 4 +-- lib/Passes/ReplaceStackReferences.cpp | 3 +- lib/Protobuf.cpp | 38 ++++++++++++++---------- lib/Protobuf.h | 4 +-- lib/Specification.cpp | 31 ++++++++++++++++++-- lib/Specification.h | 3 ++ lib/Utils.cpp | 6 ++-- tests/anvill_passes/CMakeLists.txt | 2 +- 24 files changed, 175 insertions(+), 95 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 002fdbe8f..a1852e53c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -154,6 +154,7 @@ jobs: - name: Clone Ghidra Spec Generation uses: actions/checkout@v3 with: + ref: ekilmer/uid-codeblocks path: ${{ steps.build_paths.outputs.REL_SOURCE }}/irene3 repository: "trailofbits/irene3" fetch-depth: 0 diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 34692f52d..dd95ba170 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -271,10 +271,13 @@ message BlockContext { message CodeBlock { uint64 
address = 1; string name = 2; + // Incoming block(s) by uid repeated uint64 incoming_blocks = 3; + // Outgoing block(s) by uid repeated uint64 outgoing_blocks = 4; uint32 size = 5; map context_assignments = 6; + uint64 uid = 7; } message Variables { @@ -312,12 +315,14 @@ message TypeHint { message Function { uint64 entry_address = 1; - FunctionLinkage func_linkage = 3; + uint64 entry_uid = 12; + FunctionLinkage func_linkage = 3; Callable callable = 4; + // Mapping of unique ID to codeblock map blocks = 5; map local_variables = 6; - // Keys are addresses of code blocks, each block + // Keys are unique IDs of code blocks, each block // may have a corresponding context map block_context = 7; StackEffects stack_effects = 8; diff --git a/include/anvill/ABI.h b/include/anvill/ABI.h index 02cf9e302..ef836ff60 100644 --- a/include/anvill/ABI.h +++ b/include/anvill/ABI.h @@ -86,7 +86,7 @@ extern const std::string kAnvillStackZero; // use this to queue off of then just move it after the split extern const std::string kStackMetadata; -extern const std::string kBasicBlockMetadata; +extern const std::string kBasicBlockUidMetadata; /// Intrinsic that acts like a return instruction but leaves both the basic block and the parent function. 
diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index 7977fff7a..f2de10367 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -45,15 +46,31 @@ struct Register; } // namespace remill namespace anvill { +struct Uid { + std::uint64_t value; + bool operator==(const Uid &) const = default; +}; + +} + +template <> +struct std::hash { + size_t operator()(const anvill::Uid &uid) const noexcept { + return std::hash()(uid.value); + } +}; + +namespace anvill { struct CodeBlock { uint64_t addr; uint32_t size; - std::unordered_set outgoing_edges; + std::unordered_set outgoing_edges; // The set of context assignments that occur at the entry point to this block. // A block may have specific decoding context properties such as "TM=1" (the thumb bit is set) // So we declare the context assignments that occur at the entry point to a block. std::unordered_map context_assignments; + Uid uid; }; @@ -390,6 +407,8 @@ struct FunctionDecl : public CallableDecl { public: // Address of this function in memory. std::uint64_t address{0}; + // Entry block UID + Uid entry_uid{0}; // The maximum number of bytes of redzone afforded to this function // (if it doesn't change the stack pointer, or, for example, writes @@ -400,24 +419,24 @@ struct FunctionDecl : public CallableDecl { bool is_extern{false}; // These are the blocks contained within the function representing the CFG. 
- std::unordered_map cfg; + std::unordered_map cfg; std::unordered_map locals; - std::unordered_map stack_offsets_at_entry; + std::unordered_map stack_offsets_at_entry; - std::unordered_map stack_offsets_at_exit; + std::unordered_map stack_offsets_at_exit; - std::unordered_map> + std::unordered_map> live_regs_at_entry; - std::unordered_map> + std::unordered_map> live_regs_at_exit; - std::unordered_map> + std::unordered_map> constant_values_at_entry; - std::unordered_map> + std::unordered_map> constant_values_at_exit; // sorted vector of hints @@ -451,10 +470,10 @@ struct FunctionDecl : public CallableDecl { static Result Create(llvm::Function &func, const remill::Arch *arch); - SpecBlockContext GetBlockContext(std::uint64_t addr) const; + SpecBlockContext GetBlockContext(Uid uid) const; void - AddBBContexts(std::unordered_map &contexts) const; + AddBBContexts(std::unordered_map &contexts) const; }; // A call site decl, as represented at a "near ABI" level. This is like a diff --git a/include/anvill/Passes/BasicBlockPass.h b/include/anvill/Passes/BasicBlockPass.h index 7ed23fc93..e3f5aeee0 100644 --- a/include/anvill/Passes/BasicBlockPass.h +++ b/include/anvill/Passes/BasicBlockPass.h @@ -15,7 +15,7 @@ namespace anvill { class BasicBlockContexts { public: virtual std::optional> - GetBasicBlockContextForAddr(uint64_t addr) const = 0; + GetBasicBlockContextForUid(Uid uid) const = 0; virtual const FunctionDecl &GetFunctionAtAddress(uint64_t addr) const = 0; }; @@ -33,9 +33,9 @@ class BasicBlockPass : public llvm::PassInfoMixin> { llvm::PreservedAnalyses run(llvm::Function &F, llvm::FunctionAnalysisManager &AM) { auto &bb_pass = *static_cast(this); - auto bbaddr = anvill::GetBasicBlockAddr(&F); - if (bbaddr.has_value()) { - auto maybe_bb_cont = contexts.GetBasicBlockContextForAddr(*bbaddr); + auto bbuid = anvill::GetBasicBlockUid(&F); + if (bbuid.has_value()) { + auto maybe_bb_cont = contexts.GetBasicBlockContextForUid(*bbuid); if (maybe_bb_cont) { const 
BasicBlockContext &bb_cont = *maybe_bb_cont; auto &parent_func = diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index 393ac196b..a84026434 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -91,14 +91,14 @@ struct ValueDecl; class Specification; class SpecBlockContexts : public BasicBlockContexts { - std::unordered_map contexts; + std::unordered_map contexts; std::unordered_map> funcs; public: SpecBlockContexts(const Specification &spec); virtual std::optional> - GetBasicBlockContextForAddr(uint64_t addr) const override; + GetBasicBlockContextForUid(Uid uid) const override; virtual const FunctionDecl & GetFunctionAtAddress(uint64_t addr) const override; @@ -153,6 +153,9 @@ class Specification { // Return the function beginning at `address`, or an empty `shared_ptr`. std::shared_ptr FunctionAt(std::uint64_t address) const; + // Return the basic block at `uid`, or an empty `shared_ptr`. + std::shared_ptr BlockAt(Uid uid) const; + // Return the global variable beginning at `address`, or an empty `shared_ptr`. std::shared_ptr VariableAt(std::uint64_t address) const; diff --git a/include/anvill/Utils.h b/include/anvill/Utils.h index 05b54ad0e..fa18aa4c2 100644 --- a/include/anvill/Utils.h +++ b/include/anvill/Utils.h @@ -142,7 +142,7 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, llvm::BasicBlock *in_block, llvm::Value *state_ptr, llvm::Value *mem_ptr); -std::optional GetBasicBlockAddr(llvm::Function *func); +std::optional GetBasicBlockUid(llvm::Function *func); llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func); diff --git a/lib/ABI.cpp b/lib/ABI.cpp index 70b35e410..1df984d2e 100644 --- a/lib/ABI.cpp +++ b/lib/ABI.cpp @@ -82,7 +82,7 @@ const std::string kAnvillDataProvenanceFunc(kAnvillNamePrefix + // `alloca`. 
const std::string kAnvillStackZero(kAnvillNamePrefix + "stack_zero"); -const std::string kBasicBlockMetadata(kAnvillNamePrefix + "basic_block_md"); +const std::string kBasicBlockUidMetadata(kAnvillNamePrefix + "basic_block_uid_md"); const std::string kStackMetadata(kAnvillNamePrefix + "stack_alloc"); diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index c66dd6e71..46800c75b 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -77,9 +77,9 @@ VariableDecl::DeclareInModule(const std::string &name, } void FunctionDecl::AddBBContexts( - std::unordered_map &contexts) const { - for (const auto &[addr, _] : this->cfg) { - contexts.insert({addr, this->GetBlockContext(addr)}); + std::unordered_map &contexts) const { + for (const auto &[uid, _] : this->cfg) { + contexts.insert({uid, this->GetBlockContext(uid)}); } } @@ -475,12 +475,12 @@ void CallableDecl::OverrideFunctionTypeWithABIReturnLayout() { namespace { template -V GetWithDef(uint64_t addr, const std::unordered_map &map, V def) { - if (map.find(addr) == map.end()) { +V GetWithDef(Uid uid, const std::unordered_map &map, V def) { + if (map.find(uid) == map.end()) { return def; } - return map.find(addr)->second; + return map.find(uid)->second; } } // namespace @@ -488,16 +488,16 @@ size_t FunctionDecl::GetPointerDisplacement() const { return this->parameter_size + this->parameter_offset; } -SpecBlockContext FunctionDecl::GetBlockContext(std::uint64_t addr) const { +SpecBlockContext FunctionDecl::GetBlockContext(Uid uid) const { return SpecBlockContext( - *this, GetWithDef(addr, this->stack_offsets_at_entry, SpecStackOffsets()), - GetWithDef(addr, this->stack_offsets_at_exit, SpecStackOffsets()), - GetWithDef(addr, this->constant_values_at_entry, + *this, GetWithDef(uid, this->stack_offsets_at_entry, SpecStackOffsets()), + GetWithDef(uid, this->stack_offsets_at_exit, SpecStackOffsets()), + GetWithDef(uid, this->constant_values_at_entry, std::vector()), - GetWithDef(addr, this->constant_values_at_exit, 
+ GetWithDef(uid, this->constant_values_at_exit, std::vector()), - GetWithDef(addr, this->live_regs_at_entry, std::vector()), - GetWithDef(addr, this->live_regs_at_exit, std::vector())); + GetWithDef(uid, this->live_regs_at_entry, std::vector()), + GetWithDef(uid, this->live_regs_at_exit, std::vector())); } std::optional diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index e3b433898..292579109 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -403,14 +403,13 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() { } } - -llvm::MDNode *BasicBlockLifter::GetBasicBlockAnnotation(uint64_t addr) const { - return this->GetAddrAnnotation(addr, this->semantics_module->getContext()); +llvm::MDNode *BasicBlockLifter::GetBasicBlockUidAnnotation(Uid uid) const { + return this->GetUidAnnotation(uid, this->semantics_module->getContext()); } llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() { std::string name_ = "func" + std::to_string(decl.address) + "basic_block" + - std::to_string(this->block_def.addr); + std::to_string(this->block_def.addr) + "_" + std::to_string(this->block_def.uid.value); auto &context = this->semantics_module->getContext(); llvm::FunctionType *lifted_func_type = llvm::dyn_cast(remill::RecontextualizeType( @@ -439,8 +438,8 @@ llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() { BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto func = bb_func; - func->setMetadata(anvill::kBasicBlockMetadata, - GetBasicBlockAnnotation(this->block_def.addr)); + func->setMetadata(anvill::kBasicBlockUidMetadata, + GetBasicBlockUidAnnotation(this->block_def.uid)); auto &context = this->semantics_module->getContext(); llvm::FunctionType *lifted_func_type = @@ -631,14 +630,13 @@ void BasicBlockLifter::TerminateBasicBlockFunction( auto pc = ir.CreateLoad(address_type, bbfunc.next_pc_out); auto sw = ir.CreateSwitch(pc, this->invalid_successor_block); - for (auto 
e : this->block_def.outgoing_edges) { - auto succ_const = llvm::ConstantInt::get( - llvm::cast(this->address_type), e); - + for (auto edge_uid : this->block_def.outgoing_edges) { auto calling_bb = llvm::BasicBlock::Create(next_mem->getContext(), "", bbfunc.func); llvm::IRBuilder<> calling_bb_builder(calling_bb); - auto &child_lifter = this->flifter.GetOrCreateBasicBlockLifter(e); + auto edge_bb = this->decl.cfg.find(edge_uid); + CHECK(edge_bb != this->decl.cfg.end()); + auto &child_lifter = this->flifter.GetOrCreateBasicBlockLifter(edge_bb->second.uid); auto retval = child_lifter.ControlFlowCallBasicBlockFunction( caller, calling_bb_builder, this->state_ptr, bbfunc.stack, next_mem); if (this->flifter.curr_decl->type->getReturnType()->isVoidTy()) { @@ -646,6 +644,9 @@ void BasicBlockLifter::TerminateBasicBlockFunction( } else { calling_bb_builder.CreateRet(retval); } + + auto succ_const = llvm::ConstantInt::get( + llvm::cast(this->address_type), edge_bb->second.addr); sw->addCase(succ_const, calling_bb); } diff --git a/lib/Lifters/BasicBlockLifter.h b/lib/Lifters/BasicBlockLifter.h index e6c065a97..4c574c798 100644 --- a/lib/Lifters/BasicBlockLifter.h +++ b/lib/Lifters/BasicBlockLifter.h @@ -107,7 +107,7 @@ class BasicBlockLifter : public CodeLifter { remill::DecodingContext context); - llvm::MDNode *GetBasicBlockAnnotation(uint64_t addr) const; + llvm::MDNode *GetBasicBlockUidAnnotation(Uid uid) const; public: BasicBlockLifter(std::unique_ptr block_context, diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index 2b8e921d8..711e0da6f 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -20,6 +20,8 @@ #include +#include "anvill/Declarations.h" + namespace anvill { namespace { // Clear out LLVM variable names. They're usually not helpful. 
@@ -57,6 +59,7 @@ CodeLifter::CodeLifter(const LifterOptions &options, type_specifier(type_specifier), address_type( llvm::Type::getIntNTy(llvm_context, options.arch->address_size)), + uid_type(llvm::Type::getInt64Ty(llvm_context)), i8_type(llvm::Type::getInt8Ty(llvm_context)), i8_zero(llvm::Constant::getNullValue(i8_type)), i32_type(llvm::Type::getInt32Ty(llvm_context)), @@ -191,6 +194,14 @@ llvm::MDNode *CodeLifter::GetAddrAnnotation(uint64_t addr, return llvm::MDNode::get(context, pc_md); } +llvm::MDNode *CodeLifter::GetUidAnnotation(Uid uid, + llvm::LLVMContext &context) const { + auto uid_val = llvm::ConstantInt::get( + remill::RecontextualizeType(uid_type, context), uid.value); + auto uid_md = llvm::ValueAsMetadata::get(uid_val); + return llvm::MDNode::get(context, uid_md); +} + // Allocate and initialize the state structure. llvm::Value * CodeLifter::AllocateAndInitializeStateStructure(llvm::BasicBlock *block, diff --git a/lib/Lifters/CodeLifter.h b/lib/Lifters/CodeLifter.h index 195815eeb..2df69cf6f 100644 --- a/lib/Lifters/CodeLifter.h +++ b/lib/Lifters/CodeLifter.h @@ -7,6 +7,7 @@ #include #include +#include "anvill/Declarations.h" #include "anvill/Lifters.h" namespace anvill { @@ -45,6 +46,7 @@ class CodeLifter { const TypeProvider &type_provider; const TypeTranslator &type_specifier; llvm::IntegerType *const address_type; + llvm::IntegerType *const uid_type; // Convenient to keep around. 
@@ -78,6 +80,8 @@ class CodeLifter { llvm::MDNode *GetAddrAnnotation(uint64_t addr, llvm::LLVMContext &context) const; + llvm::MDNode *GetUidAnnotation(Uid uid, + llvm::LLVMContext &context) const; public: CodeLifter(const LifterOptions &options, llvm::Module *semantics_module, diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 673149617..ca3146ab1 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -323,22 +324,18 @@ llvm::Function *FunctionLifter::DeclareFunction(const FunctionDecl &decl) { return GetOrDeclareFunction(decl); } -BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(uint64_t addr) { - std::pair key{curr_decl->address, addr}; +BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(Uid uid) { + std::pair key{curr_decl->address, uid.value}; auto lifter = this->bb_lifters.find(key); if (lifter != this->bb_lifters.end()) { return lifter->second; } std::unique_ptr context = std::make_unique( - this->curr_decl->GetBlockContext(addr)); + this->curr_decl->GetBlockContext(uid)); - CodeBlock defblk = {addr, 0, std::unordered_set(), - std::unordered_map()}; - auto maybe_blk = this->curr_decl->cfg.find(addr); - if (maybe_blk != this->curr_decl->cfg.end()) { - defblk = maybe_blk->second; - } + auto &cfg = this->curr_decl->cfg; + CodeBlock defblk = cfg.find(uid)->second; auto inserted = this->bb_lifters.emplace( key, @@ -350,7 +347,7 @@ BasicBlockLifter &FunctionLifter::GetOrCreateBasicBlockLifter(uint64_t addr) { const BasicBlockLifter & FunctionLifter::LiftBasicBlockFunction(const CodeBlock &blk) { - auto &lifter = this->GetOrCreateBasicBlockLifter(blk.addr); + auto &lifter = this->GetOrCreateBasicBlockLifter(blk.uid); lifter.LiftBasicBlockFunction(); return lifter; } @@ -367,8 +364,8 @@ void FunctionLifter::VisitBlocks(llvm::Value *lifted_function_state, << ": " << this->curr_decl->cfg.size(); - for 
(const auto &[addr, blk] : this->curr_decl->cfg) { - DLOG(INFO) << "Visiting: " << std::hex << addr; + for (const auto &[uid, blk] : this->curr_decl->cfg) { + DLOG(INFO) << "Visiting: " << std::hex << blk.addr << " " << std::dec << uid.value; this->VisitBlock(blk, lifted_function_state, abstract_stack); } } @@ -485,7 +482,7 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { // // TODO: This could be a thunk, that we are maybe lifting on purpose. // How should control flow redirection behave in this case? - auto &entry_lifter = this->GetOrCreateBasicBlockLifter(this->func_address); + const auto &entry_lifter = this->GetOrCreateBasicBlockLifter(this->curr_decl->entry_uid); auto call_inst = entry_lifter.CallBasicBlockFunction( ir, lifted_func_st.state_ptr, abstract_stack, this->mem_ptr_ref); @@ -688,8 +685,10 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, std::string prefix = "func" + std::to_string(decl.address); if (!func->isDeclaration()) { - for (auto &[block_addr, block] : decl.cfg) { - std::string name = prefix + "basic_block" + std::to_string(block_addr); + for (auto &[block_uid, block] : decl.cfg) { + CHECK(block_uid == block.uid); + std::string name = prefix + "basic_block" + std::to_string(block.addr) + "_" + std::to_string(block.uid.value); + auto new_version = target_module->getFunction(name); auto old_version = semantics_module->getFunction(name); if (!new_version) { @@ -701,9 +700,9 @@ FunctionLifter::AddFunctionToContext(llvm::Function *func, } remill::CloneFunctionInto(old_version, new_version); new_version->setMetadata( - kBasicBlockMetadata, - this->GetAddrAnnotation(block_addr, module_context)); - CHECK(anvill::GetBasicBlockAddr(new_version).has_value()); + kBasicBlockUidMetadata, + this->GetUidAnnotation(block.uid, module_context)); + CHECK(anvill::GetBasicBlockUid(new_version).has_value()); } } diff --git a/lib/Lifters/FunctionLifter.h b/lib/Lifters/FunctionLifter.h index 9a2670f0e..a10413868 100644 --- 
a/lib/Lifters/FunctionLifter.h +++ b/lib/Lifters/FunctionLifter.h @@ -104,7 +104,9 @@ class FunctionLifter : public CodeLifter { const FunctionDecl &decl, EntityLifterImpl &lifter_context) const; - BasicBlockLifter &GetOrCreateBasicBlockLifter(uint64_t addr); + // Get or create a basic block lifter for the basic block with specified + // uid. If a lifter for the uid does not exist, this function will create it + BasicBlockLifter &GetOrCreateBasicBlockLifter(Uid uid); const BasicBlockLifter &LiftBasicBlockFunction(const CodeBlock &); @@ -180,7 +182,7 @@ class FunctionLifter : public CodeLifter { // Maps program counters to lifted functions. std::unordered_map addr_to_func; - // maps a bbaddr to the lifter for that block + // maps a uid to the lifter for that block std::unordered_map, BasicBlockLifter> bb_lifters; diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index e3b1db9ca..6f704e43b 100644 --- a/lib/Optimize.cpp +++ b/lib/Optimize.cpp @@ -350,7 +350,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, // lets make sure we eliminate all the basic block functions because we dont care anymore for (auto &f : module.getFunctionList()) { - if (anvill::GetBasicBlockAddr(&f)) { + if (anvill::GetBasicBlockUid(&f)) { f.setLinkage(llvm::GlobalValue::InternalLinkage); } } diff --git a/lib/Passes/RemoveCallIntrinsics.cpp b/lib/Passes/RemoveCallIntrinsics.cpp index ee73227dc..45af36724 100644 --- a/lib/Passes/RemoveCallIntrinsics.cpp +++ b/lib/Passes/RemoveCallIntrinsics.cpp @@ -44,9 +44,9 @@ RemoveCallIntrinsics::runOnIntrinsic(llvm::CallInst *remillFunctionCall, if (auto pc_val = GetMetadata(lifter.Options().pc_metadata_name, *remillFunctionCall); pc_val.has_value()) { - if (auto bb_addr = GetBasicBlockAddr(f); bb_addr.has_value()) { + if (auto bb_uid = GetBasicBlockUid(f); bb_uid.has_value()) { auto block_contexts = spec.GetBlockContexts(); - const auto &bb_ctx = block_contexts.GetBasicBlockContextForAddr(*bb_addr)->get(); + const auto &bb_ctx = 
block_contexts.GetBasicBlockContextForUid(*bb_uid)->get(); auto func = bb_ctx.GetParentFunctionAddress(); if (auto override_decl = spec.CallSiteAt({func, *pc_val})) { DLOG(INFO) << "Overriding call site at " << std::hex << *pc_val diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 3d9bfbfa7..55593962b 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -255,7 +255,8 @@ llvm::PreservedAnalyses ReplaceStackReferences::runOnBasicBlockFunction( AbstractStack::StackTypeFromSize(F.getContext(), overrunsz)); DLOG(INFO) << "Replacing stack vars in bb: " << std::hex - << *anvill::GetBasicBlockAddr(&F); + << fdecl.address << " " << std::dec + << (*anvill::GetBasicBlockUid(&F)).value; DLOG(INFO) << "Stack size " << cont.GetStackSize(); DLOG(INFO) << "Max stack size " << cont.GetMaxStackSize(); AbstractStack stk( diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 6c72c0444..1e64b62c0 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -516,6 +516,7 @@ Result ProtobufTranslator::DecodeFunction( const ::specification::Function &function) const { FunctionDecl decl; decl.address = function.entry_address(); + decl.entry_uid = Uid{function.entry_uid()}; if (!function.has_callable()) { return std::string("all functions should have a callable"); @@ -531,7 +532,7 @@ Result ProtobufTranslator::DecodeFunction( if (!function.has_frame()) { return std::string("All functions should have a frame"); } - auto frame = function.frame(); + const auto& frame = function.frame(); decl.stack_depth = frame.frame_size(); decl.ret_ptr_offset = frame.return_address_offset(); @@ -611,11 +612,11 @@ Result ProtobufTranslator::DecodeFunction( } void ProtobufTranslator::AddLiveValuesToBB( - std::unordered_map> &map, - uint64_t bb_addr, + std::unordered_map> &map, + Uid bb_uid, const ::google::protobuf::RepeatedPtrField<::specification::Parameter> &values) const { - auto &v = map.insert({bb_addr, 
std::vector()}).first->second; + auto &v = map.insert({bb_uid, std::vector()}).first->second; for (auto var : values) { auto param = DecodeParameter(var); @@ -629,24 +630,29 @@ void ProtobufTranslator::AddLiveValuesToBB( void ProtobufTranslator::ParseCFGIntoFunction( const ::specification::Function &obj, FunctionDecl &decl) const { - for (auto blk : obj.blocks()) { + for (const auto& blk : obj.blocks()) { + std::unordered_set tmp; + for (auto o : blk.second.outgoing_blocks()) { + tmp.insert({o}); + } CodeBlock nblk = { blk.second.address(), blk.second.size(), - {blk.second.outgoing_blocks().begin(), - blk.second.outgoing_blocks().end()}, + tmp, {blk.second.context_assignments().begin(), blk.second.context_assignments().end()}, + {blk.first}, }; - decl.cfg.emplace(blk.first, std::move(nblk)); + decl.cfg.emplace(Uid{blk.first}, std::move(nblk)); } - for (auto &[blk_addr, ctx] : obj.block_context()) { + for (auto &[blk_uid_, ctx] : obj.block_context()) { std::vector stack_offsets_at_entry, stack_offsets_at_exit; std::vector constant_values_at_entry, constant_values_at_exit; - auto blk = decl.cfg[blk_addr]; + Uid blk_uid = {blk_uid_}; + auto blk = decl.cfg[blk_uid]; auto symval_to_domains = [&](const specification::ValueMapping &symval, std::vector &stack_offsets, std::vector &constant_values) { @@ -706,20 +712,20 @@ void ProtobufTranslator::ParseCFGIntoFunction( for (auto &symval : ctx.symvals_at_entry()) { symval_to_domains(symval, - decl.stack_offsets_at_entry[blk_addr].affine_equalities, - decl.constant_values_at_entry[blk_addr]); + decl.stack_offsets_at_entry[blk_uid].affine_equalities, + decl.constant_values_at_entry[blk_uid]); } for (auto &symval : ctx.symvals_at_exit()) { symval_to_domains(symval, - decl.stack_offsets_at_exit[blk_addr].affine_equalities, - decl.constant_values_at_exit[blk_addr]); + decl.stack_offsets_at_exit[blk_uid].affine_equalities, + decl.constant_values_at_exit[blk_uid]); } - this->AddLiveValuesToBB(decl.live_regs_at_entry, blk_addr, + 
this->AddLiveValuesToBB(decl.live_regs_at_entry, blk_uid, ctx.live_at_entries()); - this->AddLiveValuesToBB(decl.live_regs_at_exit, blk_addr, + this->AddLiveValuesToBB(decl.live_regs_at_exit, blk_uid, ctx.live_at_exits()); } } diff --git a/lib/Protobuf.h b/lib/Protobuf.h index ef50ceb90..327152b65 100644 --- a/lib/Protobuf.h +++ b/lib/Protobuf.h @@ -90,8 +90,8 @@ class ProtobufTranslator { FunctionDecl &decl) const; void AddLiveValuesToBB( - std::unordered_map> &map, - uint64_t bb_addr, + std::unordered_map> &map, + Uid bb_uid, const ::google::protobuf::RepeatedPtrField<::specification::Parameter> &values) const; diff --git a/lib/Specification.cpp b/lib/Specification.cpp index b4d20fe15..7e1d96f46 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -9,6 +9,7 @@ #include "Specification.h" #include +#include #include #include #include @@ -67,6 +68,16 @@ SpecificationImpl::ParseSpecification( } auto func_ptr = new FunctionDecl(std::move(func_obj)); + + for (const auto& [uid, bb]: func_ptr->cfg) { + if (uid_to_block.count(uid)) { + std::stringstream ss; + ss << "Duplicate block Uid: " << uid.value; + return ss.str(); + } + uid_to_block[uid] = &bb; + } + functions.emplace_back(func_ptr); address_to_function.emplace(func_address, func_ptr); } @@ -334,7 +345,10 @@ anvill::Result Specification::DecodeFromPB(llvm::LLVMContext &context, const std::string &pb) { ::specification::Specification spec; if (!spec.ParseFromString(pb)) { - return {"Failed to parse specification"}; + auto status = google::protobuf::util::JsonStringToMessage(pb, &spec); + if (!status.ok()) { + return {"Failed to parse specification"}; + } } auto arch{GetArch(context, spec)}; @@ -416,6 +430,17 @@ Specification::FunctionAt(std::uint64_t address) const { } } +// Return the block with `uid`, or an empty `shared_ptr`. 
+std::shared_ptr +Specification::BlockAt(Uid uid) const { + auto it = impl->uid_to_block.find(uid); + if (it != impl->uid_to_block.end()) { + return std::shared_ptr(impl, it->second); + } else { + return {}; + } +} + // Return the global variable beginning at `address`, or an empty `shared_ptr`. std::shared_ptr Specification::VariableAt(std::uint64_t address) const { @@ -458,8 +483,8 @@ SpecBlockContexts::SpecBlockContexts(const Specification &spec) { } std::optional> -SpecBlockContexts::GetBasicBlockContextForAddr(uint64_t addr) const { - auto cont = this->contexts.find(addr); +SpecBlockContexts::GetBasicBlockContextForUid(Uid uid) const { + auto cont = this->contexts.find(uid); if (cont == this->contexts.end()) { return std::nullopt; } diff --git a/lib/Specification.h b/lib/Specification.h index 2be2f1ab2..07a3a2a33 100644 --- a/lib/Specification.h +++ b/lib/Specification.h @@ -65,6 +65,9 @@ class SpecificationImpl // List of functions that have been parsed from the JSON spec. std::unordered_map address_to_function; + // List of basic blocks that have been parsed from the JSON spec. + std::unordered_map uid_to_block; + // Inverted mapping of byte addresses to the variables containing those // addresses. 
std::unordered_map address_to_var; diff --git a/lib/Utils.cpp b/lib/Utils.cpp index b36993afd..70ffd8029 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -997,15 +997,15 @@ bool CanBeAliased(llvm::Value *val) { } } -std::optional GetBasicBlockAddr(llvm::Function *func) { - auto meta = func->getMetadata(kBasicBlockMetadata); +std::optional GetBasicBlockUid(llvm::Function *func) { + auto meta = func->getMetadata(kBasicBlockUidMetadata); if (!meta) { return std::nullopt; } auto v = llvm::cast(meta->getOperand(0))->getValue(); - return llvm::cast(v)->getLimitedValue(); + return Uid{llvm::cast(v)->getLimitedValue()}; } llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func) { diff --git a/tests/anvill_passes/CMakeLists.txt b/tests/anvill_passes/CMakeLists.txt index 8c4c78b25..ff406b2d3 100644 --- a/tests/anvill_passes/CMakeLists.txt +++ b/tests/anvill_passes/CMakeLists.txt @@ -42,6 +42,6 @@ target_include_directories(test_anvill_passes PRIVATE add_test( NAME test_anvill_passes - COMMAND "$" + COMMAND test_anvill_passes WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" ) From befde9cc80f5942ed1b4f2d87200e3352ccc9f7e Mon Sep 17 00:00:00 2001 From: 2over12 Date: Fri, 13 Oct 2023 10:59:12 -0400 Subject: [PATCH 154/163] llvm 17 fixes (#395) * llvm 17 fixes * bump remill * bump llvm, cxx common, xcode, macos, remill * bump matrix * bump remill, use irene main * fix creffolder and dont run on module in inliner --- .github/workflows/build.yml | 17 +++---- Dockerfile | 4 +- lib/CrossReferenceFolder.cpp | 2 +- lib/Lifters/CodeLifter.cpp | 53 ++++++++++++++------ lib/Passes/TransformRemillJumpIntrinsics.cpp | 2 +- lib/Passes/Utils.cpp | 2 +- remill | 2 +- scripts/build.sh | 16 +++--- 8 files changed, 61 insertions(+), 37 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a1852e53c..9171cf43b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -52,8 +52,8 @@ jobs: matrix: image: - { name: "ubuntu", tag: "22.04" } - 
llvm: ["16"] - cxxcommon_version: ["v0.3.2"] + llvm: ["17"] + cxxcommon_version: ["v0.6.0"] runs-on: labels: gha-ubuntu-32 @@ -154,7 +154,6 @@ jobs: - name: Clone Ghidra Spec Generation uses: actions/checkout@v3 with: - ref: ekilmer/uid-codeblocks path: ${{ steps.build_paths.outputs.REL_SOURCE }}/irene3 repository: "trailofbits/irene3" fetch-depth: 0 @@ -411,11 +410,11 @@ jobs: strategy: fail-fast: false matrix: - os: ["macos-12"] - llvm: ["16"] - cxxcommon_version: ["v0.3.2"] + os: ["macos-13"] + llvm: ["17"] + cxxcommon_version: ["v0.6.0"] - runs-on: macos-12 + runs-on: macos-13 steps: - name: Setup the build paths @@ -490,7 +489,7 @@ jobs: id: cxxcommon_installer working-directory: ${{ steps.build_paths.outputs.DOWNLOADS }} run: | - folder_name="vcpkg_${{ matrix.os }}_llvm-${{ matrix.llvm }}_xcode-14.2_amd64" + folder_name="vcpkg_${{ matrix.os }}_llvm-${{ matrix.llvm }}_xcode-15.0_amd64" archive_name="${folder_name}.tar.xz" url="https://github.com/lifting-bits/cxx-common/releases/download/${{ matrix.cxxcommon_version}}/${archive_name}" @@ -753,7 +752,7 @@ jobs: strategy: matrix: - llvm: ["16"] + llvm: ["17"] ubuntu: ["22.04"] steps: - uses: actions/checkout@v3 diff --git a/Dockerfile b/Dockerfile index 16a3f2714..3b6cd0041 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ -ARG LLVM_VERSION=16 +ARG LLVM_VERSION=17 ARG ARCH=amd64 ARG UBUNTU_VERSION=22.04 -ARG CXX_COMMON_VERSION=0.3.2 +ARG CXX_COMMON_VERSION=0.6.0 ARG DISTRO_BASE=ubuntu${UBUNTU_VERSION} ARG BUILD_BASE=ubuntu:${UBUNTU_VERSION} ARG LIBRARIES=/opt/trailofbits diff --git a/lib/CrossReferenceFolder.cpp b/lib/CrossReferenceFolder.cpp index ed886bcdf..b57077983 100644 --- a/lib/CrossReferenceFolder.cpp +++ b/lib/CrossReferenceFolder.cpp @@ -385,7 +385,7 @@ CrossReferenceFolderImpl::ResolveConstant(llvm::Constant *const_val) { xr.is_valid = false; if (val.isNegative()) { - if (val.getMinSignedBits() <= 64) { + if (val.getSignificantBits() <= 64) { xr.u.address = static_cast(val.getSExtValue()); 
xr.is_valid = true; } diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index 711e0da6f..c4418cc2f 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -9,10 +9,17 @@ #include #include #include +#include #include #include +#include +#include +#include +#include +#include #include #include +#include #include #include #include @@ -195,7 +202,7 @@ llvm::MDNode *CodeLifter::GetAddrAnnotation(uint64_t addr, } llvm::MDNode *CodeLifter::GetUidAnnotation(Uid uid, - llvm::LLVMContext &context) const { + llvm::LLVMContext &context) const { auto uid_val = llvm::ConstantInt::get( remill::RecontextualizeType(uid_type, context), uid.value); auto uid_md = llvm::ValueAsMetadata::get(uid_val); @@ -310,19 +317,37 @@ void CodeLifter::RecursivelyInlineFunctionCallees(llvm::Function *inf) { DCHECK(!llvm::verifyFunction(*inf, &llvm::errs())); - llvm::legacy::FunctionPassManager fpm(inf->getParent()); - fpm.add(llvm::createCFGSimplificationPass()); - fpm.add(llvm::createPromoteMemoryToRegisterPass()); - fpm.add(llvm::createReassociatePass()); - fpm.add(llvm::createDeadStoreEliminationPass()); - fpm.add(llvm::createDeadCodeEliminationPass()); - fpm.add(llvm::createSROAPass()); - fpm.add(llvm::createDeadCodeEliminationPass()); - fpm.add(llvm::createInstructionCombiningPass()); - fpm.doInitialization(); - fpm.run(*inf); - fpm.doFinalization(); - + llvm::ModuleAnalysisManager mam; + llvm::FunctionAnalysisManager fam; + llvm::LoopAnalysisManager lam; + llvm::CGSCCAnalysisManager cam; + + llvm::ModulePassManager mpm; + llvm::FunctionPassManager fpm; + + + llvm::PassBuilder pb; + pb.registerModuleAnalyses(mam); + pb.registerFunctionAnalyses(fam); + pb.registerLoopAnalyses(lam); + pb.registerCGSCCAnalyses(cam); + pb.crossRegisterProxies(lam, fam, cam, mam); + + fpm.addPass(llvm::SimplifyCFGPass()); + fpm.addPass(llvm::PromotePass()); + fpm.addPass(llvm::ReassociatePass()); + fpm.addPass(llvm::DSEPass()); + fpm.addPass(llvm::DCEPass()); + 
fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); + fpm.addPass(llvm::DCEPass()); + fpm.addPass(llvm::InstCombinePass()); + + fpm.run(*inf, fam); + + mam.clear(); + fam.clear(); + lam.clear(); + cam.clear(); ClearVariableNames(inf); } diff --git a/lib/Passes/TransformRemillJumpIntrinsics.cpp b/lib/Passes/TransformRemillJumpIntrinsics.cpp index 1e470fa25..0fca8eab4 100644 --- a/lib/Passes/TransformRemillJumpIntrinsics.cpp +++ b/lib/Passes/TransformRemillJumpIntrinsics.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -23,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/Passes/Utils.cpp b/lib/Passes/Utils.cpp index 972daa38f..86c6281ec 100644 --- a/lib/Passes/Utils.cpp +++ b/lib/Passes/Utils.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -19,6 +18,7 @@ #include #include #include +#include #include namespace anvill { diff --git a/remill b/remill index 22b3d4b23..018324821 160000 --- a/remill +++ b/remill @@ -1 +1 @@ -Subproject commit 22b3d4b2318c6ba84cb4e66abb2d37a39a5a33bf +Subproject commit 01832482184da13024f5c511fdb582c728ab843c diff --git a/scripts/build.sh b/scripts/build.sh index 56c8862e4..2eeaa5a93 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -25,8 +25,8 @@ CURR_DIR=$( pwd ) BUILD_DIR="${CURR_DIR}/anvill-build" REMILL_BUILD_DIR="${CURR_DIR}/remill-build" INSTALL_DIR=/usr/local -LLVM_VERSION=llvm-16 -CXX_COMMON_VERSION="0.3.2" +LLVM_VERSION=llvm-17 +CXX_COMMON_VERSION="0.6.0" OS_VERSION=unknown ARCH_VERSION=unknown BUILD_FLAGS= @@ -175,11 +175,11 @@ function DownloadLibraries #BUILD_FLAGS="${BUILD_FLAGS} -DCMAKE_OSX_SYSROOT=${sdk_root}" # Min version supported - OS_VERSION="macos-12" - XCODE_VERSION="14.2" + OS_VERSION="macos-13" + XCODE_VERSION="15.0" if [[ "${SYSTEM_VERSION}" == "13.*" ]]; then echo "Found MacOS Ventura" - OS_VERSION="macos-12" + OS_VERSION="macos-13" elif [[ "${SYSTEM_VERSION}" == "12.*" ]]; then 
echo "Found MacOS Monterey" OS_VERSION="macos-12" @@ -341,8 +341,8 @@ function Package function GetLLVMVersion { case ${1} in - 16) - LLVM_VERSION=llvm-16 + 17) + LLVM_VERSION=llvm-17 return 0 ;; *) @@ -360,7 +360,7 @@ function Help echo "" echo "Options:" echo " --prefix Change the default (${INSTALL_DIR}) installation prefix." - echo " --llvm-version Change the default (16) LLVM version." + echo " --llvm-version Change the default (17) LLVM version." echo " --build-dir Change the default (${BUILD_DIR}) build directory." echo " --debug Build with Debug symbols." echo " --extra-cmake-args Extra CMake arguments to build with." From cadafba9a2da47894bbdc88af3b038f9ccc0e453 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 24 Oct 2023 15:47:03 -0400 Subject: [PATCH 155/163] Fix accesses to globals that occur in the middle of the global (#399) * devectorize * shift indices into second vec * make big endian work * add some sort of actual test * handle access in middle * fix dl * dont go off end of iter * null check on adapt * get datalayout from arch * bump ci * dlog and comment --- CMakeLists.txt | 1 + include/anvill/Passes/RewriteVectorOps.h | 23 ++ include/anvill/Providers.h | 1 + lib/CMakeLists.txt | 1 + lib/Optimize.cpp | 3 + lib/Passes/ConvertAddressesToEntityUses.cpp | 9 +- lib/Passes/RewriteVectorOps.cpp | 230 ++++++++++++++++++ lib/Providers/TypeProvider.cpp | 25 +- lib/Specification.h | 2 +- libraries/lifting-tools-ci | 2 +- tests/anvill_passes/CMakeLists.txt | 1 + .../anvill_passes/data/VectorRewriteSmall.ll | 8 + tests/anvill_passes/data/VectorToRewrite.ll | 10 + tests/anvill_passes/src/VectorRW.cpp | 70 ++++++ 14 files changed, 376 insertions(+), 10 deletions(-) create mode 100644 include/anvill/Passes/RewriteVectorOps.h create mode 100644 lib/Passes/RewriteVectorOps.cpp create mode 100644 tests/anvill_passes/data/VectorRewriteSmall.ll create mode 100644 tests/anvill_passes/data/VectorToRewrite.ll create mode 100644 tests/anvill_passes/src/VectorRW.cpp diff 
--git a/CMakeLists.txt b/CMakeLists.txt index c1a848ecc..3f165ed66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,7 @@ find_package(glog CONFIG REQUIRED) find_package(Z3 CONFIG REQUIRED) find_package(doctest CONFIG REQUIRED) find_package(LLVM CONFIG REQUIRED) +include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) llvm_map_components_to_libnames(llvm_libs support core irreader bitreader bitwriter) find_package(sleigh CONFIG) diff --git a/include/anvill/Passes/RewriteVectorOps.h b/include/anvill/Passes/RewriteVectorOps.h new file mode 100644 index 000000000..621635d57 --- /dev/null +++ b/include/anvill/Passes/RewriteVectorOps.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023-present, Trail of Bits, Inc. + * All rights reserved. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace anvill { + +class RewriteVectorOps final : public llvm::PassInfoMixin { + public: + static llvm::StringRef name(void); + + llvm::PreservedAnalyses run(llvm::Function &F, + llvm::FunctionAnalysisManager &AM); +}; + +} // namespace anvill diff --git a/include/anvill/Providers.h b/include/anvill/Providers.h index b8251f021..c14373f62 100644 --- a/include/anvill/Providers.h +++ b/include/anvill/Providers.h @@ -183,6 +183,7 @@ class DefaultCallableTypeProvider : public ProxyTypeProvider { class SpecificationTypeProvider : public BaseTypeProvider { private: std::shared_ptr impl; + llvm::DataLayout layout; public: virtual ~SpecificationTypeProvider(void); diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 3d9eb90a4..16d3e66b1 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -60,6 +60,7 @@ set(anvill_passes ReplaceStackReferences RemoveCallIntrinsics InlineBasicBlockFunctions + RewriteVectorOps ) set(anvill_arch_HEADERS diff --git a/lib/Optimize.cpp b/lib/Optimize.cpp index 6f704e43b..c0f1c468f 100644 --- a/lib/Optimize.cpp +++ 
b/lib/Optimize.cpp @@ -76,6 +76,7 @@ #include #include +#include "anvill/Passes/RewriteVectorOps.h" #include "anvill/Passes/SplitStackFrameAtReturnAddress.h" #include "anvill/Specification.h" @@ -279,6 +280,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, fpm.addPass(anvill::RemoveCallIntrinsics(xr, spec, lifter)); fpm.addPass(llvm::VerifierPass()); fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG)); + fpm.addPass(RewriteVectorOps()); fpm.addPass(llvm::VerifierPass()); AddConvertAddressesToEntityUses(fpm, xr, pc_metadata_id); @@ -302,6 +304,7 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module, AddLowerRemillUndefinedIntrinsics(second_fpm); second_fpm.addPass(llvm::VerifierPass()); AddRemoveFailedBranchHints(second_fpm); + fpm.addPass(RewriteVectorOps()); second_fpm.addPass(llvm::VerifierPass()); second_fpm.addPass(llvm::NewGVNPass()); second_fpm.addPass(llvm::VerifierPass()); diff --git a/lib/Passes/ConvertAddressesToEntityUses.cpp b/lib/Passes/ConvertAddressesToEntityUses.cpp index 1fcbf5a20..ea04c8fee 100644 --- a/lib/Passes/ConvertAddressesToEntityUses.cpp +++ b/lib/Passes/ConvertAddressesToEntityUses.cpp @@ -84,14 +84,17 @@ ConvertAddressesToEntityUses::run(llvm::Function &function, auto ent_type = llvm::dyn_cast(entity->getType()); CHECK_NOTNULL(ent_type); + auto adapted = AdaptToType(ir, entity, val_type); + if (!adapted) { + continue; + } if (auto phi = llvm::dyn_cast(user_inst)) { auto pred_block = phi->getIncomingBlock(*(xref_use.use)); llvm::IRBuilder<> ir(pred_block->getTerminator()); - xref_use.use->set(AdaptToType(ir, entity, val_type)); + xref_use.use->set(adapted); } else { - llvm::IRBuilder<> ir(user_inst); - xref_use.use->set(AdaptToType(ir, entity, val_type)); + xref_use.use->set(adapted); } if (auto val_inst = llvm::dyn_cast(val)) { diff --git a/lib/Passes/RewriteVectorOps.cpp b/lib/Passes/RewriteVectorOps.cpp new file mode 100644 index 000000000..ab71a0cd5 --- /dev/null +++ 
b/lib/Passes/RewriteVectorOps.cpp @@ -0,0 +1,230 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +// Goal here is to rewrite vector twiddling to integer ops +/* + %.sroa.23.24.vec.expand = shufflevector <4 x i8> %11, <4 x i8> poison, <8 x i32> + %.sroa.23.28.vec.expand = shufflevector <4 x i8> %12, <4 x i8> poison, <8 x i32> + %.sroa.23.28.vecblend = shufflevector <8 x i8> %.sroa.23.24.vec.expand, <8 x i8> %.sroa.23.28.vec.expand, <8 x i32> + + so in this case well get something like (le): + a = shl(zext(%12)), 32 + b = zext(11) + c=or(a,b) + +*/ +namespace anvill { + +llvm::StringRef RewriteVectorOps::name(void) { + return "RewriteVectorOps"; +} + +std::optional IntegerTypeForVector(llvm::VectorType *vec) { + if (!vec->isScalableTy()) { + return llvm::IntegerType::get( + vec->getContext(), vec->getPrimitiveSizeInBits().getFixedValue()); + } + return std::nullopt; +} + +struct RewrittenInteger { + llvm::Value *target; + llvm::IntegerType *to_int_ty; + std::pair bit_range; + uint32_t bitshift; + bool poison; +}; + +struct DecomposeState { + uint32_t curr_index; + const llvm::ShuffleVectorInst &sv; + + + bool ConsumedAll() { + return curr_index >= sv.getShuffleMask().size(); + } + + uint32_t GetOpLengths() { + auto v = llvm::cast(sv.getOperand(0)->getType()); + return v->getElementCount().getKnownMinValue(); + } + + bool isInSameVec(uint32_t ind1, uint32_t ind2) { + return (ind1 < GetOpLengths() && ind2 < GetOpLengths()) || + (ind1 >= GetOpLengths() && ind2 >= GetOpLengths()); + } + + + std::optional ElementSize() { + auto ty = llvm::cast(this->sv.getOperand(0)->getType()); + auto el_ty = ty->getElementType(); + auto sz = el_ty->getPrimitiveSizeInBits(); + if (sz) { + return sz; + } + + return std::nullopt; + } + + + std::optional ConsumeNext() { + uint32_t start_index = this->curr_index; + int first_end = sv.getMaskValue(this->curr_index); + int prev_ind = 
first_end; + this->curr_index += 1; + DLOG(INFO) << "first: " << first_end; + // We are looking for the last mask index such that [start_index,curr_index) is a seq of either poisons + // or contigous accesses to a single op + while (!this->ConsumedAll()) { + auto next = sv.getMaskValue(this->curr_index); + DLOG(INFO) << "next: " << next; + // we can either group poisons or sequences + if (!(next == llvm::PoisonMaskElem && prev_ind == llvm::PoisonMaskElem) && + (!isInSameVec(prev_ind, next) || prev_ind + 1 != next)) { + break; + } + + prev_ind = next; + this->curr_index += 1; + } + + + bool is_first_op = first_end < static_cast(GetOpLengths()); + + llvm::Value *target = is_first_op ? sv.getOperand(0) : sv.getOperand(1); + std::pair element_range = std::make_pair(0, 0); + auto poison = first_end == llvm::PoisonMaskElem; + if (!poison) { + element_range = std::make_pair(first_end, prev_ind + 1); + if (!is_first_op) { + element_range.first = element_range.first - GetOpLengths(); + element_range.second = element_range.second - GetOpLengths(); + } + } // the prev_ind is the last inclusive indice so bump one to make this an [) range + // convert the element range into a bit range + CHECK(element_range.second >= element_range.first); + auto sz = this->ElementSize(); + if (!sz) { + return std::nullopt; + } + + std::pair bit_range; + // first member of the range is the lshr for cutting off low bits + // second describes the mask + if (sv.getModule()->getDataLayout().isLittleEndian()) { + bit_range = + std::make_pair(element_range.first * *sz, element_range.second * *sz); + } else { + bit_range = std::make_pair((GetOpLengths() - element_range.second) * *sz, + (GetOpLengths() - element_range.first) * *sz); + } + + auto ity = + IntegerTypeForVector(llvm::cast(target->getType())); + if (!ity) { + return std::nullopt; + } + uint32_t bitshift; + if (sv.getModule()->getDataLayout().isLittleEndian()) { + bitshift = *sz * start_index; + } else { + auto op_distance = 
sv.getType()->getElementCount().getFixedValue() - + (element_range.second - element_range.first); + DLOG(INFO) << remill::LLVMThingToString(target); + DLOG(INFO) << "odist: " << op_distance; + DLOG(INFO) << "start_ind: " << start_index; + DLOG(INFO) << "diff: " << (op_distance - start_index); + bitshift = *sz * (op_distance - start_index); + } + return RewrittenInteger{target, *ity, bit_range, bitshift, poison}; + } +}; + +// this isnt super smart but we just check if +// each vector is extracted once +std::optional> +Rewrite(const llvm::ShuffleVectorInst &sv) { + std::vector rewrites; + DecomposeState st{0, sv}; + while (!st.ConsumedAll()) { + auto nxt = st.ConsumeNext(); + if (!nxt) { + return std::nullopt; + } + rewrites.push_back(*nxt); + } + return rewrites; +} + + +llvm::PreservedAnalyses +RewriteVectorOps::run(llvm::Function &F, llvm::FunctionAnalysisManager &AM) { + std::vector svs; + for (auto &insn : llvm::instructions(F)) { + if (llvm::ShuffleVectorInst *sv = + llvm::dyn_cast(&insn)) { + svs.push_back(sv); + } + } + + auto pres = llvm::PreservedAnalyses::all(); + for (auto sv : svs) { + auto vec_type = sv->getType(); + if (vec_type->isScalableTy()) { + LOG(ERROR) << "Could not rewrite sv, unable to rewrite scalable type" + << remill::LLVMThingToString(sv); + continue; + } + + auto maybe_rws = Rewrite(*sv); + if (!maybe_rws) { + LOG(ERROR) << "Could not rewrite sv, unable to split" + << remill::LLVMThingToString(sv); + continue; + } + auto rws = *maybe_rws; + auto base_int_ty = llvm::IntegerType::get( + F.getContext(), vec_type->getScalarSizeInBits() * + vec_type->getElementCount().getFixedValue()); + llvm::Value *base_value = llvm::Constant::getNullValue(base_int_ty); + llvm::IRBuilder<> ir(sv); + for (const auto &rw : rws) { + // it must be vector as it's an operand to llvm + if (!rw.poison) { + auto init_int = ir.CreateBitCast(rw.target, rw.to_int_ty); + + auto casted = ir.CreateZExtOrTrunc(init_int, base_int_ty); + auto target_itype = + 
llvm::IntegerType::get(F.getContext(), rw.bit_range.second); + auto dropped_high_bits = ir.CreateAnd( + casted, + llvm::ConstantInt::get(base_int_ty, target_itype->getBitMask())); + auto extracted = ir.CreateLShr(dropped_high_bits, rw.bit_range.first); + auto placed = ir.CreateShl(extracted, rw.bitshift); + base_value = ir.CreateOr(base_value, placed); + } + } + auto r = ir.CreateBitCast(base_value, vec_type); + sv->replaceAllUsesWith(r); + sv->eraseFromParent(); + pres = llvm::PreservedAnalyses::none(); + } + + return pres; +} + + +} // namespace anvill \ No newline at end of file diff --git a/lib/Providers/TypeProvider.cpp b/lib/Providers/TypeProvider.cpp index ebe51be6f..661f4c033 100644 --- a/lib/Providers/TypeProvider.cpp +++ b/lib/Providers/TypeProvider.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,8 @@ #include #include +#include +#include #include #include "Specification.h" @@ -79,7 +82,8 @@ SpecificationTypeProvider::~SpecificationTypeProvider(void) {} SpecificationTypeProvider::SpecificationTypeProvider(const Specification &spec) : BaseTypeProvider(spec.impl->type_translator), - impl(spec.impl) {} + impl(spec.impl), + layout(spec.Arch()->DataLayout()) {} // Try to return the type of a function starting at address `address`. This // type is the prototype of the function. 
@@ -96,12 +100,23 @@ SpecificationTypeProvider::TryGetFunctionType(uint64_t address) const { std::optional SpecificationTypeProvider::TryGetVariableType(uint64_t address, llvm::Type *) const { - auto var_it = impl->address_to_var.find(address); - if (var_it != impl->address_to_var.end()) { - return *(var_it->second); - } else { + + auto var_it = impl->address_to_var.lower_bound(address); + if (var_it != impl->address_to_var.begin() && var_it->first != address) { + var_it--; + } + + if (var_it == impl->address_to_var.end()) { return std::nullopt; } + + auto v = var_it->second; + if (v->type && address >= v->address && + address < v->address + this->layout.getTypeSizeInBits(v->type) / 8) { + return *v; + } + + return std::nullopt; } std::optional diff --git a/lib/Specification.h b/lib/Specification.h index 07a3a2a33..59f20dc64 100644 --- a/lib/Specification.h +++ b/lib/Specification.h @@ -70,7 +70,7 @@ class SpecificationImpl // Inverted mapping of byte addresses to the variables containing those // addresses. 
- std::unordered_map address_to_var; + std::map address_to_var; // NOTE(pag): We used ordered containers so that any type of round-tripping diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 7f02cf509..898caf21d 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 7f02cf50924fe99751ead9952885ce4231e93cdc +Subproject commit 898caf21d5b04a4e4151cf93d2198cf05d629129 diff --git a/tests/anvill_passes/CMakeLists.txt b/tests/anvill_passes/CMakeLists.txt index ff406b2d3..56a7bdeb4 100644 --- a/tests/anvill_passes/CMakeLists.txt +++ b/tests/anvill_passes/CMakeLists.txt @@ -22,6 +22,7 @@ add_executable(test_anvill_passes src/RemoveStackPointerCExprs.cpp src/RecoverEntityUses.cpp src/TestAbstractStackBB.cpp + src/VectorRW.cpp ) target_link_libraries(test_anvill_passes PRIVATE diff --git a/tests/anvill_passes/data/VectorRewriteSmall.ll b/tests/anvill_passes/data/VectorRewriteSmall.ll new file mode 100644 index 000000000..5f15cdcb2 --- /dev/null +++ b/tests/anvill_passes/data/VectorRewriteSmall.ll @@ -0,0 +1,8 @@ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx-macho" + +define <2 x float> @f(<4 x i8> %v1 , <4 x i8> %v2) { + %.sroa.23.28.vecblend = shufflevector <4 x i8> %v1, <4 x i8> %v2, <8 x i32> + %casted = bitcast <8 x i8> %.sroa.23.28.vecblend to <2 x float> + ret <2 x float> %casted +} \ No newline at end of file diff --git a/tests/anvill_passes/data/VectorToRewrite.ll b/tests/anvill_passes/data/VectorToRewrite.ll new file mode 100644 index 000000000..6da1d9798 --- /dev/null +++ b/tests/anvill_passes/data/VectorToRewrite.ll @@ -0,0 +1,10 @@ +target datalayout = "E-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx-macho" + +define <2 x float> @f(<5 x i8> %v1 , <4 x i8> %v2) { + %.sroa.23.24.vec.expand = shufflevector <5 x i8> %v1, <5 x i8> poison, <8 x i32> + %.sroa.23.28.vec.expand = shufflevector <4 x i8> %v2, <4 x i8> 
poison, <8 x i32> + %.sroa.23.28.vecblend = shufflevector <8 x i8> %.sroa.23.24.vec.expand, <8 x i8> %.sroa.23.28.vec.expand, <8 x i32> + %casted = bitcast <8 x i8> %.sroa.23.28.vecblend to <2 x float> + ret <2 x float> %casted +} \ No newline at end of file diff --git a/tests/anvill_passes/src/VectorRW.cpp b/tests/anvill_passes/src/VectorRW.cpp new file mode 100644 index 000000000..5d5a3d154 --- /dev/null +++ b/tests/anvill_passes/src/VectorRW.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019-present, Trail of Bits, Inc. + * All rights reserved. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "Utils.h" + +namespace anvill { +static std::unique_ptr +runVectorRW(llvm::LLVMContext &llvm_context, const std::string &module_name, + const std::string &function_name) { + + + auto module = LoadTestData(llvm_context, module_name); + + auto arch = remill::Arch::Build(&llvm_context, remill::GetOSName("linux"), + remill::GetArchName("amd64")); + + REQUIRE(arch != nullptr); + + CHECK(RunFunctionPass(module.get(), RewriteVectorOps())); + + for (auto &f : module->functions()) { + for (auto &insn : llvm::instructions(f)) { + CHECK(!llvm::isa(&insn)); + } + } + + + return module; +} + + +TEST_SUITE("Devectorize") { + TEST_CASE("Devectorize Blend") { + llvm::LLVMContext llvm_context; + auto mod = runVectorRW(llvm_context, "VectorToRewrite.ll", "f"); + mod->dump(); + } + + TEST_CASE("Small Vec") { + llvm::LLVMContext llvm_context; + auto mod = runVectorRW(llvm_context, "VectorRewriteSmall.ll", "f"); + mod->dump(); + } +} + + +} // namespace anvill From 0b5b329130ab60bd2c92dab0bfb538493331b5c4 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Fri, 27 Oct 2023 
00:15:39 -0400 Subject: [PATCH 156/163] fix typo oredered_locs => ordered_locs (#401) --- include/anvill/Declarations.h | 2 +- lib/Declarations.cpp | 10 +++++----- lib/Lifters/BasicBlockLifter.cpp | 6 +++--- lib/Lifters/FunctionLifter.cpp | 2 +- lib/Passes/ReplaceStackReferences.cpp | 28 +++++++++++++-------------- lib/Protobuf.cpp | 6 +++--- lib/Utils.cpp | 20 +++++++++---------- 7 files changed, 37 insertions(+), 37 deletions(-) diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index f2de10367..d4eb6332e 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -104,7 +104,7 @@ struct LowLoc { // the caller allocate the space, and pass a pointer to that space into // the callee, and so that should be represented using a parameter. struct ValueDecl { - std::vector oredered_locs; + std::vector ordered_locs; TypeSpec spec_type; diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 46800c75b..5d642baf6 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -102,7 +102,7 @@ BasicBlockContext::LiveParamsAtEntryAndExit() const { auto add_to_set = [](const std::vector ¶ms, std::unordered_set &locs_to_add) { for (const auto &p : params) { - std::copy(p.oredered_locs.begin(), p.oredered_locs.end(), + std::copy(p.ordered_locs.begin(), p.ordered_locs.end(), std::inserter(locs_to_add, locs_to_add.end())); } }; @@ -119,23 +119,23 @@ BasicBlockContext::LiveParamsAtEntryAndExit() const { &covered_live_exit](std::vector params) { for (auto p : params) { auto completely_covered = - std::all_of(p.oredered_locs.begin(), p.oredered_locs.end(), + std::all_of(p.ordered_locs.begin(), p.ordered_locs.end(), [&covered](const LowLoc &loc) -> bool { return covered.find(loc) != covered.end(); }); auto live_at_ent = std::any_of( - p.oredered_locs.begin(), p.oredered_locs.end(), + p.ordered_locs.begin(), p.ordered_locs.end(), [&covered_live_ent](const LowLoc &loc) -> bool { return covered_live_ent.find(loc) != 
covered_live_ent.end(); }); auto live_at_exit = std::any_of( - p.oredered_locs.begin(), p.oredered_locs.end(), + p.ordered_locs.begin(), p.ordered_locs.end(), [&covered_live_exit](const LowLoc &loc) -> bool { return covered_live_exit.find(loc) != covered_live_exit.end(); }); if (!completely_covered) { - std::copy(p.oredered_locs.begin(), p.oredered_locs.end(), + std::copy(p.ordered_locs.begin(), p.ordered_locs.end(), std::inserter(covered, covered.end())); res.push_back({p, live_at_ent, live_at_exit}); } diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index 292579109..fded551f8 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -452,7 +452,7 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { arg->setName(var.name); } - if (std::all_of(var.oredered_locs.begin(), var.oredered_locs.end(), + if (std::all_of(var.ordered_locs.begin(), var.ordered_locs.end(), [](const LowLoc &loc) -> bool { return loc.reg; })) { // Registers should not have aliases, or be captured arg->addAttr(llvm::Attribute::get(llvm_context, @@ -755,9 +755,9 @@ llvm::CallInst *BasicBlockLifter::CallBasicBlockFunction( if (HasMemLoc(repr_var)) { // TODO(Ian): the assumption here since we are able to build a single pointer here into the frame is that // svars are single valuedecl contigous - CHECK(repr_var.oredered_locs.size() == 1); + CHECK(repr_var.ordered_locs.size() == 1); auto stack_ptr = stack.PointerToStackMemberFromOffset( - builder, repr_var.oredered_locs[0].mem_offset); + builder, repr_var.ordered_locs[0].mem_offset); if (stack_ptr) { return *stack_ptr; } else { diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index ca3146ab1..f6ffd2d1e 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -283,7 +283,7 @@ void FunctionLifter::CallLiftedFunctionFromNativeFunction( AnnotateInstructions(block, pc_annotation_id, GetPCAnnotation(func_address)); 
llvm::Value *ret_val = nullptr; - if (decl.returns.oredered_locs.size() != 0 && + if (decl.returns.ordered_locs.size() != 0 && !decl.returns.type->isVoidTy()) { ret_val = LoadLiftedValue(decl.returns, types, intrinsics, this->options.arch, diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 55593962b..60c94a38d 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -160,8 +160,8 @@ class StackModel { // this feels weird maybe it should be all stack variables but then if the variable isnt live... // we will have discovered something that should have been live. for (const auto &v : cont.LiveParamsAtEntryAndExit()) { - if (HasMemLoc(v.param) && v.param.oredered_locs.size() == 1 && - v.param.oredered_locs[0].mem_reg->name == + if (HasMemLoc(v.param) && v.param.ordered_locs.size() == 1 && + v.param.ordered_locs[0].mem_reg->name == arch->StackPointerRegisterName()) { this->InsertFrameVar(index, v.param); } @@ -190,7 +190,7 @@ class StackModel { auto prev_decl = (--prec)->second; - CHECK(prev_decl.decl.oredered_locs[0].mem_offset <= off); + CHECK(prev_decl.decl.ordered_locs[0].mem_offset <= off); return {prev_decl}; } @@ -203,16 +203,16 @@ class StackModel { } DLOG(INFO) << "value found lte offset: " - << vlte->decl.oredered_locs[0].mem_offset << " " << off; + << vlte->decl.ordered_locs[0].mem_offset << " " << off; - auto offset_into_var = off - vlte->decl.oredered_locs[0].mem_offset; + auto offset_into_var = off - vlte->decl.ordered_locs[0].mem_offset; if (offset_into_var < static_cast(GetParamDeclSize(vlte->decl))) { return {{offset_into_var, *vlte}}; } DLOG(INFO) << "Looking for off " << off << " but not fitting " << offset_into_var << " got off " - << vlte->decl.oredered_locs[0].mem_offset; + << vlte->decl.ordered_locs[0].mem_offset; return std::nullopt; } @@ -225,24 +225,24 @@ class StackModel { void InsertFrameVar(size_t index, ParameterDecl var) { - if 
(VarOverlaps(var.oredered_locs[0].mem_offset) || - VarOverlaps(var.oredered_locs[0].mem_offset + GetParamDeclSize(var) - + if (VarOverlaps(var.ordered_locs[0].mem_offset) || + VarOverlaps(var.ordered_locs[0].mem_offset + GetParamDeclSize(var) - 1)) { - auto oparam = GetOverlappingParam(var.oredered_locs[0].mem_offset); - if (!VarOverlaps(var.oredered_locs[0].mem_offset)) { - oparam = GetOverlappingParam(var.oredered_locs[0].mem_offset + + auto oparam = GetOverlappingParam(var.ordered_locs[0].mem_offset); + if (!VarOverlaps(var.ordered_locs[0].mem_offset)) { + oparam = GetOverlappingParam(var.ordered_locs[0].mem_offset + GetParamDeclSize(var) - 1); } LOG(FATAL) << "Inserting variable that overlaps with current frame " - << var.oredered_locs[0].mem_offset + << var.ordered_locs[0].mem_offset << " with size: " << GetParamDeclSize(var) << " Overlaps with " - << oparam->decl.decl.oredered_locs[0].mem_offset + << oparam->decl.decl.ordered_locs[0].mem_offset << " with size " << GetParamDeclSize(oparam->decl.decl); } - this->frame.insert({var.oredered_locs[0].mem_offset, {index, var}}); + this->frame.insert({var.ordered_locs[0].mem_offset, {index, var}}); } }; diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 1e64b62c0..9723d131c 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -243,7 +243,7 @@ Result ProtobufTranslator::ParseIntoCallableDecl( // Figure out the return type of this function based off the return // values. 
llvm::Type *ret_type = ret_type = decl.returns.type; - if (decl.returns.oredered_locs.empty()) { + if (decl.returns.ordered_locs.empty()) { ret_type = llvm::Type::getVoidTy(context); } @@ -317,7 +317,7 @@ ProtobufTranslator::ValueDeclFromOrderedLowLoc(std::vector loc, const char *desc) const { ValueDecl decl; - decl.oredered_locs = std::move(loc); + decl.ordered_locs = std::move(loc); decl.spec_type = type; auto llvm_type = type_translator.DecodeFromSpec(decl.spec_type); if (!llvm_type.Succeeded()) { @@ -701,7 +701,7 @@ void ProtobufTranslator::ParseCFGIntoFunction( symval.curr_val().constant().is_tainted_by_pc(); DLOG(INFO) << "Adding global register override for " - << const_val.target_value.oredered_locs[0].reg->name << " " + << const_val.target_value.ordered_locs[0].reg->name << " " << std::hex << const_val.value; constant_values.push_back(const_val); } else { diff --git a/lib/Utils.cpp b/lib/Utils.cpp index 70ffd8029..5902bd06d 100644 --- a/lib/Utils.cpp +++ b/lib/Utils.cpp @@ -530,19 +530,19 @@ llvm::Value *StoreNativeValue(llvm::Value *native_val, const ValueDecl &decl, CHECK_EQ(module, intrinsics.read_memory_8->getParent()); CHECK_EQ(native_val->getType(), decl_type); - if (decl.oredered_locs.size() == 1) { - return StoreSubcomponent(native_val, decl.oredered_locs.at(0), types, + if (decl.ordered_locs.size() == 1) { + return StoreSubcomponent(native_val, decl.ordered_locs.at(0), types, intrinsics, ir, state_ptr, mem_ptr); } else { unsigned int ind = 0; - auto sty = CreateDeclSty(decl.oredered_locs, context); + auto sty = CreateDeclSty(decl.ordered_locs, context); auto curr_val = ir.CreateAlloca(sty); ir.CreateStore(native_val, curr_val); auto mem = mem_ptr; - for (const auto &comp : decl.oredered_locs) { + for (const auto &comp : decl.ordered_locs) { auto compvl = ExtractSubcomponent(ind, LocType(comp, context), curr_val, sty, ir); mem = StoreSubcomponent(compvl, comp, types, intrinsics, ir, state_ptr, @@ -612,15 +612,15 @@ llvm::Value 
*LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, const remill::IntrinsicTable &intrinsics, const remill::Arch *arch, llvm::IRBuilder<> &ir, llvm::Value *state_ptr, llvm::Value *mem_ptr) { - if (decl.oredered_locs.size() == 1) { - return LoadSubcomponent(decl.oredered_locs[0], decl.type, types, intrinsics, + if (decl.ordered_locs.size() == 1) { + return LoadSubcomponent(decl.ordered_locs[0], decl.type, types, intrinsics, ir, state_ptr, mem_ptr); } else { uint64_t offset = 0; std::vector comps; auto dl = arch->DataLayout(); - for (const auto &loc : decl.oredered_locs) { + for (const auto &loc : decl.ordered_locs) { auto subty = GetSubcomponentType(loc, offset, decl.type, dl); if (!subty) { @@ -634,7 +634,7 @@ llvm::Value *LoadLiftedValue(const ValueDecl &decl, const TypeDictionary &types, offset += loc.Size(); } - auto sty = CreateDeclSty(decl.oredered_locs, state_ptr->getContext()); + auto sty = CreateDeclSty(decl.ordered_locs, state_ptr->getContext()); return BuildMultiComponentValue(ir, comps, sty, decl.type, dl); } } @@ -1013,12 +1013,12 @@ llvm::Argument *GetBasicBlockStackPtr(llvm::Function *func) { } bool HasMemLoc(const ValueDecl &v) { - return std::any_of(v.oredered_locs.begin(), v.oredered_locs.end(), + return std::any_of(v.ordered_locs.begin(), v.ordered_locs.end(), [](const LowLoc &loc) -> bool { return loc.mem_reg; }); } bool HasRegLoc(const ValueDecl &v) { - return std::any_of(v.oredered_locs.begin(), v.oredered_locs.end(), + return std::any_of(v.ordered_locs.begin(), v.ordered_locs.end(), [](const LowLoc &loc) -> bool { return loc.reg; }); } From 9cfd0d6ea32a37c306d189498e0a8e6e446f4f98 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Fri, 10 Nov 2023 09:03:03 -0500 Subject: [PATCH 157/163] Sleigh sparc (#387) * update link to empirehacking slack (#383) * use sparc sleigh * update submodule * bump remill * bump remill * update remill * pass build flags to remill for debug symbols * update 
remill * add noreturn flag * correctly handle noreturn pt1 * fix return value to also take into account noreturn * bump remill * bump remill * update remill * bump remill * bump remill * bump remill * bump remill * bump remill * adapt type for stack offsets * use specified register directly * bump remill * bump remill --------- Co-authored-by: James Olds Co-authored-by: 2over12 --- README.md | 2 +- data_specifications/specification.proto | 3 ++- include/anvill/Specification.h | 1 + lib/Lifters/BasicBlockLifter.cpp | 22 +++++++++++++++++----- lib/Lifters/CodeLifter.cpp | 10 ++++------ lib/Specification.cpp | 3 ++- libraries/lifting-tools-ci | 2 +- remill | 2 +- scripts/build.sh | 1 + 9 files changed, 30 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 63fb13086..cdacbe7ca 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ plugin that is currently closed source. You can checkout the tag: [binja-final-v ## Getting Help -If you are experiencing undocumented problems with Anvill then ask for help in the `#binary-lifting` channel of the [Empire Hacking Slack](https://empireslacking.herokuapp.com/). +If you are experiencing undocumented problems with Anvill then ask for help in the `#binary-lifting` channel of the [Empire Hacking Slack](https://slack.empirehacking.nyc/). 
## Supported Platforms diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index dd95ba170..6198bd846 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -368,7 +368,8 @@ message Call { optional uint64 return_address = 2; bool is_tailcall = 3; bool stop = 4; - optional uint64 target_address = 5; + bool noreturn = 5; + optional uint64 target_address = 6; } message Return { diff --git a/include/anvill/Specification.h b/include/anvill/Specification.h index a84026434..5e66f480d 100644 --- a/include/anvill/Specification.h +++ b/include/anvill/Specification.h @@ -72,6 +72,7 @@ struct Jump : ControlFlowOverrideSpec { struct Call : ControlFlowOverrideSpec { std::optional return_address; bool is_tailcall; + bool is_noreturn; std::optional target_address; }; diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index fded551f8..f0a81d36a 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -192,11 +192,19 @@ bool BasicBlockLifter::DoInterProceduralControlFlow( builder.CreateStore(raddr, npc); builder.CreateStore(raddr, pc); } else { - call->setDoesNotReturn(); - - remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics); + if (cc.is_noreturn) { + call->setDoesNotReturn(); + remill::AddTerminatingTailCall(block, intrinsics.error, intrinsics); + } else { + // a call that stops that is not noreturn should be a call + return + auto func = block->getParent(); + auto should_return = func->getArg(kShouldReturnArgNum); + builder.CreateStore(llvm::Constant::getAllOnesValue( + llvm::IntegerType::getInt1Ty(llvm_context)), + should_return); + } } - return !cc.stop; + return !cc.stop || !cc.is_noreturn; } else if (std::holds_alternative(override)) { auto func = block->getParent(); auto should_return = func->getArg(kShouldReturnArgNum); @@ -530,8 +538,12 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { 
auto stack_offsets = this->block_context->GetStackOffsetsAtEntry(); for (auto ®_off : stack_offsets.affine_equalities) { - auto new_value = LifterOptions::SymbolicStackPointerInitWithOffset( + auto *new_value = LifterOptions::SymbolicStackPointerInitWithOffset( ir, this->sp_reg, this->block_def.addr, reg_off.stack_offset); + auto *target_type = reg_off.target_value.type; + if (new_value->getType() != target_type) { + new_value = AdaptToType(ir, new_value, target_type); + } auto nmem = StoreNativeValue( new_value, reg_off.target_value, type_provider.Dictionary(), intrinsics, ir, this->state_ptr, remill::LoadMemoryPointer(ir, intrinsics)); diff --git a/lib/Lifters/CodeLifter.cpp b/lib/Lifters/CodeLifter.cpp index c4418cc2f..1a3b0309f 100644 --- a/lib/Lifters/CodeLifter.cpp +++ b/lib/Lifters/CodeLifter.cpp @@ -55,12 +55,10 @@ CodeLifter::CodeLifter(const LifterOptions &options, op_lifter(options.arch->DefaultLifter(intrinsics)), is_sparc(options.arch->IsSPARC32() || options.arch->IsSPARC64()), is_x86_or_amd64(options.arch->IsX86() || options.arch->IsAMD64()), - pc_reg(options.arch - ->RegisterByName(options.arch->ProgramCounterRegisterName()) - ->EnclosingRegister()), - sp_reg( - options.arch->RegisterByName(options.arch->StackPointerRegisterName()) - ->EnclosingRegister()), + pc_reg(options.arch->RegisterByName( + options.arch->ProgramCounterRegisterName())), + sp_reg(options.arch->RegisterByName( + options.arch->StackPointerRegisterName())), memory_provider(options.memory_provider), type_provider(options.type_provider), type_specifier(type_specifier), diff --git a/lib/Specification.cpp b/lib/Specification.cpp index 7e1d96f46..c02403261 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -213,6 +213,7 @@ SpecificationImpl::ParseSpecification( for (auto &call : spec.overrides().calls()) { Call callspec{}; callspec.stop = call.stop(); + callspec.is_noreturn = call.noreturn(); callspec.address = call.address(); if (call.has_return_address()) { 
callspec.return_address = call.return_address(); @@ -315,7 +316,7 @@ GetArch(llvm::LLVMContext &context, case ::specification::ARCH_AARCH32: arch_name = remill::kArchAArch32LittleEndian; break; - case ::specification::ARCH_SPARC32: arch_name = remill::kArchSparc32; break; + case ::specification::ARCH_SPARC32: arch_name = remill::kArchSparc32_SLEIGH; break; case ::specification::ARCH_SPARC64: arch_name = remill::kArchSparc64; break; case ::specification::ARCH_PPC: arch_name = remill::kArchPPC; break; } diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 898caf21d..95b0aa262 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 898caf21d5b04a4e4151cf93d2198cf05d629129 +Subproject commit 95b0aa2621908df4982a18b02ef50fcb94d1044c diff --git a/remill b/remill index 018324821..17cff6b4d 160000 --- a/remill +++ b/remill @@ -1 +1 @@ -Subproject commit 01832482184da13024f5c511fdb582c728ab843c +Subproject commit 17cff6b4df900c68ff583debec5fabe76a01d9a5 diff --git a/scripts/build.sh b/scripts/build.sh index 2eeaa5a93..6acae60e0 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -247,6 +247,7 @@ function BuildRemill -DCMAKE_TOOLCHAIN_FILE="${DOWNLOAD_DIR}/${LIBRARY_VERSION}/scripts/buildsystems/vcpkg.cmake" \ -DVCPKG_TARGET_TRIPLET="${VCPKG_TARGET_TRIPLET}" \ -G Ninja \ + ${BUILD_FLAGS} \ ${SRC_DIR}/remill cmake --build . 
--target install From d56930566f0864dbb97820b40d931e066eb1b3fc Mon Sep 17 00:00:00 2001 From: Eric Kilmer Date: Fri, 17 Nov 2023 11:26:34 -0500 Subject: [PATCH 158/163] Fix infinite loop with self-referential global struct (#403) The issue that this change solves can be reproduced with the following program ```c // gcc -O0 -g -gdwarf-4 self_refer.c -o self_refer #include <stdio.h> struct foo { int num; struct foo *foo_next; }; static struct foo foo0 = { .num = 3, .foo_next = &foo0 }; int main() { printf("Num: %d\n", foo0.foo_next->num); } ``` --- lib/Lifters/DataLifter.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/Lifters/DataLifter.cpp b/lib/Lifters/DataLifter.cpp index 4ddc849f5..202b4b0bd 100644 --- a/lib/Lifters/DataLifter.cpp +++ b/lib/Lifters/DataLifter.cpp @@ -184,20 +184,22 @@ llvm::Constant *DataLifter::LiftData(const VariableDecl &decl, } } - if (bytes_accessable) { - value = lifter_context.value_lifter.Lift(bytes, type, lifter_context, - decl.address); - } - - auto is_constant = first_byte_perms == BytePermission::kReadable || first_byte_perms == BytePermission::kReadableExecutable; auto md = type_specifier.EncodeToMetadata(decl.spec_type); auto gvar = new llvm::GlobalVariable(*options.module, type, is_constant, llvm::GlobalValue::ExternalLinkage, - value, var_name); + nullptr, var_name); gvar->setMetadata("anvill.type", md); + lifter_context.AddEntity(gvar, decl.address); + + if (bytes_accessable) { + value = lifter_context.value_lifter.Lift(bytes, type, lifter_context, + decl.address); + } + gvar->setInitializer(value); + return gvar; } From e8ca92cca58bfdb413121a1edacd5f80ea06cb1e Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Tue, 5 Dec 2023 10:14:55 -0500 Subject: [PATCH 159/163] Bump lifting-tools-ci to use clang-14 for SPARC (#404) * allow for manual workflow trigger * bump lifting-tools-ci for sparc binaries + clang14 * bump for sparc --- .github/workflows/build.yml | 
2 ++ libraries/lifting-tools-ci | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9171cf43b..57f6a23ec 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,6 +24,8 @@ on: branches: - "*" + workflow_dispatch: + jobs: cleanup_stale_workflows: runs-on: ubuntu-22.04 diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 95b0aa262..8322d34e2 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 95b0aa2621908df4982a18b02ef50fcb94d1044c +Subproject commit 8322d34e2360688938f2ef9ba877aaa613101e32 From 70209a8c3311cc97875605a137da210233fe9cd6 Mon Sep 17 00:00:00 2001 From: 2over12 Date: Tue, 5 Dec 2023 14:40:47 -0500 Subject: [PATCH 160/163] Basic pointer use heuristic (#402) * basic pointer fix * add binary addrs * spurious header * add type names * named types * default for null ty * fix typedef --- data_specifications/specification.proto | 17 ++++ include/anvill/Declarations.h | 30 +++--- .../Passes/ConvertAddressesToEntityUses.h | 7 +- include/anvill/Providers.h | 10 ++ include/anvill/Type.h | 24 ++++- lib/Declarations.cpp | 3 +- lib/Lifters/EntityLifter.cpp | 10 ++ lib/Passes/ConvertAddressesToEntityUses.cpp | 18 +++- lib/Protobuf.cpp | 93 ++++++++++++++----- lib/Protobuf.h | 23 +++-- lib/Providers/TypeProvider.cpp | 20 ++++ lib/Specification.cpp | 23 +++-- lib/Specification.h | 3 + lib/Type.cpp | 30 +++++- 14 files changed, 257 insertions(+), 54 deletions(-) diff --git a/data_specifications/specification.proto b/data_specifications/specification.proto index 6198bd846..f468bea87 100644 --- a/data_specifications/specification.proto +++ b/data_specifications/specification.proto @@ -313,6 +313,18 @@ message TypeHint { Variable target_var = 2; } +message RelativeAddress { + uint64 entry_vaddr = 1; + int64 displacement = 2; +} + +message ProgramAddress { + oneof inner { + uint64 internal_address 
= 1; + RelativeAddress ext_address = 2; + } +} + message Function { uint64 entry_address = 1; uint64 entry_uid = 12; @@ -334,11 +346,15 @@ message Function { // to have this type after this instruction, these will be translated into // a low lifting of that location with spec type metadata repeated TypeHint type_hints = 11; + ProgramAddress binary_addr = 13; } + + message GlobalVariable { TypeSpec type = 1; uint64 address = 2; + ProgramAddress binary_address = 3; } message Symbol { @@ -404,4 +420,5 @@ message Specification { string image_name = 10; uint64 image_base = 11; repeated string required_globals = 12; + map type_names = 13; } diff --git a/include/anvill/Declarations.h b/include/anvill/Declarations.h index d4eb6332e..a95bc328e 100644 --- a/include/anvill/Declarations.h +++ b/include/anvill/Declarations.h @@ -51,7 +51,7 @@ struct Uid { bool operator==(const Uid &) const = default; }; -} +} // namespace anvill template <> struct std::hash { @@ -88,6 +88,14 @@ struct LowLoc { bool operator==(const LowLoc &loc) const = default; }; + +struct RelAddr { + uint64_t vaddr; + std::int64_t disp; +}; + +using MachineAddr = std::variant; + // A value, such as a parameter or a return value. Values are resident // in one of two locations: either in a register, represented by a non- // nullptr `reg` value, or in memory, at `[mem_reg + mem_offset]`. @@ -140,6 +148,8 @@ struct VariableDecl { // Address of this global variable. std::uint64_t address{0}; + MachineAddr binary_addr{}; + // Declare this global variable in an LLVM module. 
llvm::GlobalVariable *DeclareInModule(const std::string &name, llvm::Module &) const; @@ -427,17 +437,13 @@ struct FunctionDecl : public CallableDecl { std::unordered_map stack_offsets_at_exit; - std::unordered_map> - live_regs_at_entry; + std::unordered_map> live_regs_at_entry; - std::unordered_map> - live_regs_at_exit; + std::unordered_map> live_regs_at_exit; - std::unordered_map> - constant_values_at_entry; + std::unordered_map> constant_values_at_entry; - std::unordered_map> - constant_values_at_exit; + std::unordered_map> constant_values_at_exit; // sorted vector of hints std::vector type_hints; @@ -452,6 +458,9 @@ struct FunctionDecl : public CallableDecl { std::size_t parameter_size{0}; + MachineAddr binary_addr{}; + + std::vector in_scope_variables; // Declare this function in an LLVM module. @@ -472,8 +481,7 @@ struct FunctionDecl : public CallableDecl { SpecBlockContext GetBlockContext(Uid uid) const; - void - AddBBContexts(std::unordered_map &contexts) const; + void AddBBContexts(std::unordered_map &contexts) const; }; // A call site decl, as represented at a "near ABI" level. This is like a diff --git a/include/anvill/Passes/ConvertAddressesToEntityUses.h b/include/anvill/Passes/ConvertAddressesToEntityUses.h index 885ab8776..6e794c3b6 100644 --- a/include/anvill/Passes/ConvertAddressesToEntityUses.h +++ b/include/anvill/Passes/ConvertAddressesToEntityUses.h @@ -10,6 +10,7 @@ #include #include + #include #include @@ -44,7 +45,6 @@ using EntityUsages = std::vector; class ConvertAddressesToEntityUses final : public llvm::PassInfoMixin { private: - // Resolve addresses to entities and vice versa. 
const CrossReferenceResolver &xref_resolver; @@ -52,7 +52,6 @@ class ConvertAddressesToEntityUses final const std::optional pc_metadata_id; public: - // Function pass entry point llvm::PreservedAnalyses run(llvm::Function &function, llvm::FunctionAnalysisManager &fam); @@ -60,13 +59,15 @@ class ConvertAddressesToEntityUses final // Returns the pass name static llvm::StringRef name(void); + bool IsPointerLike(llvm::Use &use); + // Enumerates some of the possible entity usages that are isolated to // specific instruction operand uses. EntityUsages EnumeratePossibleEntityUsages(llvm::Function &function); ConvertAddressesToEntityUses( const CrossReferenceResolver &xref_resolver_, - std::optional pc_metadata_id_=std::nullopt); + std::optional pc_metadata_id_ = std::nullopt); }; } // namespace anvill diff --git a/include/anvill/Providers.h b/include/anvill/Providers.h index c14373f62..ceffc0b96 100644 --- a/include/anvill/Providers.h +++ b/include/anvill/Providers.h @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include @@ -71,6 +72,8 @@ class TypeProvider { virtual const ::anvill::TypeDictionary &Dictionary(void) const = 0; + virtual std::vector NamedTypes(void) const = 0; + virtual ~TypeProvider() = default; }; @@ -117,6 +120,9 @@ class NullTypeProvider : public BaseTypeProvider { std::optional TryGetVariableType(uint64_t, llvm::Type *hinted_value_type = nullptr) const override; + std::vector NamedTypes(void) const override { + return {}; + } }; // Delegates to an underlying tye provider to provide the data. 
Derived from @@ -149,6 +155,8 @@ class ProxyTypeProvider : public TypeProvider { std::optional)> typed_reg_cb) const override; + std::vector NamedTypes(void) const override; + const ::anvill::TypeDictionary &Dictionary(void) const override; }; @@ -199,6 +207,8 @@ class SpecificationTypeProvider : public BaseTypeProvider { TryGetVariableType(uint64_t address, llvm::Type *hinted_value_type = nullptr) const override; + std::vector NamedTypes(void) const override; + private: SpecificationTypeProvider(void) = delete; }; diff --git a/include/anvill/Type.h b/include/anvill/Type.h index f0ac573c8..ac00c9968 100644 --- a/include/anvill/Type.h +++ b/include/anvill/Type.h @@ -8,6 +8,7 @@ #pragma once +#include #include #include @@ -41,6 +42,10 @@ class Arch; } // namespace remill namespace anvill { +llvm::StructType *getOrCreateNamedStruct(llvm::LLVMContext &context, + llvm::StringRef Name); + + struct TypeSpecificationError final { enum class ErrorCode { InvalidSpecFormat, @@ -104,11 +109,21 @@ struct UnknownType { bool operator==(const UnknownType &) const = default; }; + +class TypeName { + public: + std::string name; + + bool operator==(const TypeName &) const = default; + + explicit TypeName(std::string name) : name(name) {} +}; + using TypeSpec = std::variant, std::shared_ptr, std::shared_ptr, std::shared_ptr, std::shared_ptr, - UnknownType>; + UnknownType, TypeName>; bool operator==(std::shared_ptr, std::shared_ptr); bool operator==(std::shared_ptr, std::shared_ptr); @@ -285,6 +300,13 @@ class TypeTranslator { namespace std { +template <> +struct hash { + size_t operator()(const anvill::TypeName &unk) const { + return std::hash()(unk.name); + } +}; + template <> struct hash { size_t operator()(const anvill::UnknownType &unk) const { diff --git a/lib/Declarations.cpp b/lib/Declarations.cpp index 5d642baf6..4dba43764 100644 --- a/lib/Declarations.cpp +++ b/lib/Declarations.cpp @@ -411,7 +411,8 @@ CallableDecl::DecodeFromPB(const remill::Arch *arch, const std::string &pb) 
{ const TypeDictionary type_dictionary(*(arch->context)); const TypeTranslator type_translator(type_dictionary, arch); std::unordered_map type_map; - ProtobufTranslator translator(type_translator, arch, type_map); + std::unordered_map type_names; + ProtobufTranslator translator(type_translator, arch, type_map, type_names); auto default_callable_decl_res = translator.DecodeDefaultCallableDecl(function); diff --git a/lib/Lifters/EntityLifter.cpp b/lib/Lifters/EntityLifter.cpp index 29f8d700d..900b0ef82 100644 --- a/lib/Lifters/EntityLifter.cpp +++ b/lib/Lifters/EntityLifter.cpp @@ -12,8 +12,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -35,6 +37,14 @@ EntityLifterImpl::EntityLifterImpl(const LifterOptions &options_) data_lifter(options) { CHECK_EQ(options.arch->context, &(options.module->getContext())); options.arch->PrepareModule(options.module); + + // Lift named types + for (auto sty : this->type_provider->NamedTypes()) { + auto gv = new llvm::GlobalVariable(*options.module, sty, false, + llvm::GlobalValue::ExternalLinkage, + nullptr, sty->getName() + "_var_repr"); + llvm::appendToUsed(*options.module, gv); + } } // Tells the entity lifter that `entity` is the lifted function/data at diff --git a/lib/Passes/ConvertAddressesToEntityUses.cpp b/lib/Passes/ConvertAddressesToEntityUses.cpp index ea04c8fee..b9c3d00ee 100644 --- a/lib/Passes/ConvertAddressesToEntityUses.cpp +++ b/lib/Passes/ConvertAddressesToEntityUses.cpp @@ -14,6 +14,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -37,6 +41,16 @@ static llvm::MDNode *GetPCAnnotation(llvm::Module *module, uint64_t pc) { } // namespace + +bool ConvertAddressesToEntityUses::IsPointerLike(llvm::Use &use) { + if (auto cst = llvm::dyn_cast(use.get())) { + return llvm::Instruction::IntToPtr == cst->getOpcode(); + } + // TODO(Ian): Add use of type annotations here + + return false; +} + llvm::PreservedAnalyses 
ConvertAddressesToEntityUses::run(llvm::Function &function, llvm::FunctionAnalysisManager &fam) { @@ -150,9 +164,11 @@ EntityUsages ConvertAddressesToEntityUses::EnumeratePossibleEntityUsages( ra.is_valid && !ra.references_return_address && !ra.references_stack_pointer) { + if (ra.references_entity || // Related to an existing lifted entity. ra.references_global_value || // Related to a global var/func. - ra.references_program_counter) { // Related to `__anvill_pc`. + ra.references_program_counter || + IsPointerLike(use)) { // Related to `__anvill_pc`. output.emplace_back(&use, ra); } } diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 9723d131c..9e267f867 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -261,14 +263,16 @@ Result ProtobufTranslator::ParseIntoCallableDecl( ProtobufTranslator::ProtobufTranslator( const anvill::TypeTranslator &type_translator_, const remill::Arch *arch_, - std::unordered_map &type_map) + std::unordered_map &type_map, + std::unordered_map &type_names) : arch(arch_), type_translator(type_translator_), context(*(arch->context)), void_type(llvm::Type::getVoidTy(context)), dict_void_type(remill::RecontextualizeType( type_translator.Dictionary().u.named.void_, context)), - type_map(type_map) {} + type_map(type_map), + type_names(type_names) {} anvill::Result @@ -462,8 +466,12 @@ ProtobufTranslator::DecodeType(const ::specification::TypeSpec &obj) const { } } if (obj.has_alias()) { - if (type_map.count(obj.alias())) { - return type_map.at(obj.alias()); + if (this->type_names.count(obj.alias())) { + TypeSpec res = TypeName(type_names.at(obj.alias())); + return res; + } else if (this->type_map.count(obj.alias())) { + TypeSpec tspec = this->type_map.at(obj.alias()); + return tspec; } else { LOG(ERROR) << "Unknown alias id " << obj.alias(); return {BaseType::Void}; @@ -518,6 +526,14 @@ Result ProtobufTranslator::DecodeFunction( decl.address = 
function.entry_address(); decl.entry_uid = Uid{function.entry_uid()}; + + if (function.binary_addr().has_ext_address()) { + auto ext = function.binary_addr().ext_address(); + decl.binary_addr = RelAddr{ext.entry_vaddr(), ext.displacement()}; + } else { + decl.binary_addr = function.binary_addr().internal_address(); + } + if (!function.has_callable()) { return std::string("all functions should have a callable"); } @@ -532,7 +548,7 @@ Result ProtobufTranslator::DecodeFunction( if (!function.has_frame()) { return std::string("All functions should have a frame"); } - const auto& frame = function.frame(); + const auto &frame = function.frame(); decl.stack_depth = frame.frame_size(); decl.ret_ptr_offset = frame.return_address_offset(); @@ -612,8 +628,7 @@ Result ProtobufTranslator::DecodeFunction( } void ProtobufTranslator::AddLiveValuesToBB( - std::unordered_map> &map, - Uid bb_uid, + std::unordered_map> &map, Uid bb_uid, const ::google::protobuf::RepeatedPtrField<::specification::Parameter> &values) const { auto &v = map.insert({bb_uid, std::vector()}).first->second; @@ -630,7 +645,7 @@ void ProtobufTranslator::AddLiveValuesToBB( void ProtobufTranslator::ParseCFGIntoFunction( const ::specification::Function &obj, FunctionDecl &decl) const { - for (const auto& blk : obj.blocks()) { + for (const auto &blk : obj.blocks()) { std::unordered_set tmp; for (auto o : blk.second.outgoing_blocks()) { tmp.insert({o}); @@ -641,7 +656,7 @@ void ProtobufTranslator::ParseCFGIntoFunction( tmp, {blk.second.context_assignments().begin(), blk.second.context_assignments().end()}, - {blk.first}, + {blk.first}, }; decl.cfg.emplace(Uid{blk.first}, std::move(nblk)); } @@ -781,14 +796,30 @@ Result ProtobufTranslator::DecodeGlobalVar( } decl.type = type; + if (obj.binary_address().has_ext_address()) { + decl.binary_addr = + RelAddr{obj.binary_address().ext_address().entry_vaddr(), + obj.binary_address().ext_address().displacement()}; + } else { + decl.binary_addr = 
obj.binary_address().internal_address(); + } + + return decl; } anvill::Result ProtobufTranslator::DecodeType( const ::specification::TypeSpec &obj, - const std::unordered_map &map) { + const std::unordered_map &map, + const std::unordered_map &named_types) { if (obj.has_alias()) { auto alias = obj.alias(); + + if (named_types.contains(alias)) { + TypeSpec tname = TypeName(named_types.at(alias)); + return tname; + } + if (type_map.count(alias)) { return type_map[alias]; } @@ -800,7 +831,7 @@ anvill::Result ProtobufTranslator::DecodeType( return {BaseType::Void}; } - auto res = DecodeType(map.at(alias), map); + auto res = DecodeType(map.at(alias), map, named_types); if (!res.Succeeded()) { return res.TakeError(); } @@ -811,7 +842,7 @@ anvill::Result ProtobufTranslator::DecodeType( auto pointer = obj.pointer(); TypeSpec pointee = BaseType::Void; if (pointer.has_pointee()) { - auto maybe_pointee = DecodeType(pointer.pointee(), map); + auto maybe_pointee = DecodeType(pointer.pointee(), map, named_types); if (!maybe_pointee.Succeeded()) { return maybe_pointee.Error(); } @@ -824,7 +855,7 @@ anvill::Result ProtobufTranslator::DecodeType( if (!vector.has_base()) { return {"Vector type without base type"}; } - auto maybe_base = DecodeType(vector.base(), map); + auto maybe_base = DecodeType(vector.base(), map, named_types); if (!maybe_base.Succeeded()) { return maybe_base.Error(); } @@ -835,7 +866,7 @@ anvill::Result ProtobufTranslator::DecodeType( if (!array.has_base()) { return {"Array type without base type"}; } - auto maybe_base = DecodeType(array.base(), map); + auto maybe_base = DecodeType(array.base(), map, named_types); if (!maybe_base.Succeeded()) { return maybe_base.Error(); } @@ -844,7 +875,7 @@ anvill::Result ProtobufTranslator::DecodeType( if (obj.has_struct_()) { auto res = std::make_shared(); for (auto elem : obj.struct_().members()) { - auto maybe_type = DecodeType(elem, map); + auto maybe_type = DecodeType(elem, map, named_types); if 
(!maybe_type.Succeeded()) { return maybe_type.Error(); } @@ -858,14 +889,14 @@ anvill::Result ProtobufTranslator::DecodeType( return {"Function without return type"}; } auto res = std::make_shared(); - auto maybe_ret = DecodeType(func.return_type(), map); + auto maybe_ret = DecodeType(func.return_type(), map, named_types); if (!maybe_ret.Succeeded()) { return maybe_ret.Error(); } res->return_type = std::move(maybe_ret.Value()); res->is_variadic = func.is_variadic(); for (auto arg : func.arguments()) { - auto maybe_argtype = DecodeType(arg, map); + auto maybe_argtype = DecodeType(arg, map, named_types); if (!maybe_argtype.Succeeded()) { return maybe_argtype.Error(); } @@ -877,17 +908,37 @@ anvill::Result ProtobufTranslator::DecodeType( } Result ProtobufTranslator::DecodeTypeMap( - const ::google::protobuf::Map - &map) { + const ::google::protobuf::Map &map, + const ::google::protobuf::Map &names) { for (auto &[k, v] : map) { if (type_map.count(k)) { continue; } - auto res = DecodeType(v, {map.begin(), map.end()}); + auto res = + DecodeType(v, {map.begin(), map.end()}, {names.begin(), names.end()}); + if (!res.Succeeded()) { return res.Error(); } - type_map[k] = res.Value(); + + + if (names.contains(k)) { + auto ty = this->type_translator.DecodeFromSpec(res.Value()); + if (!ty.Succeeded()) { + return ty.Error().message; + } + + if (auto *sty = llvm::dyn_cast(ty.Value())) { + + + std::string name = names.at(k); + auto res = getOrCreateNamedStruct(this->context, name); + res->setBody(sty->elements()); + } + type_names[k] = names.at(k); + } else { + type_map[k] = res.Value(); + } } return std::monostate{}; } diff --git a/lib/Protobuf.h b/lib/Protobuf.h index 327152b65..ad2d9fc48 100644 --- a/lib/Protobuf.h +++ b/lib/Protobuf.h @@ -55,13 +55,15 @@ class ProtobufTranslator { llvm::Type *const dict_void_type; std::unordered_map &type_map; + std::unordered_map &type_names; anvill::Result DecodeType(const ::specification::TypeSpec &obj) const; anvill::Result DecodeType( 
const ::specification::TypeSpec &obj, - const std::unordered_map &map); + const std::unordered_map &map, + const std::unordered_map &named_types); // Parse the location of a value. This applies to both parameters and @@ -90,8 +92,7 @@ class ProtobufTranslator { FunctionDecl &decl) const; void AddLiveValuesToBB( - std::unordered_map> &map, - Uid bb_uid, + std::unordered_map> &map, Uid bb_uid, const ::google::protobuf::RepeatedPtrField<::specification::Parameter> &values) const; @@ -99,13 +100,16 @@ class ProtobufTranslator { public: explicit ProtobufTranslator( const anvill::TypeTranslator &type_translator_, const remill::Arch *arch_, - std::unordered_map &type_map); + std::unordered_map &type_map, + std::unordered_map &type_names); inline explicit ProtobufTranslator( const anvill::TypeTranslator &type_translator_, const std::unique_ptr &arch_, - std::unordered_map &type_map) - : ProtobufTranslator(type_translator_, arch_.get(), type_map) {} + std::unordered_map &type_map, + std::unordered_map &type_names) + : ProtobufTranslator(type_translator_, arch_.get(), type_map, + type_names) {} // Parse a parameter from the Protobuf spec. 
Parameters should have names, // as that makes the bitcode slightly easier to read, but names are @@ -133,9 +137,10 @@ class ProtobufTranslator { Result DecodeDefaultCallableDecl(const ::specification::Function &obj) const; - Result - DecodeTypeMap(const ::google::protobuf::Map &map); + Result DecodeTypeMap( + const ::google::protobuf::Map + &map, + const ::google::protobuf::Map &names); }; } // namespace anvill diff --git a/lib/Providers/TypeProvider.cpp b/lib/Providers/TypeProvider.cpp index 661f4c033..dd6516826 100644 --- a/lib/Providers/TypeProvider.cpp +++ b/lib/Providers/TypeProvider.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include "Specification.h" @@ -97,6 +98,20 @@ SpecificationTypeProvider::TryGetFunctionType(uint64_t address) const { } } +std::vector +SpecificationTypeProvider::NamedTypes(void) const { + std::vector stys; + + for (auto nms : this->impl->named_types) { + auto sty = llvm::StructType::getTypeByName(this->context, nms); + if (sty) { + stys.push_back(sty); + } + } + + return stys; +} + std::optional SpecificationTypeProvider::TryGetVariableType(uint64_t address, llvm::Type *) const { @@ -179,6 +194,11 @@ void ProxyTypeProvider::QueryRegisterStateAtInstruction( typed_reg_cb); } +std::vector ProxyTypeProvider::NamedTypes(void) const { + return this->deleg.NamedTypes(); +} + + const ::anvill::TypeDictionary &ProxyTypeProvider::Dictionary(void) const { return this->deleg.Dictionary(); } diff --git a/lib/Specification.cpp b/lib/Specification.cpp index c02403261..51ee5e54f 100644 --- a/lib/Specification.cpp +++ b/lib/Specification.cpp @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -47,8 +49,11 @@ SpecificationImpl::ParseSpecification( const ::specification::Specification &spec) { std::vector dec_err; std::unordered_map type_map; - ProtobufTranslator translator(type_translator, arch.get(), type_map); - auto map_res = translator.DecodeTypeMap(spec.type_aliases()); + std::unordered_map 
type_names; + ProtobufTranslator translator(type_translator, arch.get(), type_map, + type_names); + auto map_res = + translator.DecodeTypeMap(spec.type_aliases(), spec.type_names()); if (!map_res.Succeeded()) { dec_err.push_back(map_res.Error()); } @@ -69,7 +74,7 @@ SpecificationImpl::ParseSpecification( auto func_ptr = new FunctionDecl(std::move(func_obj)); - for (const auto& [uid, bb]: func_ptr->cfg) { + for (const auto &[uid, bb] : func_ptr->cfg) { if (uid_to_block.count(uid)) { std::stringstream ss; ss << "Duplicate block Uid: " << uid.value; @@ -254,6 +259,10 @@ SpecificationImpl::ParseSpecification( required_globals = {spec.required_globals().begin(), spec.required_globals().end()}; + for (const auto &[_k, v] : spec.type_names()) { + this->named_types.push_back(v); + } + return dec_err; } @@ -316,7 +325,9 @@ GetArch(llvm::LLVMContext &context, case ::specification::ARCH_AARCH32: arch_name = remill::kArchAArch32LittleEndian; break; - case ::specification::ARCH_SPARC32: arch_name = remill::kArchSparc32_SLEIGH; break; + case ::specification::ARCH_SPARC32: + arch_name = remill::kArchSparc32_SLEIGH; + break; case ::specification::ARCH_SPARC64: arch_name = remill::kArchSparc64; break; case ::specification::ARCH_PPC: arch_name = remill::kArchPPC; break; } @@ -380,6 +391,7 @@ Specification::DecodeFromPB(llvm::LLVMContext &context, const std::string &pb) { anvill::Result Specification::DecodeFromPB(llvm::LLVMContext &context, std::istream &pb) { ::specification::Specification spec; + if (!spec.ParseFromIstream(&pb)) { return {"Failed to parse specification"}; } @@ -432,8 +444,7 @@ Specification::FunctionAt(std::uint64_t address) const { } // Return the block with `uid`, or an empty `shared_ptr`. 
-std::shared_ptr -Specification::BlockAt(Uid uid) const { +std::shared_ptr Specification::BlockAt(Uid uid) const { auto it = impl->uid_to_block.find(uid); if (it != impl->uid_to_block.end()) { return std::shared_ptr(impl, it->second); diff --git a/lib/Specification.h b/lib/Specification.h index 59f20dc64..8034acebe 100644 --- a/lib/Specification.h +++ b/lib/Specification.h @@ -19,6 +19,7 @@ #include #include #include +#include #include "anvill/Passes/BasicBlockPass.h" @@ -98,6 +99,8 @@ class SpecificationImpl std::unordered_map control_flow_overrides; std::unordered_set required_globals; + + std::vector named_types; }; } // namespace anvill diff --git a/lib/Type.cpp b/lib/Type.cpp index e197d59f8..3b79e8b92 100644 --- a/lib/Type.cpp +++ b/lib/Type.cpp @@ -10,6 +10,8 @@ #include #include +#include + #define ANVILL_USE_WRAPPED_TYPES 0 // clang-format off @@ -84,6 +86,7 @@ class TypeSpecifierImpl { llvm::MDNode *TypeToMetadata(std::shared_ptr type); llvm::MDNode *TypeToMetadata(std::shared_ptr type); llvm::MDNode *TypeToMetadata(UnknownType type); + llvm::MDNode *TypeToMetadata(TypeName type); }; // Translates an llvm::Type to a type that conforms to the spec in @@ -425,6 +428,13 @@ llvm::MDNode *TypeSpecifierImpl::TypeToMetadata(UnknownType type) { return llvm::MDNode::get(context, {str, llvm::ConstantAsMetadata::get(size)}); } +llvm::MDNode *TypeSpecifierImpl::TypeToMetadata(TypeName type) { + auto str = llvm::MDString::get(context, "Typename"); + auto nm = llvm::MDString::get(context, type.name); + + return llvm::MDNode::get(context, {str, nm}); +} + namespace { #if ANVILL_USE_WRAPPED_TYPES @@ -753,8 +763,26 @@ TypeTranslator::DecodeFromSpec(TypeSpec spec) const { unk.size == UINT32_MAX ? 
32 : unk.size * 8); } + + if (std::holds_alternative(spec)) { + auto nm = std::get(spec); + auto sty = getOrCreateNamedStruct(this->impl->context, nm.name); + CHECK(sty); + return sty; + } + return TypeSpecificationError{TypeSpecificationError::ErrorCode::InvalidState, - "Function fell out of bounds"}; + "Unhandled type specification variant"}; +} + +llvm::StructType *getOrCreateNamedStruct(llvm::LLVMContext &context, + llvm::StringRef Name) { + auto res = llvm::StructType::getTypeByName(context, Name); + if (res) { + return res; + } + + return llvm::StructType::create(context, Name); } namespace { From 30969fbd50bda72ddd73e226d750515cdc8b677a Mon Sep 17 00:00:00 2001 From: 2over12 Date: Wed, 28 Feb 2024 14:47:09 -0500 Subject: [PATCH 161/163] bump remill and fix initial state pc (#407) * bump remill and fix initial state pc * bump remill * don't install python3 bindings in CI * dont set body twice if struct named twice * completely remove python * try to be robust to overlapping vars --------- Co-authored-by: William Tan <1284324+Ninja3047@users.noreply.github.com> --- .github/workflows/build.yml | 1 + lib/CMakeLists.txt | 4 ---- lib/Lifters/BasicBlockLifter.cpp | 14 +++++++++----- lib/Passes/ReplaceStackReferences.cpp | 3 ++- lib/Protobuf.cpp | 4 +++- libraries/lifting-tools-ci | 2 +- remill | 2 +- 7 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 57f6a23ec..b11855114 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -286,6 +286,7 @@ jobs: -Dsleigh_DIR:PATH=${{ steps.remill_installer.outputs.PATH }}/usr/local/lib/cmake/sleigh \ -DANVILL_ENABLE_TESTS=true \ -DANVILL_ENABLE_INSTALL=true \ + -DANVILL_INSTALL_PYTHON3_LIBS=false \ -G Ninja \ ${{ steps.build_paths.outputs.SOURCE }}/anvill diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 16d3e66b1..882775249 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -198,10 +198,6 @@ 
target_link_libraries(anvill add_dependencies(anvill check_git_anvill) -if(ANVILL_ENABLE_PYTHON3_LIBS) - add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../python" python) -endif() - # if(ANVILL_ENABLE_TESTS) # add_subdirectory("tests") # endif() diff --git a/lib/Lifters/BasicBlockLifter.cpp b/lib/Lifters/BasicBlockLifter.cpp index f0a81d36a..ec402cfbc 100644 --- a/lib/Lifters/BasicBlockLifter.cpp +++ b/lib/Lifters/BasicBlockLifter.cpp @@ -200,7 +200,7 @@ bool BasicBlockLifter::DoInterProceduralControlFlow( auto func = block->getParent(); auto should_return = func->getArg(kShouldReturnArgNum); builder.CreateStore(llvm::Constant::getAllOnesValue( - llvm::IntegerType::getInt1Ty(llvm_context)), + llvm::IntegerType::getInt1Ty(llvm_context)), should_return); } } @@ -417,7 +417,8 @@ llvm::MDNode *BasicBlockLifter::GetBasicBlockUidAnnotation(Uid uid) const { llvm::Function *BasicBlockLifter::DeclareBasicBlockFunction() { std::string name_ = "func" + std::to_string(decl.address) + "basic_block" + - std::to_string(this->block_def.addr) + "_" + std::to_string(this->block_def.uid.value); + std::to_string(this->block_def.addr) + "_" + + std::to_string(this->block_def.uid.value); auto &context = this->semantics_module->getContext(); llvm::FunctionType *lifted_func_type = llvm::dyn_cast(remill::RecontextualizeType( @@ -597,7 +598,8 @@ BasicBlockFunction BasicBlockLifter::CreateBasicBlockFunction() { auto pc_ptr = pc_reg->AddressOf(this->state_ptr, ir); auto pc_val = this->options.program_counter_init_procedure( ir, this->address_type, this->block_def.addr); - ir.CreateStore(pc_val, pc_ptr); + + ir.CreateStore(ir.CreateZExtOrTrunc(pc_val, pc_reg_type), pc_ptr); std::array args = { this->state_ptr, pc_val, mem_res, next_pc, should_return}; @@ -648,7 +650,8 @@ void BasicBlockLifter::TerminateBasicBlockFunction( llvm::IRBuilder<> calling_bb_builder(calling_bb); auto edge_bb = this->decl.cfg.find(edge_uid); CHECK(edge_bb != this->decl.cfg.end()); - auto &child_lifter = 
this->flifter.GetOrCreateBasicBlockLifter(edge_bb->second.uid); + auto &child_lifter = + this->flifter.GetOrCreateBasicBlockLifter(edge_bb->second.uid); auto retval = child_lifter.ControlFlowCallBasicBlockFunction( caller, calling_bb_builder, this->state_ptr, bbfunc.stack, next_mem); if (this->flifter.curr_decl->type->getReturnType()->isVoidTy()) { @@ -658,7 +661,8 @@ void BasicBlockLifter::TerminateBasicBlockFunction( } auto succ_const = llvm::ConstantInt::get( - llvm::cast(this->address_type), edge_bb->second.addr); + llvm::cast(this->address_type), + edge_bb->second.addr); sw->addCase(succ_const, calling_bb); } diff --git a/lib/Passes/ReplaceStackReferences.cpp b/lib/Passes/ReplaceStackReferences.cpp index 60c94a38d..958cd5021 100644 --- a/lib/Passes/ReplaceStackReferences.cpp +++ b/lib/Passes/ReplaceStackReferences.cpp @@ -235,11 +235,12 @@ class StackModel { GetParamDeclSize(var) - 1); } - LOG(FATAL) << "Inserting variable that overlaps with current frame " + LOG(ERROR) << "Inserting variable that overlaps with current frame " << var.ordered_locs[0].mem_offset << " with size: " << GetParamDeclSize(var) << " Overlaps with " << oparam->decl.decl.ordered_locs[0].mem_offset << " with size " << GetParamDeclSize(oparam->decl.decl); + return; } this->frame.insert({var.ordered_locs[0].mem_offset, {index, var}}); diff --git a/lib/Protobuf.cpp b/lib/Protobuf.cpp index 9e267f867..1064876c6 100644 --- a/lib/Protobuf.cpp +++ b/lib/Protobuf.cpp @@ -933,7 +933,9 @@ Result ProtobufTranslator::DecodeTypeMap( std::string name = names.at(k); auto res = getOrCreateNamedStruct(this->context, name); - res->setBody(sty->elements()); + if (res->isOpaque()) { + res->setBody(sty->elements()); + } } type_names[k] = names.at(k); } else { diff --git a/libraries/lifting-tools-ci b/libraries/lifting-tools-ci index 8322d34e2..95b0aa262 160000 --- a/libraries/lifting-tools-ci +++ b/libraries/lifting-tools-ci @@ -1 +1 @@ -Subproject commit 8322d34e2360688938f2ef9ba877aaa613101e32 +Subproject 
commit 95b0aa2621908df4982a18b02ef50fcb94d1044c diff --git a/remill b/remill index 17cff6b4d..874490a89 160000 --- a/remill +++ b/remill @@ -1 +1 @@ -Subproject commit 17cff6b4df900c68ff583debec5fabe76a01d9a5 +Subproject commit 874490a894c5c8f0920af0fb583ca500abc5d65d From b47f27515a6486b6e043afe3288b65617d6fd8d4 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Wed, 28 Feb 2024 18:37:50 -0500 Subject: [PATCH 162/163] return early if we cannot find the entry block in the cfg (#408) --- lib/Lifters/FunctionLifter.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index f6ffd2d1e..216bd1def 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -409,6 +409,13 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { return nullptr; } + // Function has no valid instructions. + auto &cfg = decl.cfg; + if (cfg.find(decl.entry_uid) == cfg.end()) { + LOG(ERROR) << "Function missing entry block " << std::hex << decl.address; + return nullptr; + } + // This is our higher-level function, i.e. it presents itself more like // a function compiled from C/C++, rather than being a three-argument Remill // function. 
In this function, we will stack-allocate a `State` structure, From 52f9638b023417c9bdbbb1791867cacc38c68888 Mon Sep 17 00:00:00 2001 From: William Tan <1284324+Ninja3047@users.noreply.github.com> Date: Thu, 29 Feb 2024 10:46:05 -0500 Subject: [PATCH 163/163] Invalid func fix pt2 (#409) * return decl instead of nullptr * return error/null instead of decl --- lib/Lifters/FunctionLifter.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/Lifters/FunctionLifter.cpp b/lib/Lifters/FunctionLifter.cpp index 216bd1def..d601aebf1 100644 --- a/lib/Lifters/FunctionLifter.cpp +++ b/lib/Lifters/FunctionLifter.cpp @@ -409,13 +409,6 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { return nullptr; } - // Function has no valid instructions. - auto &cfg = decl.cfg; - if (cfg.find(decl.entry_uid) == cfg.end()) { - LOG(ERROR) << "Function missing entry block " << std::hex << decl.address; - return nullptr; - } - // This is our higher-level function, i.e. it presents itself more like // a function compiled from C/C++, rather than being a three-argument Remill // function. In this function, we will stack-allocate a `State` structure, @@ -440,6 +433,14 @@ llvm::Function *FunctionLifter::LiftFunction(const FunctionDecl &decl) { return native_func; } + // Function has no valid instructions. + auto &cfg = decl.cfg; + if (cfg.find(decl.entry_uid) == cfg.end()) { + LOG(ERROR) << "Function missing entry block " << std::hex << decl.address; + return nullptr; + } + + // Every lifted function starts as a clone of __remill_basic_block. That // prototype has multiple arguments (memory pointer, state pointer, program // counter). This extracts the state pointer.