Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
root = true

[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true

[*.py]
indent_style = space
indent_size = 4

[*.{yml,yaml,toml}]
indent_style = space
indent_size = 2

[*.{cc,h}]
indent_style = space
indent_size = 4

[Makefile]
indent_style = tab
21 changes: 21 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: CI

on:
push:
branches: [main]
pull_request:

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install ruff
run: pip install "ruff==0.15.17"
- name: Lint
run: ruff check src/
- name: Check formatting
run: ruff format --check src/
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Install hooks with: pip install pre-commit && pre-commit install
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.17
hooks:
- id: ruff-check
args: [--fix]
- id: ruff-format
20 changes: 20 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ heavy = [
"pycparser==2.22",
"capstone==5.0.0.post1",
]
# Development tools (lint, format, tests).
dev = [
"ruff==0.15.17",
"pytest",
]

[project.scripts]
rtrace = "rtrace.main:main"
Expand All @@ -41,3 +46,18 @@ package-dir = {"" = "src"}
[tool.setuptools.packages.find]
where = ["src"]
include = ["rtrace*"]

[tool.ruff]
target-version = "py39"
line-length = 100
# Only the Python package and tests; native code and submodules are out of scope.
include = ["src/rtrace/**/*.py", "tests/**/*.py", "pyproject.toml"]

[tool.ruff.lint]
select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
"F", # pyflakes
"I", # isort (import sorting)
"B", # flake8-bugbear
]
1 change: 1 addition & 0 deletions src/rtrace/boundary_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def boundary_detection_nucleus(so_path):
# nucleus is a native module; import lazily so callers that never reach the
# nucleus path do not require it at import time.
import nucleus

context = nucleus.load(so_path, binary_base=0x0)
entry_addrs = []
for function in context.cfg.functions:
Expand Down
3 changes: 2 additions & 1 deletion src/rtrace/disassembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
def _get_disassembler():
global _DISASSEMBLER
if _DISASSEMBLER is None:
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
from capstone import CS_ARCH_X86, CS_MODE_64, Cs

disassembler = Cs(CS_ARCH_X86, CS_MODE_64)
disassembler.detail = True
disassembler.skipdata = True
Expand Down
1 change: 1 addition & 0 deletions src/rtrace/edition.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
which is installed only in the heavy edition. ``find_spec`` checks availability
without paying the cost of importing ``angr``.
"""

import importlib.util
import sys

Expand Down
86 changes: 58 additions & 28 deletions src/rtrace/function_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,17 @@ def __init__(self, pid, tids, log_dir):
self.log_dir = log_dir
self.block_info = {}
for tid in tids:
with open(f'{log_dir}/rtrace-intermediate-{pid}-{tid}-block_info.log', 'r') as f:
with open(f"{log_dir}/rtrace-intermediate-{pid}-{tid}-block_info.log", "r") as f:
for line in f:
parts = line.split(":")
assert len(parts) == 2, f"Invalid block info line: {line}"
addr = int(parts[0].strip())
num_insts = int(parts[1].strip())
if addr in self.block_info:
assert self.block_info[
addr] == num_insts, f"Duplicate block address {addr} with different instruction counts: {self.block_info[addr]} vs {num_insts}"
assert self.block_info[addr] == num_insts, (
f"Duplicate block address {addr} with different instruction "
f"counts: {self.block_info[addr]} vs {num_insts}"
)
else:
self.block_info[addr] = num_insts

Expand All @@ -56,30 +58,47 @@ def get_block_size(self, abs_addr):


class CallLogProcessor(object):
def __init__(self, process_memory: ProcessMemory, block_info: BlockInfo, pid, tid, log_dir, so_names=None):
def __init__(
self, process_memory: ProcessMemory, block_info: BlockInfo, pid, tid, log_dir, so_names=None
):
self.process_memory = process_memory
self.log_path = log_dir
self.raw_logs = []
self.abs_addr_to_func = {}
self.root_call = Call("root", 0, 0, "root", 0,)
self.root_call = Call(
"root",
0,
0,
"root",
0,
)
self.block_info = block_info.block_info
with open(f'{log_dir}/rtrace-intermediate-{pid}-{tid}-func_args_ret.log', 'r') as f:
with open(f"{log_dir}/rtrace-intermediate-{pid}-{tid}-func_args_ret.log", "r") as f:
if so_names is None:
for line in f:
self.raw_logs.append(line.strip())
else:
so_names=so_names.split(",")
so_names= set([so_name.strip() for so_name in so_names])
so_names = so_names.split(",")
so_names = set([so_name.strip() for so_name in so_names])
for line in f:
if self.is_entry(line) or self.is_exit(line):
addr = self.get_entry_address(line) if self.is_entry(line) else self.get_exit_address(line)
addr = (
self.get_entry_address(line)
if self.is_entry(line)
else self.get_exit_address(line)
)
module = self.process_memory.get_module_at_address(addr)
if module is not None:
for so_name in so_names:
if so_name in module.path:
self.raw_logs.append(line.strip())
break
print(len(self.raw_logs), "function call logs loaded from", f'{log_dir}/rtrace-intermediate-{pid}-{tid}-func_args_ret.log')
print(
len(self.raw_logs),
"function call logs loaded from",
f"{log_dir}/rtrace-intermediate-{pid}-{tid}-func_args_ret.log",
)

def _create_call(self, abs_address):
func = None
if abs_address in self.abs_addr_to_func:
Expand Down Expand Up @@ -112,10 +131,16 @@ def process_logs(self):
elif CallLogProcessor.is_block(log):
total_blocks += 1
addr = CallLogProcessor.get_block(log)
if addr not in self.block_info or \
self.process_memory.get_module_at_address(addr) is None or \
self.process_memory.get_module_at_address(addr).get_function_at_address(addr) is None or \
self.process_memory.get_module_at_address(addr).get_function_at_address(addr).start != stack[-1].relative_addr: # the block does not belong to the current function
if (
addr not in self.block_info
or self.process_memory.get_module_at_address(addr) is None
or self.process_memory.get_module_at_address(addr).get_function_at_address(addr)
is None
or self.process_memory.get_module_at_address(addr)
.get_function_at_address(addr)
.start
!= stack[-1].relative_addr
): # the block does not belong to the current function
# this might be due to exception handling
unmatch_func_block += 1
else: # only count the blocks that belong to the current function
Expand All @@ -128,10 +153,13 @@ def process_logs(self):
addr = CallLogProcessor.get_exit_address(log)
call = stack.pop()
if call.abs_addr != addr:
# should not happend but it happens with some exit:0, might due to exception handling
# should not happen but it does with some exit:0,
# might be due to exception handling
unmatch_entry_exit += 1
print(
f"Unmatched entry/exit: {unmatch_entry_exit}/{total_calls}; final stack depth: {len(stack)}")
f"Unmatched entry/exit: {unmatch_entry_exit}/{total_calls}; "
f"final stack depth: {len(stack)}"
)

print(f"Unmatched function block: {unmatch_func_block}/{total_blocks}")

Expand All @@ -148,22 +176,24 @@ def serialize_call(cur_call):
"ret_val": cur_call.ret_val,
"executed_blocks": cur_call.executed_blocks,
"executed_insts": cur_call.executed_insts,
"calls": [serialize_call(c) for c in cur_call.calls]
"calls": [serialize_call(c) for c in cur_call.calls],
}
overview.append({
"so_path": cur_call.so_path,
"name": cur_call.name,
"start_addr": hex(cur_call.relative_addr),
"num_calls": len(cur_call.calls),
"executed_blocks": cur_call.executed_blocks,
"executed_insts": cur_call.executed_insts
})
overview.append(
{
"so_path": cur_call.so_path,
"name": cur_call.name,
"start_addr": hex(cur_call.relative_addr),
"num_calls": len(cur_call.calls),
"executed_blocks": cur_call.executed_blocks,
"executed_insts": cur_call.executed_insts,
}
)
return call_json

call_json = serialize_call(self.root_call)
with open(output_path, 'w') as f:
with open(output_path, "w") as f:
json.dump(call_json, f, indent=4)
pd.DataFrame(overview).to_csv(
output_path.replace('.json', '.csv'), index=False)
pd.DataFrame(overview).to_csv(output_path.replace(".json", ".csv"), index=False)

@staticmethod
def is_entry(line):
Expand Down
Loading
Loading