Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
if: matrix.regression-tests
with:
path: tests/regression/test_data
key: test-data-llvm${{ matrix.llvm }}-${{ hashFiles('tests/regression/test_specs/*') }}
key: test-data-llvm${{ matrix.llvm }}-bc-${{ hashFiles('tests/regression/test_specs/*') }}

# Download kernel sources:
# - if test_data is cached, we only need the kernel for unit tests
Expand Down Expand Up @@ -113,4 +113,4 @@ jobs:
uses: actions/cache@v4
with:
path: tests/regression/test_data
key: test-data-llvm${{ matrix.llvm }}-${{ hashFiles('tests/regression/test_specs/*') }}
key: test-data-llvm${{ matrix.llvm }}-bc-${{ hashFiles('tests/regression/test_specs/*') }}
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ cmake-build-debug/
.vscode/

# Temporary files from regression tests. Used for debugging purposes only.
*.ll
*.bc
*.smt2
*.pdf

Expand Down
26 changes: 13 additions & 13 deletions diffkemp/building/cc_wrapper/cc_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,15 @@ def wrapper(argv):
contains_source = contains_source or is_source_file
if index > 1 and argv[index - 1] == "-o":
if is_object_file and not linking:
# Compiling to object file: swap .o with .ll
arg = arg.rsplit(".", 1)[0] + ".ll"
# Compiling to object file: swap .o with .bc
arg = arg.rsplit(".", 1)[0] + ".bc"
if not is_object_file and linking:
# Linking: add a .llw suffix (LLVM IR whole)
arg = arg + ".llw"
# Linking: add a .bcw suffix (LLVM IR whole)
arg = arg + ".bcw"
output_file = arg
elif is_object_file and linking:
# Input to linking phase: change suffix to .ll
arg = arg.rsplit(".", 1)[0] + ".ll"
# Input to linking phase: change suffix to .bc
arg = arg.rsplit(".", 1)[0] + ".bc"
clang = llvm_link
elif is_source_file and linking:
# Mark as linking with sources to detect hybrid mode
Expand All @@ -153,16 +153,16 @@ def wrapper(argv):
# Compile/link mode with object files detected
# Drop object files and revert to normal compile/link mode
clang = old_clang
clang_argv = [arg for arg in clang_argv if not arg.endswith(".ll")]
clang_argv = [arg for arg in clang_argv if not arg.endswith(".bc")]

# Do not continue if output is not .ll or .llw
# Do not continue if output is not .bc or .bcw
# Note: this means that this is neither compilation nor linking
if (output_file is not None and not output_file.endswith(".ll") and
not output_file.endswith(".llw")):
if (output_file is not None and not output_file.endswith(".bc") and
not output_file.endswith(".bcw")):
return 0

# Do not run clang on conftest files
if output_file in ["conftest.ll", "conftest.llw"] or "conftest.c" in argv:
if output_file in ["conftest.bc", "conftest.bcw"] or "conftest.c" in argv:
return 0

# Not compiling C source file
Expand All @@ -176,7 +176,7 @@ def wrapper(argv):
elif not linking:
# Compiling to default output file
db.extend(["o:" + os.path.join(os.getcwd(),
arg.rsplit(".", 1)[0] + ".ll")
arg.rsplit(".", 1)[0] + ".bc")
for arg in clang_argv if not arg.endswith(".c")])

# Analyze and modify parameters for clang (phase 2)
Expand All @@ -192,7 +192,7 @@ def wrapper(argv):
else:
# Keep only arguments with input files (and llvm-link itself)
clang_argv = [arg for arg in clang_argv if arg == clang or
arg.endswith(".ll") or arg.endswith(".llw") or
arg.endswith(".bc") or arg.endswith(".bcw") or
arg == "-o"]
# Remove non-existent files
# Note: these might have been e.g. generated from assembly
Expand Down
2 changes: 1 addition & 1 deletion diffkemp/llvm_ir/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
def get_clang_default_options(default_optim=True):
"""Returns clang options for compiling c files to LLVM IR.
:param default_optim: By default adds also optimization flags."""
opts = ["-S", "-emit-llvm", "-g", "-fdebug-macro", "-Wno-format-security"]
opts = ["-c", "-emit-llvm", "-g", "-fdebug-macro", "-Wno-format-security"]
if default_optim:
opts.extend(["-O1", "-Xclang", "-disable-llvm-passes"])
return opts
22 changes: 13 additions & 9 deletions diffkemp/llvm_ir/kernel_llvm_source_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def find_llvm_with_symbol_def(self, symbol):
llvm_filename = self._build_source_to_llvm(source_path)
if os.path.isfile(llvm_filename):
mod = LlvmModule(llvm_filename)
if mod.has_function(symbol) or mod.has_global(symbol):
if mod.has_definition(symbol):
break
except BuildException:
pass
Expand Down Expand Up @@ -395,9 +395,9 @@ def _gcc_to_llvm(gcc_command):
if param.startswith('-D"DEBUG_HASH2='):
param = '-D"DEBUG_HASH2=1"'

# Output name is given by replacing .c by .ll in source name
# Output name is given by replacing .c by .bc in source name
if param.endswith(".c"):
output_file = "{}.ll".format(param[:-2])
output_file = "{}.bc".format(param[:-2])

command.append(KernelLlvmSourceBuilder._strip_bash_quotes(param))
if output_file is None:
Expand All @@ -414,10 +414,10 @@ def _ld_to_llvm(ld_command):
:param ld_command: Command to convert
:return Corresponding llvm-link command.
"""
command = ["llvm-link", "-S"]
command = ["llvm-link"]
for param in ld_command.split():
if param.endswith(".o"):
command.append("{}.ll".format(param[:-2]))
command.append("{}.bc".format(param[:-2]))
elif param == "-o":
command.append(param)
return command
Expand Down Expand Up @@ -486,8 +486,12 @@ def _get_build_object(command):

@staticmethod
def _get_build_source(command):
"""Get name of the object file built by the command."""
return command[command.index("-c") + 1]
"""
Get name of the source file built by the command.
The commands start with 'clang -c -emit-llvm' to avoid linking
and the source file follows '-c' later in the command.
"""
return command[command.index("-c", 3) + 1]

def _kbuild_object_command(self, object_file):
"""
Expand Down Expand Up @@ -568,7 +572,7 @@ def _build_source_to_llvm(self, source_file):
:param source_file: C source to build
:return: Created LLVM IR file
"""
llvm_file = "{}.ll".format(source_file[:-2])
llvm_file = "{}.bc".format(source_file[:-2])
if (not os.path.isfile(llvm_file) or os.path.getmtime(llvm_file) <
os.path.getmtime(source_file)):
cwd = os.getcwd()
Expand Down Expand Up @@ -627,7 +631,7 @@ def _build_kernel_mod_to_llvm(self, mod_dir, mod_name):
obj = self._get_build_object(c)
if not os.path.isfile(obj) or built:
check_call(c, stderr=stderr)
llvm_file = os.path.join(mod_dir, "{}.ll".format(file_name))
llvm_file = os.path.join(mod_dir, "{}.bc".format(file_name))
opt_llvm(llvm_file)
return llvm_file
except CalledProcessError:
Expand Down
102 changes: 73 additions & 29 deletions diffkemp/llvm_ir/llvm_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import os
import re
import shutil
from subprocess import check_call, CalledProcessError
from subprocess import check_call, check_output
from subprocess import CalledProcessError, PIPE, run


# Set of standard functions that are supported, so they should not be
# included in function collecting.
Expand Down Expand Up @@ -76,10 +78,10 @@ def link_modules(self, modules):
return False

if "-linked" not in self.llvm:
new_llvm = "{}-linked.ll".format(self.llvm[:-3])
new_llvm = "{}-linked.bc".format(self.llvm[:-3])
else:
new_llvm = self.llvm
link_command = ["llvm-link", "-S", self.llvm]
link_command = ["llvm-link", self.llvm]
link_command.extend([m.llvm for m in link_llvm_modules])
link_command.extend(["-o", new_llvm])
opt_command = get_opt_command([("constmerge", "module")], new_llvm)
Expand Down Expand Up @@ -112,17 +114,30 @@ def find_param_var(self, param):
name = self.llvm_module.find_param_var(param)
return LlvmParam(name, []) if name is not None else None

def module_has(self, symtype_pattern, symbol):
    """
    Check if the module contains a symbol of the given type.
    Used in has_function, has_global and has_definition.

    Runs llvm-nm on the module file and searches its output for a line
    ending with "<type> <symbol>", where <type> matches symtype_pattern.
    :param symtype_pattern: Regex fragment matching the wanted llvm-nm
                            symbol type letter(s), e.g. "[Tt]".
    :param symbol: Exact name of the symbol to look for.
    :return: True if such a symbol is listed, False otherwise.
    :raises CalledProcessError: if llvm-nm exits with a non-zero status.
    """
    # Use an absolute path: llvm-nm is started in the module's directory,
    # so a relative path would be resolved against the wrong directory
    # (and an empty directory name is not a valid cwd at all).
    llvm_path = os.path.abspath(self.llvm)
    nm_out = check_output(["llvm-nm", llvm_path],
                          cwd=os.path.dirname(llvm_path))
    # Anchor the name at the end of the line, otherwise searching for "foo"
    # would also succeed on a module that only contains "foo_bar".
    pattern = re.compile(rf"{symtype_pattern} {re.escape(symbol)}\r?$",
                         re.MULTILINE)
    return pattern.search(nm_out.decode()) is not None

def has_function(self, fun):
"""Check if module contains a function definition."""
pattern = re.compile(r"^define.*@{}\(".format(fun), flags=re.MULTILINE)
with open(self.llvm, "r") as llvm_file:
return pattern.search(llvm_file.read()) is not None
return self.module_has("[T|t]", fun)

def has_global(self, glob):
"""Check if module contains a global variable with the given name."""
pattern = re.compile(r"^@{}\s*=".format(glob), flags=re.MULTILINE)
with open(self.llvm, "r") as llvm_file:
return pattern.search(llvm_file.read()) is not None
return self.module_has("[DdBbCU]", glob)

def has_definition(self, symbol):
    """
    Tell whether the module defines the given symbol.

    Delegates to module_has with the symbol-type letters D, d, B, b, C,
    T and t, i.e. the undefined type "U" is deliberately not accepted.
    :param symbol: Name of the symbol to look for.
    :return: Result of module_has for the given symbol.
    """
    defined_symbol_types = "[DdBbCTt]"
    return self.module_has(defined_symbol_types, symbol)

def is_declaration(self, fun):
"""
Expand Down Expand Up @@ -156,16 +171,21 @@ def move_to_other_root_dir(self, old_root, new_root):
if self.llvm.startswith(old_root):
dest_llvm = os.path.join(new_root,
os.path.relpath(self.llvm, old_root))
# Copy the .ll file and replace all occurrences of the old root by
# the new root. There are usually in debug info.
with open(self.llvm, "r") as llvm:
with open(dest_llvm, "w") as llvm_new:
for line in llvm.readlines():
if "constant" not in line:
llvm_new.write(line.replace(old_root.strip("/"),
new_root.strip("/")))
else:
llvm_new.write(line)
# Copy the .bc file and replace all occurrences of the old root by
# the new root. These are usually in debug info. Use textual
# LLVM IR to find the paths.
command = ["llvm-dis", self.llvm, "-o", "-"]
source_dir = os.path.dirname(self.llvm)
output = check_output(command, cwd=source_dir).decode()
new_lines = []
for line in output.splitlines():
if "constant" not in line:
new_lines.append(line.replace(old_root.strip("/"),
new_root.strip("/")))
else:
new_lines.append(line)
run(["llvm-as", "-o", dest_llvm], input="\n".join(new_lines),
stdout=PIPE, stderr=PIPE, text=True, check=True)
self.llvm = dest_llvm

if self.source and self.source.startswith(old_root):
Expand All @@ -178,18 +198,42 @@ def move_to_other_root_dir(self, old_root, new_root):
def get_included_sources(self):
"""
Get the list of source files that this module includes.
Requires debugging information.
Sources are extracted from the llvm-bcanalyzer output (with --dump).
They are located in the first METADATA_BLOCK as STRINGS.
The first string is the file name, second is project directory,
the includes follow.
"""
# Search for all .h files mentioned in the debug info.
pattern = re.compile(r"filename:\s*\"([^\"]*)\", "
r"directory:\s*\"([^\"]*)\"")
source_dir = ''.join(os.path.split(self.llvm)[0])
command = ["llvm-bcanalyzer", self.llvm, "-dump"]
bc_out = check_output(command, cwd=source_dir)
result = set()
with open(self.llvm, "r") as llvm:
for line in llvm.readlines():
s = pattern.search(line)
if (s and (s.group(1).endswith(".h") or
s.group(1).endswith(".c"))):
result.add(os.path.join(s.group(2), s.group(1)))
in_metadata = False
in_strings = False
root_dir = ""
source_file = ""
for line in bc_out.decode().splitlines():
line = line.strip()
if line.startswith("<METADATA_BLOCK "):
in_metadata = True
elif in_metadata and line.startswith("<STRINGS "):
in_strings = True
continue
if not in_strings:
continue
if line == "}":
break
# Extract paths: 1st path is source file name,
# 2nd is project directory
string = line[1:-1]
if not source_file:
source_file = string
elif not root_dir:
root_dir = string
# Add source file when project directory is known
result.add(os.path.join(root_dir, source_file))
elif (string.endswith(".h") or string.endswith(".c")
) and not string.startswith("/"):
result.add(os.path.join(root_dir, string))
return result

def get_functions_using_param(self, param):
Expand Down
2 changes: 1 addition & 1 deletion diffkemp/llvm_ir/single_c_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, source_dir, c_file_name, clang="clang",
:param default_optim: use default optimisation flags
and run LLVM IR simplification passes
"""
llvm_file_name = os.path.splitext(c_file_name)[0] + ".ll"
llvm_file_name = os.path.splitext(c_file_name)[0] + ".bc"
SingleLlvmFinder.__init__(self, source_dir, llvm_file_name)

self.c_file_name = c_file_name
Expand Down
17 changes: 10 additions & 7 deletions diffkemp/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
import subprocess
import re
import sys
from subprocess import check_output

LLVM_FUNCTION_REGEX = re.compile(r"^define.*@(\w+)\(", flags=re.MULTILINE)
LLVM_FUNCTION_REGEX = re.compile(r"^.* [T|t] ([\w|\.|\$]+)",
flags=re.MULTILINE)
# Name of YAML output file created by diffkemp compare command.
CMP_OUTPUT_FILE = "diffkemp-out.yaml"

Expand Down Expand Up @@ -42,7 +44,7 @@ def get_opt_command(passes, llvm_file, overwrite=True):
pass_names = map(lambda p: p[0], passes)
opt_command.extend(map(lambda pass_name: f"-{pass_name}", pass_names))
if overwrite:
opt_command.extend(["-S", "-o", llvm_file])
opt_command.extend(["-o", llvm_file])
return opt_command


Expand Down Expand Up @@ -96,9 +98,10 @@ def get_functions_from_llvm(llvm_files):
sys.stderr.write(
f"Warning: llvm file '{llvm_filename}' does not exist\n")
continue
with open(llvm_filename, 'r') as llvm_file:
llvm_file_content = llvm_file.read()
matches = LLVM_FUNCTION_REGEX.findall(llvm_file_content)
for match in matches:
functions[match] = llvm_filename
command = ["llvm-nm", llvm_filename]
source_dir = ''.join(os.path.split(llvm_filename)[0])
nm_out = check_output(command, cwd=source_dir)
matches = LLVM_FUNCTION_REGEX.findall(nm_out.decode())
for match in matches:
functions[match] = llvm_filename
return functions
6 changes: 3 additions & 3 deletions tests/regression/mock_source_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, source_dir, real_source_tree):
self.real_source_tree = real_source_tree

def get_module_for_symbol(self, symbol, created_before=None):
llvm_file = os.path.join(self.source_dir, "{}.ll".format(symbol))
llvm_file = os.path.join(self.source_dir, "{}.bc".format(symbol))
src_file = os.path.join(self.source_dir, "{}.c".format(symbol))

if not os.path.exists(llvm_file) and self.real_source_tree is not None:
Expand All @@ -35,7 +35,7 @@ def get_module_for_symbol(self, symbol, created_before=None):
return LlvmModule(llvm_file, src_file)

def get_kernel_module(self, mod_dir, mod_name):
llvm_file = os.path.join(self.source_dir, "{}.ll".format(mod_name))
llvm_file = os.path.join(self.source_dir, "{}.bc".format(mod_name))

if not os.path.exists(llvm_file):
assert self.real_source_tree is not None
Expand All @@ -45,7 +45,7 @@ def get_kernel_module(self, mod_dir, mod_name):
return LlvmModule(llvm_file)

def get_sysctl_module(self, sysctl):
llvm_file = os.path.join(self.source_dir, "{}.ll".format(sysctl))
llvm_file = os.path.join(self.source_dir, "{}.bc".format(sysctl))
table_file = os.path.join(self.source_dir, "table")

if not os.path.exists(llvm_file):
Expand Down
2 changes: 1 addition & 1 deletion tests/testing_projects/.gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
*.o

# Files created by the build command
*.ll
*.bc
function_list
*.so.llw
Loading