From dcd2e4abdf34767b97cc46f88589c5e950564787 Mon Sep 17 00:00:00 2001 From: Henri Casanova Date: Tue, 12 May 2026 08:02:35 -1000 Subject: [PATCH 1/5] Making snakemake logger more robust --- wfcommons/wfinstances/logs/snakemake.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wfcommons/wfinstances/logs/snakemake.py b/wfcommons/wfinstances/logs/snakemake.py index 1e4672ce..47e1546e 100644 --- a/wfcommons/wfinstances/logs/snakemake.py +++ b/wfcommons/wfinstances/logs/snakemake.py @@ -163,6 +163,8 @@ def _build_file_map(self): if task_idx not in self.task_input_files and task_idx not in self.task_output_files: continue full_path = row[1] + # clean path + full_path = full_path.split(" (access:")[0].strip() if self.path_prefix_rewrite: full_path = full_path.replace(self.path_prefix_rewrite[0], self.path_prefix_rewrite[1]) file_size = os.path.getsize(f"{full_path}") From 1fb1d206f17ffe38fbeaa8fcc3bd8a4d1e623c27 Mon Sep 17 00:00:00 2001 From: Henri Casanova Date: Tue, 12 May 2026 08:43:51 -1000 Subject: [PATCH 2/5] Snakemake logger enhancements: capture #threads for each task --- wfcommons/wfinstances/logs/snakemake.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wfcommons/wfinstances/logs/snakemake.py b/wfcommons/wfinstances/logs/snakemake.py index 47e1546e..90f14402 100644 --- a/wfcommons/wfinstances/logs/snakemake.py +++ b/wfcommons/wfinstances/logs/snakemake.py @@ -73,6 +73,7 @@ def __init__(self, self.file_objects = {} self.task_map = {} self.task_shell = {} + self.task_threads = {} self.task_input_files = {} self.task_output_files = {} self.file_input_output = {} @@ -140,6 +141,7 @@ def _build_task_map(self): for row in rows: task_idx = row[0] rule_idx = row[3] + threads = int(row[9]) # Shell command if row[8]: command_list = [x.rstrip().lstrip() for x in row[8].lstrip().rstrip().split('\n')] @@ -150,6 +152,7 @@ def _build_task_map(self): continue self.task_map[task_idx] = rules[rule_idx] + "_" + str(task_idx) self.task_shell[task_idx] = shell_cmd + self.task_threads[task_idx] = threads self.task_input_files[task_idx] = [] self.task_output_files[task_idx] = [] @@ -218,6 +221,7 @@ def _create_tasks(self): input_files=input_files, output_files=output_files, program=program_name, + cores=self.task_threads[idx], args=program_args, logger=self.logger) self.workflow.add_task(task) From 43689bc01b7cd717007b303348f45bbcb1b57c16 Mon Sep 17 00:00:00 2001 From: Henri Casanova Date: Tue, 12 May 2026 19:05:58 -1000 Subject: [PATCH 3/5] Skipping Snakemake benchmark files when parsing Snakemake logs --- wfcommons/wfinstances/logs/snakemake.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wfcommons/wfinstances/logs/snakemake.py b/wfcommons/wfinstances/logs/snakemake.py index 90f14402..45ca6f37 100644 --- a/wfcommons/wfinstances/logs/snakemake.py +++ b/wfcommons/wfinstances/logs/snakemake.py @@ -162,6 +162,10 @@ def _build_file_map(self): cursor.execute("SELECT * FROM files") rows = cursor.fetchall() for row in rows: + file_type = row[2] + # Skip snakemake's BENCHMARK files (and besides snkmt doesn't deal with them correctly!) + if file_type == "BENCHMARK": + continue task_idx = row[3] if task_idx not in self.task_input_files and task_idx not in self.task_output_files: continue From 41613bd22908eace7ecb7e038eba3bb03431224e Mon Sep 17 00:00:00 2001 From: Henri Casanova Date: Tue, 12 May 2026 19:10:01 -1000 Subject: [PATCH 4/5] Ignoring "(cached)" files when parsing Snakemake logs --- wfcommons/wfinstances/logs/snakemake.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wfcommons/wfinstances/logs/snakemake.py b/wfcommons/wfinstances/logs/snakemake.py index 45ca6f37..d89ea603 100644 --- a/wfcommons/wfinstances/logs/snakemake.py +++ b/wfcommons/wfinstances/logs/snakemake.py @@ -171,7 +171,7 @@ def _build_file_map(self): continue full_path = row[1] # clean path - full_path = full_path.split(" (access:")[0].strip() + full_path = full_path.split(" (access:")[0].split(" (cached)")[0].strip() if self.path_prefix_rewrite: full_path = full_path.replace(self.path_prefix_rewrite[0], self.path_prefix_rewrite[1]) file_size = os.path.getsize(f"{full_path}") From faedb6b95222831fc94a365c08d638c6334d8816 Mon Sep 17 00:00:00 2001 From: Henri Casanova Date: Tue, 12 May 2026 19:21:00 -1000 Subject: [PATCH 5/5] Ignoring LOG files when parsing Snakemake logs --- wfcommons/wfinstances/logs/snakemake.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wfcommons/wfinstances/logs/snakemake.py b/wfcommons/wfinstances/logs/snakemake.py index d89ea603..41d0d52c 100644 --- a/wfcommons/wfinstances/logs/snakemake.py +++ b/wfcommons/wfinstances/logs/snakemake.py @@ -164,7 +164,8 @@ def _build_file_map(self): for row in rows: file_type = row[2] # Skip snakemake's BENCHMARK files (and besides snkmt doesn't deal with them correctly!) - if file_type == "BENCHMARK": + # and LOG files (which sometimes are missing anyway) + if file_type == "BENCHMARK" or file_type == "LOG": continue task_idx = row[3] if task_idx not in self.task_input_files and task_idx not in self.task_output_files: