From 9278f15eb5009d30cf8413bd2fe7c418341e7d72 Mon Sep 17 00:00:00 2001
From: xiaoliz0 <xiaoliz@ifi.uio.no>
Date: Fri, 9 Jan 2026 14:35:16 +0100
Subject: [PATCH 01/24] Add new function to split the table generated at the
 end of the report when it is long.

---
 Config/configure_PRONTO.ini |  2 +
 Script/PRONTO.py            | 80 +++++++++++++++++++++++++++++++++++--
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/Config/configure_PRONTO.ini b/Config/configure_PRONTO.ini
index 0540539..411330a 100644
--- a/Config/configure_PRONTO.ini
+++ b/Config/configure_PRONTO.ini
@@ -9,6 +9,8 @@ data_path = /data/sample_data/analysis_results/
 encoding_sys = utf-8
 ;Specify the number of columns you want to do the filtering (NB: this will also make the script to generate the number of output tables):
 filter_col_nu = 5
+;Specify the number of max rows of the table per slide starting from the 8th slide in report. This is used to split long tables.
+table_max_rows_per_slide = 15
 ;Please modify this for local env if you use MTF files to import the clinical data into meta file. Specify the version of year of the MTF files.
 material_file_version = 2025
 
diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 4f6d128..50f61ae 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -784,6 +784,78 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,
 	return data_nrows
 
 
+def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
+	table_file = open(table_data_file)
+	lines = table_file.readlines()
+	if not lines:
+		return
+	first_line = lines[0]
+	rows = len(lines)
+	first_line_cells = first_line.split('\t')
+	cols = len(table_header)
+	header_not_exist_in_table = []
+	for n in range(len(table_header)):
+		if_exist = False
+		if(table_header[n] in first_line_cells):
+			if_exist = True
+		if not if_exist:
+			header_not_exist_in_table.append(n)
+	data_rows = []
+	for line in lines:
+		if(line != first_line):
+			line_cells =  line.strip('\t')
+			if header_not_exist_in_table:
+				for num in header_not_exist_in_table:
+					line_cells.insert(num," ")
+			row_data = [cell.strip() for cell in line.split('\t')]
+			data_rows.append(row_data)
+
+	ppt = Presentation(output_ppt_file)
+	if(rows <= table_max_rows_per_slide):
+		total_slides_needed = 1
+	else:
+		total_slides_needed = rows // table_max_rows_per_slide + 1
+
+	total_rows = len (data_rows)
+	start_idx = 0
+	while start_idx < total_rows:
+		end_idx  = min(start_idx + table_max_rows_per_slide, total_rows)
+		slide_data = data_rows[start_idx:end_idx]
+		slide = ppt.slides.add_slide(ppt.slide_layouts[6])
+		shapes = slide.shapes
+		left = Inches(left_t)
+		top = Inches(top_t)
+		width = Inches(width_t)
+		height = Inches(height_t)
+		table_rows = len(slide_data) + 1
+		table = shapes.add_table(table_rows,cols,left,top,width,height).table
+		for c in range(cols):
+			if table_column_width:
+				table.columns[c].width = Inches(table_column_width[c])
+			table.cell(0,c).text = table_header[c]
+			table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size)
+
+		for row_idx, row_data in enumerate(slide_data, start=1):
+			for col_idx in range(cols):
+				table.cell(row_idx,col_idx).text = str(row_data[col_idx])
+				table.cell(row_idx,col_idx).text_frame.paragraphs[0].font.size = Pt(font_size)
+
+		start_idx = end_idx
+
+		textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
+		tf = textbox.text_frame
+		if(if_print_rowNo == True):
+			tf.paragraphs[0].text = table_name +" (N=" + str(table_rows - 1) + ")"
+		else:
+			tf.paragraphs[0].text = table_name
+		tf.paragraphs[0].font.size = Pt(8)
+		tf.paragraphs[0].font.bold = True
+		tf.paragraphs[0].alignment = PP_ALIGN.CENTER
+
+	ppt.save(output_ppt_file)
+	return total_slides_needed
+
+
 def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file):
 	DNA_summary_file = open(DNA_variant_summary_file)
 	global str_TMB_DRUP
@@ -1534,7 +1606,8 @@ def main(argv):
 				slide8_table_font_size = 7
 				if_print_rowNo = True
 				table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53]
-				slide8_table_nrows = insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width)
+				table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide")) - 1
+				slide8_table_slides = insert_table_to_ppt_end(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide)
 
 				# Insert the CNV_overveiw_plots pictures A2, B3 and C1 into report.
 				A2_to_extract=[2]
@@ -1545,12 +1618,13 @@ def main(argv):
 					B3_C1_to_extract = [4, 5]
 				pdf_page_image_to_ppt(CNV_overview_plots_pdf,output_ppt_file,B3_C1_to_extract,width_scale=1,height_scale=0.5)
 
-        		# Change slides order.
+        			# Change slides order.
 				ppt = Presentation(output_ppt_file)
+				slide_count = len(ppt.slides)
 				slides = ppt.slides._sldIdLst
 				slides_list = list(slides)
 				slides.remove(slides_list[7])
-				slides.insert(12,slides_list[7])
+				slides.insert(slide_count + 1,slides_list[7])
 				ppt.save(output_ppt_file)
 				print("Generate report for " + DNA_sampleID)
 				ppt_nr += 1

From 813bfdfde0a34965694d5e2dca74b7d3d7f32f12 Mon Sep 17 00:00:00 2001
From: xiaoliz0 <xiaoliz@ifi.uio.no>
Date: Tue, 13 Jan 2026 12:04:07 +0100
Subject: [PATCH 02/24] Update header information for the table in the end
 based on the comments from biology group.

---
 Script/PRONTO.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 50f61ae..9e0286f 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -818,6 +818,7 @@ def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,widt
 
 	total_rows = len (data_rows)
 	start_idx = 0
+	table_page_num = 1
 	while start_idx < total_rows:
 		end_idx  = min(start_idx + table_max_rows_per_slide, total_rows)
 		slide_data = data_rows[start_idx:end_idx]
@@ -845,12 +846,13 @@ def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,widt
 		textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
 		tf = textbox.text_frame
 		if(if_print_rowNo == True):
-			tf.paragraphs[0].text = table_name +" (N=" + str(table_rows - 1) + ")"
+			tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(table_page_num) + "/" + str(total_slides_needed) + ")"
 		else:
 			tf.paragraphs[0].text = table_name
 		tf.paragraphs[0].font.size = Pt(8)
 		tf.paragraphs[0].font.bold = True
 		tf.paragraphs[0].alignment = PP_ALIGN.CENTER
+		table_page_num = table_page_num + 1
 
 	ppt.save(output_ppt_file)
 	return total_slides_needed

From 64d8093430a123199bc6a31214a4131d11a3157e Mon Sep 17 00:00:00 2001
From: xiaoliz0 <xiaoliz@ifi.uio.no>
Date: Tue, 20 Jan 2026 13:35:53 +0100
Subject: [PATCH 03/24] Update code based on comments from Martin. Merge the
 two functions for inserting tables into one function.

---
 Config/configure_PRONTO.ini |   2 +-
 Script/PRONTO.py            | 127 ++++++++++--------------------------
 2 files changed, 37 insertions(+), 92 deletions(-)

diff --git a/Config/configure_PRONTO.ini b/Config/configure_PRONTO.ini
index 411330a..7b74237 100644
--- a/Config/configure_PRONTO.ini
+++ b/Config/configure_PRONTO.ini
@@ -10,7 +10,7 @@ encoding_sys = utf-8
 ;Specify the number of columns you want to do the filtering (NB: this will also make the script to generate the number of output tables):
 filter_col_nu = 5
 ;Specify the number of max rows of the table per slide starting from the 8th slide in report. This is used to split long tables.
-table_max_rows_per_slide = 15
+table_max_rows_per_slide = 14
 ;Please modify this for local env if you use MTF files to import the clinical data into meta file. Specify the version of year of the MTF files.
 material_file_version = 2025
 
diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 9e0286f..f3dee15 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -724,73 +724,12 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_
 	ppt.save(output_ppt_file)
 
 
-def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width):
+def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
 	table_file = open(table_data_file)
 	lines = table_file.readlines()
 	if not lines:
 		return
 	first_line = lines[0]
-	rows = len(lines)
-	first_line_cells = first_line.split('\t')
-	cols = len(table_header)
-	header_not_exist_in_table = []
-	for n in range(len(table_header)):
-		if_exist = False
-		if(table_header[n] in first_line_cells):
-			if_exist = True
-		if not if_exist:
-			header_not_exist_in_table.append(n)
-	ppt = Presentation(output_ppt_file)
-	try:
-		slide = ppt.slides[slide_n-1]
-	except:
-		slide = ppt.slides.add_slide(ppt.slide_layouts[6])
-	shapes = slide.shapes
-	left = Inches(left_t)
-	top = Inches(top_t)
-	width = Inches(width_t)
-	height = Inches(height_t)
-	table = shapes.add_table(rows,cols,left,top,width,height).table
-	table_rows = rows-1
-	for c in range(cols):
-		if table_column_width:
-			table.columns[c].width = Inches(table_column_width[c])
-		table.cell(0,c).text = table_header[c]
-		table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size)
-
-	row = 1
-	for line in open(table_data_file):
-		if(line != first_line):
-			line_cells = line.split('\t')
-			if header_not_exist_in_table:
-				for num in header_not_exist_in_table:
-					line_cells.insert(num," ")
-			for j in range(len(line_cells) - 1):
-				table.cell(row,j).text = str(line_cells[j])
-				table.cell(row,j).text_frame.paragraphs[0].font.size = Pt(font_size)
-			row += 1	
-	textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
-	tf = textbox.text_frame
-	if(if_print_rowNo == True):
-		tf.paragraphs[0].text = table_name +" (N=" + str(table_rows) + ")"
-	else:
-		tf.paragraphs[0].text = table_name
-	tf.paragraphs[0].font.size = Pt(8)
-	tf.paragraphs[0].font.bold = True
-	tf.paragraphs[0].alignment = PP_ALIGN.CENTER
-
-	ppt.save(output_ppt_file)
-	data_nrows = table_rows
-	return data_nrows
-
-
-def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
-	table_file = open(table_data_file)
-	lines = table_file.readlines()
-	if not lines:
-		return
-	first_line = lines[0]
-	rows = len(lines)
 	first_line_cells = first_line.split('\t')
 	cols = len(table_header)
 	header_not_exist_in_table = []
@@ -801,34 +740,40 @@ def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,widt
 		if not if_exist:
 			header_not_exist_in_table.append(n)
 	data_rows = []
-	for line in lines:
-		if(line != first_line):
-			line_cells =  line.strip('\t')
-			if header_not_exist_in_table:
-				for num in header_not_exist_in_table:
-					line_cells.insert(num," ")
-			row_data = [cell.strip() for cell in line.split('\t')]
-			data_rows.append(row_data)
+	for line in lines[1:]:
+		line_cells =  line.strip('\t')
+		if header_not_exist_in_table:
+			for num in header_not_exist_in_table:
+				line_cells.insert(num," ")
+		row_data = [cell.strip() for cell in line.split('\t')]
+		data_rows.append(row_data)
+	total_rows = len(data_rows)
 
 	ppt = Presentation(output_ppt_file)
-	if(rows <= table_max_rows_per_slide):
+	if(table_max_rows_per_slide is None or total_rows <= table_max_rows_per_slide):
 		total_slides_needed = 1
+		rows_per_page = total_rows
+		start_slide_index = slide_n
 	else:
-		total_slides_needed = rows // table_max_rows_per_slide + 1
-
-	total_rows = len (data_rows)
-	start_idx = 0
-	table_page_num = 1
-	while start_idx < total_rows:
-		end_idx  = min(start_idx + table_max_rows_per_slide, total_rows)
-		slide_data = data_rows[start_idx:end_idx]
-		slide = ppt.slides.add_slide(ppt.slide_layouts[6])
+		total_slides_needed = (total_rows + table_max_rows_per_slide -1) // table_max_rows_per_slide
+		rows_per_page = table_max_rows_per_slide
+		start_slide_index = None
+
+	for page_num in range(total_slides_needed):
+		start_idx = page_num * rows_per_page
+		end_idx = min(start_idx + rows_per_page, total_rows)
+		current_page_data = data_rows[start_idx:end_idx]
+		current_page_rows = len(current_page_data)
+		if(start_slide_index is not None and page_num == 0):
+			slide = ppt.slides[slide_n - 1]
+		else:
+			slide = ppt.slides.add_slide(ppt.slide_layouts[6])
 		shapes = slide.shapes
 		left = Inches(left_t)
 		top = Inches(top_t)
 		width = Inches(width_t)
 		height = Inches(height_t)
-		table_rows = len(slide_data) + 1
+		table_rows = current_page_rows + 1
 		table = shapes.add_table(table_rows,cols,left,top,width,height).table
 		for c in range(cols):
 			if table_column_width:
@@ -836,26 +781,26 @@ def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,widt
 			table.cell(0,c).text = table_header[c]
 			table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size)
 
-		for row_idx, row_data in enumerate(slide_data, start=1):
+		for row_idx, row_data in enumerate(current_page_data, start=1):
 			for col_idx in range(cols):
 				table.cell(row_idx,col_idx).text = str(row_data[col_idx])
 				table.cell(row_idx,col_idx).text_frame.paragraphs[0].font.size = Pt(font_size)
 
-		start_idx = end_idx
-
 		textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
 		tf = textbox.text_frame
 		if(if_print_rowNo == True):
-			tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(table_page_num) + "/" + str(total_slides_needed) + ")"
+			if(table_max_rows_per_slide is not None):
+				tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
+			else:
+				tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ")"
 		else:
 			tf.paragraphs[0].text = table_name
 		tf.paragraphs[0].font.size = Pt(8)
 		tf.paragraphs[0].font.bold = True
 		tf.paragraphs[0].alignment = PP_ALIGN.CENTER
-		table_page_num = table_page_num + 1
 
 	ppt.save(output_ppt_file)
-	return total_slides_needed
+	return total_rows, total_slides_needed
 
 
 def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file):
@@ -1588,7 +1533,7 @@ def main(argv):
 				slide6_table_font_size = 7
 				if_print_rowNo = False
 				for table_index in slide6_table_ppSlide:
-					slide6_table_nrows = insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[])
+					slide6_table_nrows, _ = insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[],table_max_rows_per_slide=None)
 				output_file_preMTB_AppendixTable = output_file_preMTB_table_path + "_preMTBTable_Appendix.txt"
 				output_table_file_filterResults_AllReporVariants_CodingRegion = output_file_preMTB_table_path + "_AllReporVariants_CodingRegion.txt"
 				stable_text = update_ppt_variant_summary_table(slide6_table_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file)
@@ -1608,8 +1553,8 @@ def main(argv):
 				slide8_table_font_size = 7
 				if_print_rowNo = True
 				table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53]
-				table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide")) - 1
-				slide8_table_slides = insert_table_to_ppt_end(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide)
+				table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide"))
+				_, slide8_table_slides_added = insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide)
 
 				# Insert the CNV_overveiw_plots pictures A2, B3 and C1 into report.
 				A2_to_extract=[2]
@@ -1626,7 +1571,7 @@ def main(argv):
 				slides = ppt.slides._sldIdLst
 				slides_list = list(slides)
 				slides.remove(slides_list[7])
-				slides.insert(slide_count + 1,slides_list[7])
+				slides.append(slides_list[7])
 				ppt.save(output_ppt_file)
 				print("Generate report for " + DNA_sampleID)
 				ppt_nr += 1

From 995f7a82421a5dbe9efb4fbca5864cd9b15801ab Mon Sep 17 00:00:00 2001
From: xiaoliz0 <xiaoliz@ifi.uio.no>
Date: Tue, 27 Jan 2026 09:16:51 +0100
Subject: [PATCH 04/24] Update code based on the comments from Martin.

---
 Script/PRONTO.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index f3dee15..dd77f7e 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -741,7 +741,7 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,
 			header_not_exist_in_table.append(n)
 	data_rows = []
 	for line in lines[1:]:
-		line_cells =  line.strip('\t')
+		line_cells =  line.split('\t')
 		if header_not_exist_in_table:
 			for num in header_not_exist_in_table:
 				line_cells.insert(num," ")
@@ -1567,7 +1567,6 @@ def main(argv):
 
         			# Change slides order.
 				ppt = Presentation(output_ppt_file)
-				slide_count = len(ppt.slides)
 				slides = ppt.slides._sldIdLst
 				slides_list = list(slides)
 				slides.remove(slides_list[7])

From 18362bcde967436a35ae2f3dee3b125ebf5affdf Mon Sep 17 00:00:00 2001
From: xiaoliz0 <xiaoliz@ifi.uio.no>
Date: Tue, 27 Jan 2026 12:47:29 +0100
Subject: [PATCH 05/24] Remove the return value for total number of added
 slides for the table at the end of the report, since it is not used anywhere
 else.

---
 Script/PRONTO.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index dd77f7e..d0630e7 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -800,7 +800,7 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,
 		tf.paragraphs[0].alignment = PP_ALIGN.CENTER
 
 	ppt.save(output_ppt_file)
-	return total_rows, total_slides_needed
+	return total_rows
 
 
 def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file):
@@ -1533,7 +1533,7 @@ def main(argv):
 				slide6_table_font_size = 7
 				if_print_rowNo = False
 				for table_index in slide6_table_ppSlide:
-					slide6_table_nrows, _ = insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[],table_max_rows_per_slide=None)
+					slide6_table_nrows = insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[],table_max_rows_per_slide=None)
 				output_file_preMTB_AppendixTable = output_file_preMTB_table_path + "_preMTBTable_Appendix.txt"
 				output_table_file_filterResults_AllReporVariants_CodingRegion = output_file_preMTB_table_path + "_AllReporVariants_CodingRegion.txt"
 				stable_text = update_ppt_variant_summary_table(slide6_table_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file)
@@ -1554,7 +1554,7 @@ def main(argv):
 				if_print_rowNo = True
 				table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53]
 				table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide"))
-				_, slide8_table_slides_added = insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide)
+				insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide)
 
 				# Insert the CNV_overveiw_plots pictures A2, B3 and C1 into report.
 				A2_to_extract=[2]

From bebe60c66f28c9d85d92049d3165deffb037c631 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:35:12 +0000
Subject: [PATCH 06/24] chore: add pandas and sort alphabetically

---
 requirements-test.txt | 4 ++--
 requirements.txt      | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/requirements-test.txt b/requirements-test.txt
index b30f9dc..517e35c 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,3 +1,3 @@
-pytest==7.4.3
+pytest-emoji==0.2.0
 pytest-md==0.2.0
-pytest-emoji==0.2.0
\ No newline at end of file
+pytest==7.4.3
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 64993a3..d0ec330 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 click==8.1.7
+pandas==2.2.3
+pdf2image==1.17.0
 python-docx==1.1.0
 python-pptx==0.6.23
 xlrd==2.0.1
 xlutils==2.0.0
-pdf2image==1.17.0

From 2d84df795dbfe8bac062fbd6dc2549c3a98abf1f Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:36:00 +0000
Subject: [PATCH 07/24] feat: add function to normalize column indices

---
 pronto/pronto.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index 3f87cbb..c5ff8e1 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -1,6 +1,7 @@
 import glob
 import logging
 import os
+import pandas
 
 # get tumor mutational burden label
 def get_tmb_string(val):
@@ -27,3 +28,15 @@ def glob_tsoppi_file(is_error, root, run_id, *path_units):
 	else:
 		logging.error("unsuccessful glob strings for {}:\n{}\n{}".format(run_id, glob_string_ous, glob_string_hus))
 		raise ValueError
+
+def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
+	# determine current, missing and additional column indices
+	curr_col_idx = df.columns.tolist()
+	miss_col_idx = list(set(exp_col_idx) - set(curr_col_idx))
+	add_col_idx = list(set(curr_col_idx) - set(exp_col_idx))
+	# add missing column indices
+	for i in miss_col_idx:
+		df[i] = ' '
+	# combine expected with additional to get all present column indices and rearrange columns accordingly, additional columns are moved to the right
+	all_col_idx = exp_col_idx + add_col_idx
+	return df[all_col_idx]
\ No newline at end of file

From fd2e0acfcd06c69bc49ac8fd23f4439aeba10b07 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:36:41 +0000
Subject: [PATCH 08/24] test: add unittests for column index normalization
 function

---
 pronto/tests/pronto_test.py | 83 +++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index c0e6aa7..9213355 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -1,3 +1,4 @@
+import pandas
 import pytest
 import pronto.pronto
 
@@ -99,3 +100,85 @@ def test_get_tmb_string(input, exception, want):
 def test_glob_tsoppi_file(inputs, exception, want):
     with exception:
         assert pronto.pronto.glob_tsoppi_file(*inputs) == want
+
+@pytest.mark.parametrize(
+    "inputs, exception, want",
+    [
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                    "three": [5, 6],
+                    "four": [7, 8],
+                }),
+                ["one", "two", "three", "four"],
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": [3, 4],
+                "three": [5, 6],
+                "four": [7, 8],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                    "four": [7, 8],
+                }),
+                ["one", "two", "three", "four"],
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": [3, 4],
+                "three": [' ', ' '],
+                "four": [7, 8],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                    "three": [5, 6],
+                    "four": [7, 8],
+                }),
+                ["two", "three", "four"],
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "two": [3, 4],
+                "three": [5, 6],
+                "four": [7, 8],
+                "one": [1, 2],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                    "four": [7, 8],
+                    "five": [9, 10],
+                }),
+                ["one", "two", "three", "four"],
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": [3, 4],
+                "three": [' ', ' '],
+                "four": [7, 8],
+                "five": [9, 10],
+            }),
+        ),
+    ]
+)
+def test_normalize_column_index(inputs, exception, want):
+    with exception:
+        get = pronto.pronto.normalize_column_index(*inputs)
+        assert want.equals(get)
\ No newline at end of file

From 09e6aee619e1d6b958cef5aab48f21c3680d8488 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:42:39 +0000
Subject: [PATCH 09/24] feat: use column index normalizing function in main
 script, add comments and clean up

---
 Script/PRONTO.py | 56 +++++++++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 31 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 3aa3c4a..2efa4be 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -28,6 +28,7 @@
 from decimal import Decimal
 from copy import deepcopy
 import pronto.pronto as pronto
+import pandas
 from pdf2image import convert_from_path
 
 runID = ""
@@ -731,45 +732,38 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_
 	ppt.save(output_ppt_file)
 
 
-def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
-	table_file = open(table_data_file)
-	lines = table_file.readlines()
-	if not lines:
+def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
+
+	# load table data
+	try:
+		table_data = pandas.read_csv(table_file, sep='\t')
+	except pandas.errors.EmptyDataError:
+		logging.warning("The file is empty.")
 		return
-	first_line = lines[0]
-	first_line_cells = first_line.split('\t')
+	
+	# add empty columns for missing header columns and move additional columns to the right
+	table_data = pronto.normalize_column_index(table_data, table_header)
+
+	# determine column and row number
 	cols = len(table_header)
-	header_not_exist_in_table = []
-	for n in range(len(table_header)):
-		if_exist = False
-		if(table_header[n] in first_line_cells):
-			if_exist = True
-		if not if_exist:
-			header_not_exist_in_table.append(n)
-	data_rows = []
-	for line in lines[1:]:
-		line_cells =  line.split('\t')
-		if header_not_exist_in_table:
-			for num in header_not_exist_in_table:
-				line_cells.insert(num," ")
-		row_data = [cell.strip() for cell in line.split('\t')]
-		data_rows.append(row_data)
-	total_rows = len(data_rows)
+	rows = len(table_data)
 
-	ppt = Presentation(output_ppt_file)
-	if(table_max_rows_per_slide is None or total_rows <= table_max_rows_per_slide):
+	# how many slides, rows per slide, and start slide index
+	if(table_max_rows_per_slide is None or rows <= table_max_rows_per_slide):
 		total_slides_needed = 1
-		rows_per_page = total_rows
+		rows_per_page = rows
 		start_slide_index = slide_n
 	else:
-		total_slides_needed = (total_rows + table_max_rows_per_slide -1) // table_max_rows_per_slide
+		total_slides_needed = (rows + table_max_rows_per_slide -1) // table_max_rows_per_slide
 		rows_per_page = table_max_rows_per_slide
 		start_slide_index = None
 
+	ppt = Presentation(output_ppt_file)
 	for page_num in range(total_slides_needed):
 		start_idx = page_num * rows_per_page
-		end_idx = min(start_idx + rows_per_page, total_rows)
-		current_page_data = data_rows[start_idx:end_idx]
+		end_idx = min(start_idx + rows_per_page, rows)
+		data_rows = table_data.values.tolist()
+		current_page_data = data_rows[start_idx:end_idx] # use df
 		current_page_rows = len(current_page_data)
 		if(start_slide_index is not None and page_num == 0):
 			slide = ppt.slides[slide_n - 1]
@@ -797,9 +791,9 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,
 		tf = textbox.text_frame
 		if(if_print_rowNo == True):
 			if(table_max_rows_per_slide is not None):
-				tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
+				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
 			else:
-				tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ")"
+				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")"
 		else:
 			tf.paragraphs[0].text = table_name
 		tf.paragraphs[0].font.size = Pt(8)
@@ -807,7 +801,7 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,
 		tf.paragraphs[0].alignment = PP_ALIGN.CENTER
 
 	ppt.save(output_ppt_file)
-	return total_rows
+	return rows
 
 
 def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file):

From 22edc4ba97d3151fb7e1996bf38ddb8df223120e Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:43:54 +0000
Subject: [PATCH 10/24] feat: make warning on empty file more specific

---
 Script/PRONTO.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 2efa4be..4d861fb 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -738,7 +738,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	try:
 		table_data = pandas.read_csv(table_file, sep='\t')
 	except pandas.errors.EmptyDataError:
-		logging.warning("The file is empty.")
+		logging.warning("{} is empty".format(table_file))
 		return
 	
 	# add empty columns for missing header columns and move additional columns to the right

From 7f5509ac63491077e045db2c57aacbe4a3238992 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 14:24:11 +0000
Subject: [PATCH 11/24] feat: simplify variable setting via if statement

---
 Script/PRONTO.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 4d861fb..5fcb630 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -29,6 +29,7 @@
 from copy import deepcopy
 import pronto.pronto as pronto
 import pandas
+import math
 from pdf2image import convert_from_path
 
 runID = ""
@@ -748,20 +749,16 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	cols = len(table_header)
 	rows = len(table_data)
 
-	# how many slides, rows per slide, and start slide index
-	if(table_max_rows_per_slide is None or rows <= table_max_rows_per_slide):
-		total_slides_needed = 1
-		rows_per_page = rows
-		start_slide_index = slide_n
-	else:
-		total_slides_needed = (rows + table_max_rows_per_slide -1) // table_max_rows_per_slide
-		rows_per_page = table_max_rows_per_slide
-		start_slide_index = None
+	# how many slides, and start slide index
+	if not table_max_rows_per_slide:
+		table_max_rows_per_slide = rows
+	total_slides_needed = math.ceil(rows / table_max_rows_per_slide)
+	start_slide_index = None if total_slides_needed > 1 else slide_n
 
 	ppt = Presentation(output_ppt_file)
 	for page_num in range(total_slides_needed):
-		start_idx = page_num * rows_per_page
-		end_idx = min(start_idx + rows_per_page, rows)
+		start_idx = page_num * table_max_rows_per_slide
+		end_idx = min(start_idx + table_max_rows_per_slide, rows)
 		data_rows = table_data.values.tolist()
 		current_page_data = data_rows[start_idx:end_idx] # use df
 		current_page_rows = len(current_page_data)
@@ -790,7 +787,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 		textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
 		tf = textbox.text_frame
 		if(if_print_rowNo == True):
-			if(table_max_rows_per_slide is not None):
+			if(total_slides_needed > 1):
 				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
 			else:
 				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")"

From 282a54ec29d9bda552fcf74aeafd8623bd709649 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Tue, 3 Feb 2026 14:26:50 +0000
Subject: [PATCH 12/24] feat: round floats in AF_tumor_DNA to 2 decimal places

---
 Script/PRONTO.py | 3 +++
 pronto/pronto.py | 9 ++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 5fcb630..23c543a 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -745,6 +745,9 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	# add empty columns for missing header columns and move additional columns to the right
 	table_data = pronto.normalize_column_index(table_data, table_header)
 
+	# round floats to 2 decimal places
+	table_data = pronto.set_column_to_2_decimals(table_data, "AF_tumor_DNA")
+
 	# determine column and row number
 	cols = len(table_header)
 	rows = len(table_data)
diff --git a/pronto/pronto.py b/pronto/pronto.py
index c5ff8e1..ca34994 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -39,4 +39,11 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 		df[i] = ' '
 	# combine expected with additional to get all present column indices and rearrange columns accordingly, additional columns are moved to the right
 	all_col_idx = exp_col_idx + add_col_idx
-	return df[all_col_idx]
\ No newline at end of file
+	return df[all_col_idx]
+
+def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
+	if col_name in df.columns:
+		df[col_name] = df[col_name].map('{:.2f}'.format)
+	else:
+		logging.info("Column {} not found in dataframe".format(col_name))
+	return df
\ No newline at end of file

From b88f6523cdddd49eb0b6b1a580147f610d20c7d7 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Tue, 3 Feb 2026 14:27:12 +0000
Subject: [PATCH 13/24] test: add unittests for rounding function

---
 pronto/tests/pronto_test.py | 50 +++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index 9213355..d8ebbf2 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -181,4 +181,54 @@ def test_glob_tsoppi_file(inputs, exception, want):
 def test_normalize_column_index(inputs, exception, want):
     with exception:
         get = pronto.pronto.normalize_column_index(*inputs)
+        assert want.equals(get)
+
+@pytest.mark.parametrize(
+    "inputs, exception, want",
+    [
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3.333, 4.444],
+                }),
+                "two",
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": ["3.33", "4.44"],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3.666, 4.777],
+                }),
+                "two",
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": ["3.67", "4.78"],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                }),
+                "two",
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+            }),
+        ),
+    ]
+)
+def test_set_column_to_2_decimals(inputs, exception, want):
+    with exception:
+        get = pronto.pronto.set_column_to_2_decimals(*inputs)
         assert want.equals(get)
\ No newline at end of file

From 2ec761f8c21aca43ecf7ee0909b749163fb7cbd3 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Wed, 4 Feb 2026 08:16:54 +0000
Subject: [PATCH 14/24] style: be consistent with using idx for index

---
 Script/PRONTO.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 23c543a..eed437d 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -756,7 +756,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	if not table_max_rows_per_slide:
 		table_max_rows_per_slide = rows
 	total_slides_needed = math.ceil(rows / table_max_rows_per_slide)
-	start_slide_index = None if total_slides_needed > 1 else slide_n
+	start_slide_idx = None if total_slides_needed > 1 else slide_n
 
 	ppt = Presentation(output_ppt_file)
 	for page_num in range(total_slides_needed):
@@ -765,7 +765,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 		data_rows = table_data.values.tolist()
 		current_page_data = data_rows[start_idx:end_idx] # use df
 		current_page_rows = len(current_page_data)
-		if(start_slide_index is not None and page_num == 0):
+		if(start_slide_idx is not None and slide_idx == 0):
 			slide = ppt.slides[slide_n - 1]
 		else:
 			slide = ppt.slides.add_slide(ppt.slide_layouts[6])

From 7f080ea093e3356d94ec2e21ee22db82d8690a3e Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Wed, 4 Feb 2026 08:22:22 +0000
Subject: [PATCH 15/24] chore: rename page_num to slide_idx to be consistent
 with naming

---
 Script/PRONTO.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index eed437d..bf57686 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -759,7 +759,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	start_slide_idx = None if total_slides_needed > 1 else slide_n
 
 	ppt = Presentation(output_ppt_file)
-	for page_num in range(total_slides_needed):
+	for slide_idx in range(total_slides_needed):
 		start_idx = page_num * table_max_rows_per_slide
 		end_idx = min(start_idx + table_max_rows_per_slide, rows)
 		data_rows = table_data.values.tolist()
@@ -791,7 +791,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 		tf = textbox.text_frame
 		if(if_print_rowNo == True):
 			if(total_slides_needed > 1):
-				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
+				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(slide_idx+1) + "/" + str(total_slides_needed) + ")"
 			else:
 				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")"
 		else:

From a81fa30c6c2bf436cac8d9c43ac22ef880a5911b Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 13:56:22 +0000
Subject: [PATCH 16/24] fix: handle rounding if type is string

---
 pronto/pronto.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index ca34994..f91a22e 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -43,6 +43,8 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 
 def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
 	if col_name in df.columns:
+		if df[col_name].dtype != float:
+			df[col_name] = df[col_name].astype(float)
 		df[col_name] = df[col_name].map('{:.2f}'.format)
 	else:
 		logging.info("Column {} not found in dataframe".format(col_name))

From afdbf800146a0e58ff74a964db06ab3ec3a098f9 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 13:57:10 +0000
Subject: [PATCH 17/24] test: include test case for decimal rounding that
 contains strings

---
 pronto/tests/pronto_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index d8ebbf2..3bbc96d 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -204,7 +204,7 @@ def test_normalize_column_index(inputs, exception, want):
             (
                 pandas.DataFrame({
                     "one": [1, 2],
-                    "two": [3.666, 4.777],
+                    "two": ['3.666', '4.777'],
                 }),
                 "two",
             ),

From 976c0ca1dcd4d4bb8634dced6dcdcf130c178966 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 13:59:45 +0000
Subject: [PATCH 18/24] feat: introduce functions to get table data per slide
 and add table name to slide

---
 pronto/pronto.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index f91a22e..94b5809 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -2,6 +2,7 @@
 import logging
 import os
 import pandas
+import pptx
 
 # get tumor mutational burden label
 def get_tmb_string(val):
@@ -48,4 +49,31 @@ def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
 		df[col_name] = df[col_name].map('{:.2f}'.format)
 	else:
 		logging.info("Column {} not found in dataframe".format(col_name))
-	return df
\ No newline at end of file
+	return df
+
+# get data fitting on one slide based on slide index and max rows per slide
+def get_slide_table_data(df: pandas.DataFrame, slide_idx: int, max_rows: int):
+	start = slide_idx * max_rows
+	stop = min(start + max_rows, len(df))
+	if start >= len(df):
+		return []
+	table = df.values.tolist()
+	header = [df.columns.tolist()]
+	table_data = header + table[start:stop]
+	return table_data
+
+# add constructed table name to slide and format the textbox
+def add_table_name(shapes: pptx.shapes.shapetree.SlideShapes, table_name: str, left: float, top: float, width: float, height: float, font_size: float, print_row_num: bool, slide_idx: int, total_slides: int, rows: int):
+
+	# add textbox to slide
+	paragraph = shapes.add_textbox(pptx.util.Inches(left), pptx.util.Inches(top), pptx.util.Inches(width), pptx.util.Inches(height)).text_frame.paragraphs[0]
+
+	# construct table name with optional row number and slide count
+	part_1 = ", Page {}/{}".format(slide_idx + 1, total_slides) if total_slides > 1 else ''
+	part_2 = " (N={}{})".format(rows, part_1) if print_row_num else ''
+	paragraph.text = "{}{}".format(table_name, part_2)
+
+	# font formatting and placement
+	paragraph.font.size = pptx.util.Pt(font_size)
+	paragraph.font.bold = True
+	paragraph.alignment = pptx.enum.text.PP_ALIGN.CENTER

From 51628f8cc34c1648b7bb783049ab22edfb2c338c Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 14:00:06 +0000
Subject: [PATCH 19/24] style: add comments to code

---
 pronto/pronto.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index 94b5809..3712a9a 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -30,6 +30,7 @@ def glob_tsoppi_file(is_error, root, run_id, *path_units):
 		logging.error("unsuccessful glob strings for {}:\n{}\n{}".format(run_id, glob_string_ous, glob_string_hus))
 		raise ValueError
 
+# normalize dataframe to expected column indices
 def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 	# determine current, missing and additional column indices
 	curr_col_idx = df.columns.tolist()
@@ -42,6 +43,7 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 	all_col_idx = exp_col_idx + add_col_idx
 	return df[all_col_idx]
 
+# set dataframe column format to 2 decimal points
 def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
 	if col_name in df.columns:
 		if df[col_name].dtype != float:

From a686731ffa14ce6b471747e011c7d79aea9c922c Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 14:00:34 +0000
Subject: [PATCH 20/24] test: add tests for table data and table name functions

---
 pronto/tests/pronto_test.py | 186 +++++++++++++++++++++++++++++++++++-
 1 file changed, 185 insertions(+), 1 deletion(-)

diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index 3bbc96d..7192c32 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -1,4 +1,5 @@
 import pandas
+import pptx
 import pytest
 import pronto.pronto
 
@@ -231,4 +232,187 @@ def test_normalize_column_index(inputs, exception, want):
 def test_set_column_to_2_decimals(inputs, exception, want):
     with exception:
         get = pronto.pronto.set_column_to_2_decimals(*inputs)
-        assert want.equals(get)
\ No newline at end of file
+        assert want.equals(get)
+
+def list_of_lists_equal(list1, list2):
+    if len(list1) != len(list2):
+        return False
+    for sublist1, sublist2 in zip(list1, list2):
+        print(sublist1, sublist2)
+        if sublist1 != sublist2:
+            return False
+    return True
+
+@pytest.mark.parametrize(
+    "inputs, exception, want",
+    [
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                }),
+                0,
+                3,
+            ),
+            does_not_raise(),
+            [
+                ["one", "two"],
+                [1, 3],
+                [2, 4],
+            ],
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2, 3 ,4],
+                    "two": [5, 6, 7, 8],
+                }),
+                1,
+                2,
+            ),
+            does_not_raise(),
+            [
+                ["one", "two"],
+                [3, 7],
+                [4, 8],
+            ],
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2, 3],
+                    "two": [5, 6, 7],
+                }),
+                1,
+                2,
+            ),
+            does_not_raise(),
+            [
+                ["one", "two"],
+                [3, 7],
+            ],
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2, 3],
+                    "two": [5, 6, 7],
+                }),
+                2,
+                2,
+            ),
+            does_not_raise(),
+            [],
+        ),
+    ]
+)
+def test_get_slide_table_data(inputs, exception, want):
+    with exception:
+        get = pronto.pronto.get_slide_table_data(*inputs)
+        assert list_of_lists_equal(get, want)
+
+def check_shape(shape, want_left, want_top, want_width, want_height):
+    assert shape.left == pptx.util.Inches(want_left)
+    assert shape.top == pptx.util.Inches(want_top)
+    assert shape.width == pptx.util.Inches(want_width)
+    assert shape.height == pptx.util.Inches(want_height)
+
+def check_paragraph(paragraph, want_text, want_font_size, want_bold, want_alignment):
+    assert paragraph.text == want_text
+    assert paragraph.font.size.pt == want_font_size
+    assert paragraph.font.bold == want_bold
+    assert paragraph.alignment == want_alignment
+
+@pytest.mark.parametrize(
+    "inputs, exception, want_shape, want_paragraph",
+    [
+        (
+            (
+                'Test',
+                0.5,
+                0.5,
+                4,
+                1,
+                12,
+                True,
+                0,
+                3,
+                4,
+            ),
+            does_not_raise(),
+            (
+                0.5,
+                0.5,
+                4,
+                1,
+            ),
+            (
+                'Test (N=4, Page 1/3)',
+                12.0,
+                True,
+                pptx.enum.text.PP_ALIGN.CENTER,
+            )
+        ),
+        (
+            (
+                'Test',
+                0.5,
+                0.5,
+                4,
+                1,
+                12,
+                True,
+                0,
+                1,
+                4,
+            ),
+            does_not_raise(),
+            (
+                0.5,
+                0.5,
+                4,
+                1,
+            ),
+            (
+                'Test (N=4)',
+                12.0,
+                True,
+                pptx.enum.text.PP_ALIGN.CENTER,
+            )
+        ),
+        (
+            (
+                'Test',
+                0.5,
+                0.5,
+                4,
+                1,
+                12,
+                False,
+                0,
+                3,
+                4,
+            ),
+            does_not_raise(),
+            (
+                0.5,
+                0.5,
+                4,
+                1,
+            ),
+            (
+                'Test',
+                12.0,
+                True,
+                pptx.enum.text.PP_ALIGN.CENTER,
+            )
+        ),
+    ]
+)
+def test_add_table_name(inputs, exception, want_shape, want_paragraph):
+    with exception:
+        shapes = pptx.Presentation().slides.add_slide(pptx.Presentation().slide_layouts[6]).shapes
+        pronto.pronto.add_table_name(shapes, *inputs)
+        check_shape(shapes[0], *want_shape)
+        check_paragraph(shapes[0].text_frame.paragraphs[0], *want_paragraph)
\ No newline at end of file

From bca5f7e78329d0f996b0075bc219163fa44f3276 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 14:02:19 +0000
Subject: [PATCH 21/24] feat: refactor and simplify insert_table_to_ppt

---
 Script/PRONTO.py | 60 +++++++++++++++++++-----------------------------
 1 file changed, 24 insertions(+), 36 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index bf57686..c5be538 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -733,7 +733,7 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_
 	ppt.save(output_ppt_file)
 
 
-def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
+def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,print_row_num,table_column_width,table_max_rows_per_slide):
 
 	# load table data
 	try:
@@ -752,53 +752,41 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	cols = len(table_header)
 	rows = len(table_data)
 
-	# how many slides, and start slide index
+	# how many slides are required
 	if not table_max_rows_per_slide:
 		table_max_rows_per_slide = rows
 	total_slides_needed = math.ceil(rows / table_max_rows_per_slide)
-	start_slide_idx = None if total_slides_needed > 1 else slide_n
 
+	# Add data to ppt
 	ppt = Presentation(output_ppt_file)
 	for slide_idx in range(total_slides_needed):
-		start_idx = page_num * table_max_rows_per_slide
-		end_idx = min(start_idx + table_max_rows_per_slide, rows)
-		data_rows = table_data.values.tolist()
-		current_page_data = data_rows[start_idx:end_idx] # use df
-		current_page_rows = len(current_page_data)
-		if(start_slide_idx is not None and slide_idx == 0):
-			slide = ppt.slides[slide_n - 1]
+		current_slide_data = pronto.get_slide_table_data(table_data, slide_idx, table_max_rows_per_slide)
+		if(total_slides_needed == 1):
+			shapes = ppt.slides[slide_n - 1].shapes
 		else:
-			slide = ppt.slides.add_slide(ppt.slide_layouts[6])
-		shapes = slide.shapes
+			shapes = ppt.slides.add_slide(ppt.slide_layouts[6]).shapes
+
+		# create new table on slide
 		left = Inches(left_t)
 		top = Inches(top_t)
 		width = Inches(width_t)
 		height = Inches(height_t)
-		table_rows = current_page_rows + 1
+		table_rows = len(current_slide_data)
 		table = shapes.add_table(table_rows,cols,left,top,width,height).table
-		for c in range(cols):
-			if table_column_width:
-				table.columns[c].width = Inches(table_column_width[c])
-			table.cell(0,c).text = table_header[c]
-			table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size)
-
-		for row_idx, row_data in enumerate(current_page_data, start=1):
-			for col_idx in range(cols):
-				table.cell(row_idx,col_idx).text = str(row_data[col_idx])
-				table.cell(row_idx,col_idx).text_frame.paragraphs[0].font.size = Pt(font_size)
-
-		textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
-		tf = textbox.text_frame
-		if(if_print_rowNo == True):
-			if(total_slides_needed > 1):
-				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(slide_idx+1) + "/" + str(total_slides_needed) + ")"
-			else:
-				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")"
-		else:
-			tf.paragraphs[0].text = table_name
-		tf.paragraphs[0].font.size = Pt(8)
-		tf.paragraphs[0].font.bold = True
-		tf.paragraphs[0].alignment = PP_ALIGN.CENTER
+
+		# if table_column_width is provided, set the column width
+		if len(table_column_width) == cols:
+			for col_idx, width in enumerate(table_column_width):
+				table.columns[col_idx].width = Inches(width)
+		
+		# fill in the table data and set font size
+		for row_idx, row in enumerate(table.rows):
+			for col_idx, cell in enumerate(row.cells):
+				cell.text = current_slide_data[row_idx][col_idx]
+				cell.text_frame.paragraphs[0].font.size = Pt(font_size)
+
+		# add table title
+		pronto.add_table_title(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows)
 
 	ppt.save(output_ppt_file)
 	return rows

From ce3dcedfcefef7dc3691a2bb3df6a2fa843739a2 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 12 Feb 2026 12:01:02 +0000
Subject: [PATCH 22/24] fix: use correct function name, thanks @xiaoliz0

---
 Script/PRONTO.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index c5be538..2062067 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -786,7 +786,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 				cell.text_frame.paragraphs[0].font.size = Pt(font_size)
 
 		# add table title
-		pronto.add_table_title(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows)
+		pronto.add_table_name(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows)
 
 	ppt.save(output_ppt_file)
 	return rows

From bb2daccad7a32f0cd91a175bcfc0558186ef5910 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Fri, 13 Feb 2026 12:40:50 +0000
Subject: [PATCH 23/24] fix: handle strings with % and floats in
 set_column_to_2_decimals

---
 pronto/pronto.py            | 7 +++----
 pronto/tests/pronto_test.py | 4 ++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index 3712a9a..fc22dde 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -43,12 +43,11 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 	all_col_idx = exp_col_idx + add_col_idx
 	return df[all_col_idx]
 
-# set dataframe column format to 2 decimal points
+# set dataframe column format to 2 decimal points if float type
 def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
 	if col_name in df.columns:
-		if df[col_name].dtype != float:
-			df[col_name] = df[col_name].astype(float)
-		df[col_name] = df[col_name].map('{:.2f}'.format)
+		if df[col_name].dtype == float:
+			df[col_name] = df[col_name].map('{:.2f}'.format)
 	else:
 		logging.info("Column {} not found in dataframe".format(col_name))
 	return df
diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index 7192c32..e58028e 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -205,14 +205,14 @@ def test_normalize_column_index(inputs, exception, want):
             (
                 pandas.DataFrame({
                     "one": [1, 2],
-                    "two": ['3.666', '4.777'],
+                    "two": ['21.0%', '0.5%'],
                 }),
                 "two",
             ),
             does_not_raise(),
             pandas.DataFrame({
                 "one": [1, 2],
-                "two": ["3.67", "4.78"],
+                "two": ["21.0%", "0.5%"],
             }),
         ),
         (

From 26af7f02f1307fede380b1adee740e458f4fda6a Mon Sep 17 00:00:00 2001
From: Martin Rippin <74295098+marrip@users.noreply.github.com>
Date: Wed, 29 Apr 2026 09:18:53 +0200
Subject: [PATCH 24/24] fix: apply @xiaoliz0's suggestions

Co-authored-by: Xiaoli Zhang <81294502+xiaoliz0@users.noreply.github.com>
---
 Script/PRONTO.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 2062067..8b07304 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -759,9 +759,10 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 
 	# Add data to ppt
 	ppt = Presentation(output_ppt_file)
+	total_slides = len(ppt.slides)
 	for slide_idx in range(total_slides_needed):
 		current_slide_data = pronto.get_slide_table_data(table_data, slide_idx, table_max_rows_per_slide)
-		if(total_slides_needed == 1):
+		if(total_slides_needed == 1 and slide_n <= total_slides):
 			shapes = ppt.slides[slide_n - 1].shapes
 		else:
 			shapes = ppt.slides.add_slide(ppt.slide_layouts[6]).shapes
@@ -782,7 +783,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 		# fill in the table data and set font size
 		for row_idx, row in enumerate(table.rows):
 			for col_idx, cell in enumerate(row.cells):
-				cell.text = current_slide_data[row_idx][col_idx]
+				cell.text = str(current_slide_data[row_idx][col_idx])
 				cell.text_frame.paragraphs[0].font.size = Pt(font_size)
 
 		# add table title