From 9278f15eb5009d30cf8413bd2fe7c418341e7d72 Mon Sep 17 00:00:00 2001 From: xiaoliz0 Date: Fri, 9 Jan 2026 14:35:16 +0100 Subject: [PATCH 01/24] Add new function to split the table generated in the end of the report when it is long. --- Config/configure_PRONTO.ini | 2 + Script/PRONTO.py | 80 +++++++++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/Config/configure_PRONTO.ini b/Config/configure_PRONTO.ini index 0540539..411330a 100644 --- a/Config/configure_PRONTO.ini +++ b/Config/configure_PRONTO.ini @@ -9,6 +9,8 @@ data_path = /data/sample_data/analysis_results/ encoding_sys = utf-8 ;Specify the number of columns you want to do the filtering (NB: this will also make the script to generate the number of output tables): filter_col_nu = 5 +;Specify the number of max rows of the table per slide starting from the 8th slide in report. This is used to split long tables. +table_max_rows_per_slide = 15 ;Please modify this for local env if you use MTF files to import the clinical data into meta file. Specify the version of year of the MTF files. 
material_file_version = 2025 diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 4f6d128..50f61ae 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -784,6 +784,78 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h, return data_nrows +def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide): + table_file = open(table_data_file) + lines = table_file.readlines() + if not lines: + return + first_line = lines[0] + rows = len(lines) + first_line_cells = first_line.split('\t') + cols = len(table_header) + header_not_exist_in_table = [] + for n in range(len(table_header)): + if_exist = False + if(table_header[n] in first_line_cells): + if_exist = True + if not if_exist: + header_not_exist_in_table.append(n) + data_rows = [] + for line in lines: + if(line != first_line): + line_cells = line.strip('\t') + if header_not_exist_in_table: + for num in header_not_exist_in_table: + line_cells.insert(num," ") + row_data = [cell.strip() for cell in line.split('\t')] + data_rows.append(row_data) + + ppt = Presentation(output_ppt_file) + if(rows <= table_max_rows_per_slide): + total_slides_needed = 1 + else: + total_slides_needed = rows // table_max_rows_per_slide + 1 + + total_rows = len (data_rows) + start_idx = 0 + while start_idx < total_rows: + end_idx = min(start_idx + table_max_rows_per_slide, total_rows) + slide_data = data_rows[start_idx:end_idx] + slide = ppt.slides.add_slide(ppt.slide_layouts[6]) + shapes = slide.shapes + left = Inches(left_t) + top = Inches(top_t) + width = Inches(width_t) + height = Inches(height_t) + table_rows = len(slide_data) + 1 + table = shapes.add_table(table_rows,cols,left,top,width,height).table + for c in range(cols): + if table_column_width: + table.columns[c].width = Inches(table_column_width[c]) + table.cell(0,c).text = table_header[c] + 
table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size) + + for row_idx, row_data in enumerate(slide_data, start=1): + for col_idx in range(cols): + table.cell(row_idx,col_idx).text = str(row_data[col_idx]) + table.cell(row_idx,col_idx).text_frame.paragraphs[0].font.size = Pt(font_size) + + start_idx = end_idx + + textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25)) + tf = textbox.text_frame + if(if_print_rowNo == True): + tf.paragraphs[0].text = table_name +" (N=" + str(table_rows - 1) + ")" + else: + tf.paragraphs[0].text = table_name + tf.paragraphs[0].font.size = Pt(8) + tf.paragraphs[0].font.bold = True + tf.paragraphs[0].alignment = PP_ALIGN.CENTER + + ppt.save(output_ppt_file) + return total_slides_needed + + def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file): DNA_summary_file = open(DNA_variant_summary_file) global str_TMB_DRUP @@ -1534,7 +1606,8 @@ def main(argv): slide8_table_font_size = 7 if_print_rowNo = True table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53] - slide8_table_nrows = insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width) + table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide")) - 1 + slide8_table_slides = 
insert_table_to_ppt_end(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide) # Insert the CNV_overveiw_plots pictures A2, B3 and C1 into report. A2_to_extract=[2] @@ -1545,12 +1618,13 @@ def main(argv): B3_C1_to_extract = [4, 5] pdf_page_image_to_ppt(CNV_overview_plots_pdf,output_ppt_file,B3_C1_to_extract,width_scale=1,height_scale=0.5) - # Change slides order. + # Change slides order. ppt = Presentation(output_ppt_file) + slide_count = len(ppt.slides) slides = ppt.slides._sldIdLst slides_list = list(slides) slides.remove(slides_list[7]) - slides.insert(12,slides_list[7]) + slides.insert(slide_count + 1,slides_list[7]) ppt.save(output_ppt_file) print("Generate report for " + DNA_sampleID) ppt_nr += 1 From 813bfdfde0a34965694d5e2dca74b7d3d7f32f12 Mon Sep 17 00:00:00 2001 From: xiaoliz0 Date: Tue, 13 Jan 2026 12:04:07 +0100 Subject: [PATCH 02/24] Update header information for the table in the end based on the comments from biology group. 
--- Script/PRONTO.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 50f61ae..9e0286f 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -818,6 +818,7 @@ def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,widt total_rows = len (data_rows) start_idx = 0 + table_page_num = 1 while start_idx < total_rows: end_idx = min(start_idx + table_max_rows_per_slide, total_rows) slide_data = data_rows[start_idx:end_idx] @@ -845,12 +846,13 @@ def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,widt textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25)) tf = textbox.text_frame if(if_print_rowNo == True): - tf.paragraphs[0].text = table_name +" (N=" + str(table_rows - 1) + ")" + tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(table_page_num) + "/" + str(total_slides_needed) + ")" else: tf.paragraphs[0].text = table_name tf.paragraphs[0].font.size = Pt(8) tf.paragraphs[0].font.bold = True tf.paragraphs[0].alignment = PP_ALIGN.CENTER + table_page_num = table_page_num + 1 ppt.save(output_ppt_file) return total_slides_needed From 64d8093430a123199bc6a31214a4131d11a3157e Mon Sep 17 00:00:00 2001 From: xiaoliz0 Date: Tue, 20 Jan 2026 13:35:53 +0100 Subject: [PATCH 03/24] Update codes based on comments from Martin. Merge the two functions of inserting table into one function. 
--- Config/configure_PRONTO.ini | 2 +- Script/PRONTO.py | 127 ++++++++++-------------------------- 2 files changed, 37 insertions(+), 92 deletions(-) diff --git a/Config/configure_PRONTO.ini b/Config/configure_PRONTO.ini index 411330a..7b74237 100644 --- a/Config/configure_PRONTO.ini +++ b/Config/configure_PRONTO.ini @@ -10,7 +10,7 @@ encoding_sys = utf-8 ;Specify the number of columns you want to do the filtering (NB: this will also make the script to generate the number of output tables): filter_col_nu = 5 ;Specify the number of max rows of the table per slide starting from the 8th slide in report. This is used to split long tables. -table_max_rows_per_slide = 15 +table_max_rows_per_slide = 14 ;Please modify this for local env if you use MTF files to import the clinical data into meta file. Specify the version of year of the MTF files. material_file_version = 2025 diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 9e0286f..f3dee15 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -724,73 +724,12 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_ ppt.save(output_ppt_file) -def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width): +def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide): table_file = open(table_data_file) lines = table_file.readlines() if not lines: return first_line = lines[0] - rows = len(lines) - first_line_cells = first_line.split('\t') - cols = len(table_header) - header_not_exist_in_table = [] - for n in range(len(table_header)): - if_exist = False - if(table_header[n] in first_line_cells): - if_exist = True - if not if_exist: - header_not_exist_in_table.append(n) - ppt = Presentation(output_ppt_file) - try: - slide = 
ppt.slides[slide_n-1] - except: - slide = ppt.slides.add_slide(ppt.slide_layouts[6]) - shapes = slide.shapes - left = Inches(left_t) - top = Inches(top_t) - width = Inches(width_t) - height = Inches(height_t) - table = shapes.add_table(rows,cols,left,top,width,height).table - table_rows = rows-1 - for c in range(cols): - if table_column_width: - table.columns[c].width = Inches(table_column_width[c]) - table.cell(0,c).text = table_header[c] - table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size) - - row = 1 - for line in open(table_data_file): - if(line != first_line): - line_cells = line.split('\t') - if header_not_exist_in_table: - for num in header_not_exist_in_table: - line_cells.insert(num," ") - for j in range(len(line_cells) - 1): - table.cell(row,j).text = str(line_cells[j]) - table.cell(row,j).text_frame.paragraphs[0].font.size = Pt(font_size) - row += 1 - textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25)) - tf = textbox.text_frame - if(if_print_rowNo == True): - tf.paragraphs[0].text = table_name +" (N=" + str(table_rows) + ")" - else: - tf.paragraphs[0].text = table_name - tf.paragraphs[0].font.size = Pt(8) - tf.paragraphs[0].font.bold = True - tf.paragraphs[0].alignment = PP_ALIGN.CENTER - - ppt.save(output_ppt_file) - data_nrows = table_rows - return data_nrows - - -def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide): - table_file = open(table_data_file) - lines = table_file.readlines() - if not lines: - return - first_line = lines[0] - rows = len(lines) first_line_cells = first_line.split('\t') cols = len(table_header) header_not_exist_in_table = [] @@ -801,34 +740,40 @@ def insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,widt if not if_exist: header_not_exist_in_table.append(n) data_rows = [] - for line in lines: - 
if(line != first_line): - line_cells = line.strip('\t') - if header_not_exist_in_table: - for num in header_not_exist_in_table: - line_cells.insert(num," ") - row_data = [cell.strip() for cell in line.split('\t')] - data_rows.append(row_data) + for line in lines[1:]: + line_cells = line.strip('\t') + if header_not_exist_in_table: + for num in header_not_exist_in_table: + line_cells.insert(num," ") + row_data = [cell.strip() for cell in line.split('\t')] + data_rows.append(row_data) + total_rows = len(data_rows) ppt = Presentation(output_ppt_file) - if(rows <= table_max_rows_per_slide): + if(table_max_rows_per_slide is None or total_rows <= table_max_rows_per_slide): total_slides_needed = 1 + rows_per_page = total_rows + start_slide_index = slide_n else: - total_slides_needed = rows // table_max_rows_per_slide + 1 - - total_rows = len (data_rows) - start_idx = 0 - table_page_num = 1 - while start_idx < total_rows: - end_idx = min(start_idx + table_max_rows_per_slide, total_rows) - slide_data = data_rows[start_idx:end_idx] - slide = ppt.slides.add_slide(ppt.slide_layouts[6]) + total_slides_needed = (total_rows + table_max_rows_per_slide -1) // table_max_rows_per_slide + rows_per_page = table_max_rows_per_slide + start_slide_index = None + + for page_num in range(total_slides_needed): + start_idx = page_num * rows_per_page + end_idx = min(start_idx + rows_per_page, total_rows) + current_page_data = data_rows[start_idx:end_idx] + current_page_rows = len(current_page_data) + if(start_slide_index is not None and page_num == 0): + slide = ppt.slides[slide_n - 1] + else: + slide = ppt.slides.add_slide(ppt.slide_layouts[6]) shapes = slide.shapes left = Inches(left_t) top = Inches(top_t) width = Inches(width_t) height = Inches(height_t) - table_rows = len(slide_data) + 1 + table_rows = current_page_rows + 1 table = shapes.add_table(table_rows,cols,left,top,width,height).table for c in range(cols): if table_column_width: @@ -836,26 +781,26 @@ def 
insert_table_to_ppt_end(table_data_file,slide_n,table_name,left_h,top_h,widt table.cell(0,c).text = table_header[c] table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size) - for row_idx, row_data in enumerate(slide_data, start=1): + for row_idx, row_data in enumerate(current_page_data, start=1): for col_idx in range(cols): table.cell(row_idx,col_idx).text = str(row_data[col_idx]) table.cell(row_idx,col_idx).text_frame.paragraphs[0].font.size = Pt(font_size) - start_idx = end_idx - textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25)) tf = textbox.text_frame if(if_print_rowNo == True): - tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(table_page_num) + "/" + str(total_slides_needed) + ")" + if(table_max_rows_per_slide is not None): + tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")" + else: + tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ")" else: tf.paragraphs[0].text = table_name tf.paragraphs[0].font.size = Pt(8) tf.paragraphs[0].font.bold = True tf.paragraphs[0].alignment = PP_ALIGN.CENTER - table_page_num = table_page_num + 1 ppt.save(output_ppt_file) - return total_slides_needed + return total_rows, total_slides_needed def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file): @@ -1588,7 +1533,7 @@ def main(argv): slide6_table_font_size = 7 if_print_rowNo = False for table_index in slide6_table_ppSlide: - slide6_table_nrows = 
insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[]) + slide6_table_nrows, _ = insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[],table_max_rows_per_slide=None) output_file_preMTB_AppendixTable = output_file_preMTB_table_path + "_preMTBTable_Appendix.txt" output_table_file_filterResults_AllReporVariants_CodingRegion = output_file_preMTB_table_path + "_AllReporVariants_CodingRegion.txt" stable_text = update_ppt_variant_summary_table(slide6_table_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file) @@ -1608,8 +1553,8 @@ def main(argv): slide8_table_font_size = 7 if_print_rowNo = True table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53] - table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide")) - 1 - slide8_table_slides = insert_table_to_ppt_end(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide) + table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide")) + _, slide8_table_slides_added = 
insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide) # Insert the CNV_overveiw_plots pictures A2, B3 and C1 into report. A2_to_extract=[2] @@ -1626,7 +1571,7 @@ def main(argv): slides = ppt.slides._sldIdLst slides_list = list(slides) slides.remove(slides_list[7]) - slides.insert(slide_count + 1,slides_list[7]) + slides.append(slides_list[7]) ppt.save(output_ppt_file) print("Generate report for " + DNA_sampleID) ppt_nr += 1 From 995f7a82421a5dbe9efb4fbca5864cd9b15801ab Mon Sep 17 00:00:00 2001 From: xiaoliz0 Date: Tue, 27 Jan 2026 09:16:51 +0100 Subject: [PATCH 04/24] Update codes based on the comments from Martin. --- Script/PRONTO.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index f3dee15..dd77f7e 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -741,7 +741,7 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h, header_not_exist_in_table.append(n) data_rows = [] for line in lines[1:]: - line_cells = line.strip('\t') + line_cells = line.split('\t') if header_not_exist_in_table: for num in header_not_exist_in_table: line_cells.insert(num," ") @@ -1567,7 +1567,6 @@ def main(argv): # Change slides order. ppt = Presentation(output_ppt_file) - slide_count = len(ppt.slides) slides = ppt.slides._sldIdLst slides_list = list(slides) slides.remove(slides_list[7]) From 18362bcde967436a35ae2f3dee3b125ebf5affdf Mon Sep 17 00:00:00 2001 From: xiaoliz0 Date: Tue, 27 Jan 2026 12:47:29 +0100 Subject: [PATCH 05/24] Remove the return value for total number of added slides for the table in the end of the report, since it is not in use in any other places. 
--- Script/PRONTO.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index dd77f7e..d0630e7 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -800,7 +800,7 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h, tf.paragraphs[0].alignment = PP_ALIGN.CENTER ppt.save(output_ppt_file) - return total_rows, total_slides_needed + return total_rows def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file): @@ -1533,7 +1533,7 @@ def main(argv): slide6_table_font_size = 7 if_print_rowNo = False for table_index in slide6_table_ppSlide: - slide6_table_nrows, _ = insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[],table_max_rows_per_slide=None) + slide6_table_nrows = insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[],table_max_rows_per_slide=None) output_file_preMTB_AppendixTable = output_file_preMTB_table_path + "_preMTBTable_Appendix.txt" output_table_file_filterResults_AllReporVariants_CodingRegion = output_file_preMTB_table_path + "_AllReporVariants_CodingRegion.txt" stable_text = 
update_ppt_variant_summary_table(slide6_table_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file) @@ -1554,7 +1554,7 @@ def main(argv): if_print_rowNo = True table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53] table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide")) - _, slide8_table_slides_added = insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide) + insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide) # Insert the CNV_overveiw_plots pictures A2, B3 and C1 into report. 
A2_to_extract=[2] From bebe60c66f28c9d85d92049d3165deffb037c631 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 29 Jan 2026 13:35:12 +0000 Subject: [PATCH 06/24] chore: add pandas and sort alphabetically --- requirements-test.txt | 4 ++-- requirements.txt | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/requirements-test.txt b/requirements-test.txt index b30f9dc..517e35c 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==7.4.3 +pytest-emoji==0.2.0 pytest-md==0.2.0 -pytest-emoji==0.2.0 \ No newline at end of file +pytest==7.4.3 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 64993a3..d0ec330 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ click==8.1.7 +pandas==2.2.3 +pdf2image==1.17.0 python-docx==1.1.0 python-pptx==0.6.23 xlrd==2.0.1 xlutils==2.0.0 -pdf2image==1.17.0 From 2d84df795dbfe8bac062fbd6dc2549c3a98abf1f Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 29 Jan 2026 13:36:00 +0000 Subject: [PATCH 07/24] feat: add function to normalize column indices --- pronto/pronto.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pronto/pronto.py b/pronto/pronto.py index 3f87cbb..c5ff8e1 100644 --- a/pronto/pronto.py +++ b/pronto/pronto.py @@ -1,6 +1,7 @@ import glob import logging import os +import pandas # get tumor mutational burden label def get_tmb_string(val): @@ -27,3 +28,15 @@ def glob_tsoppi_file(is_error, root, run_id, *path_units): else: logging.error("unsuccessful glob strings for {}:\n{}\n{}".format(run_id, glob_string_ous, glob_string_hus)) raise ValueError + +def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list): + # determine current, missing and additional column indices + curr_col_idx = df.columns.tolist() + miss_col_idx = list(set(exp_col_idx) - set(curr_col_idx)) + add_col_idx = list(set(curr_col_idx) - set(exp_col_idx)) + # add missing column indices + for i in miss_col_idx: + df[i] = ' ' + # 
combine expected with additional to get all present column indices and rearrange columns accordingly, additional columns are moved to the right + all_col_idx = exp_col_idx + add_col_idx + return df[all_col_idx] \ No newline at end of file From fd2e0acfcd06c69bc49ac8fd23f4439aeba10b07 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 29 Jan 2026 13:36:41 +0000 Subject: [PATCH 08/24] test: add unittests for column index normalization function --- pronto/tests/pronto_test.py | 83 +++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py index c0e6aa7..9213355 100644 --- a/pronto/tests/pronto_test.py +++ b/pronto/tests/pronto_test.py @@ -1,3 +1,4 @@ +import pandas import pytest import pronto.pronto @@ -99,3 +100,85 @@ def test_get_tmb_string(input, exception, want): def test_glob_tsoppi_file(inputs, exception, want): with exception: assert pronto.pronto.glob_tsoppi_file(*inputs) == want + +@pytest.mark.parametrize( + "inputs, exception, want", + [ + ( + ( + pandas.DataFrame({ + "one": [1, 2], + "two": [3, 4], + "three": [5, 6], + "four": [7, 8], + }), + ["one", "two", "three", "four"], + ), + does_not_raise(), + pandas.DataFrame({ + "one": [1, 2], + "two": [3, 4], + "three": [5, 6], + "four": [7, 8], + }), + ), + ( + ( + pandas.DataFrame({ + "one": [1, 2], + "two": [3, 4], + "four": [7, 8], + }), + ["one", "two", "three", "four"], + ), + does_not_raise(), + pandas.DataFrame({ + "one": [1, 2], + "two": [3, 4], + "three": [' ', ' '], + "four": [7, 8], + }), + ), + ( + ( + pandas.DataFrame({ + "one": [1, 2], + "two": [3, 4], + "three": [5, 6], + "four": [7, 8], + }), + ["two", "three", "four"], + ), + does_not_raise(), + pandas.DataFrame({ + "two": [3, 4], + "three": [5, 6], + "four": [7, 8], + "one": [1, 2], + }), + ), + ( + ( + pandas.DataFrame({ + "one": [1, 2], + "two": [3, 4], + "four": [7, 8], + "five": [9, 10], + }), + ["one", "two", "three", "four"], + ), + does_not_raise(), 
+ pandas.DataFrame({ + "one": [1, 2], + "two": [3, 4], + "three": [' ', ' '], + "four": [7, 8], + "five": [9, 10], + }), + ), + ] +) +def test_normalize_column_index(inputs, exception, want): + with exception: + get = pronto.pronto.normalize_column_index(*inputs) + assert want.equals(get) \ No newline at end of file From 09e6aee619e1d6b958cef5aab48f21c3680d8488 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 29 Jan 2026 13:42:39 +0000 Subject: [PATCH 09/24] feat: use column index normalizing function in main script, add comments and clean up --- Script/PRONTO.py | 56 +++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 3aa3c4a..2efa4be 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -28,6 +28,7 @@ from decimal import Decimal from copy import deepcopy import pronto.pronto as pronto +import pandas from pdf2image import convert_from_path runID = "" @@ -731,45 +732,38 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_ ppt.save(output_ppt_file) -def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide): - table_file = open(table_data_file) - lines = table_file.readlines() - if not lines: +def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide): + + # load table data + try: + table_data = pandas.read_csv(table_file, sep='\t') + except pandas.errors.EmptyDataError: + logging.warning("The file is empty.") return - first_line = lines[0] - first_line_cells = first_line.split('\t') + + # add empty columns for missing header columns and move additional columns to the right + table_data = pronto.normalize_column_index(table_data, 
table_header) + + # determine column and row number cols = len(table_header) - header_not_exist_in_table = [] - for n in range(len(table_header)): - if_exist = False - if(table_header[n] in first_line_cells): - if_exist = True - if not if_exist: - header_not_exist_in_table.append(n) - data_rows = [] - for line in lines[1:]: - line_cells = line.split('\t') - if header_not_exist_in_table: - for num in header_not_exist_in_table: - line_cells.insert(num," ") - row_data = [cell.strip() for cell in line.split('\t')] - data_rows.append(row_data) - total_rows = len(data_rows) + rows = len(table_data) - ppt = Presentation(output_ppt_file) - if(table_max_rows_per_slide is None or total_rows <= table_max_rows_per_slide): + # how many slides, rows per slide, and start slide index + if(table_max_rows_per_slide is None or rows <= table_max_rows_per_slide): total_slides_needed = 1 - rows_per_page = total_rows + rows_per_page = rows start_slide_index = slide_n else: - total_slides_needed = (total_rows + table_max_rows_per_slide -1) // table_max_rows_per_slide + total_slides_needed = (rows + table_max_rows_per_slide -1) // table_max_rows_per_slide rows_per_page = table_max_rows_per_slide start_slide_index = None + ppt = Presentation(output_ppt_file) for page_num in range(total_slides_needed): start_idx = page_num * rows_per_page - end_idx = min(start_idx + rows_per_page, total_rows) - current_page_data = data_rows[start_idx:end_idx] + end_idx = min(start_idx + rows_per_page, rows) + data_rows = table_data.values.tolist() + current_page_data = data_rows[start_idx:end_idx] # use df current_page_rows = len(current_page_data) if(start_slide_index is not None and page_num == 0): slide = ppt.slides[slide_n - 1] @@ -797,9 +791,9 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h, tf = textbox.text_frame if(if_print_rowNo == True): if(table_max_rows_per_slide is not None): - tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + 
str(page_num+1) + "/" + str(total_slides_needed) + ")" + tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")" else: - tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ")" + tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")" else: tf.paragraphs[0].text = table_name tf.paragraphs[0].font.size = Pt(8) @@ -807,7 +801,7 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h, tf.paragraphs[0].alignment = PP_ALIGN.CENTER ppt.save(output_ppt_file) - return total_rows + return rows def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file): From 22edc4ba97d3151fb7e1996bf38ddb8df223120e Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 29 Jan 2026 13:43:54 +0000 Subject: [PATCH 10/24] feat: make warning on empty file more specific --- Script/PRONTO.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 2efa4be..4d861fb 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -738,7 +738,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ try: table_data = pandas.read_csv(table_file, sep='\t') except pandas.errors.EmptyDataError: - logging.warning("The file is empty.") + logging.warning("{} is empty".format(table_file)) return # add empty columns for missing header columns and move additional columns to the right From 7f5509ac63491077e045db2c57aacbe4a3238992 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 29 Jan 2026 14:24:11 +0000 Subject: [PATCH 11/24] feat: simplify variable setting via if statement --- Script/PRONTO.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 
4d861fb..5fcb630 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -29,6 +29,7 @@ from copy import deepcopy import pronto.pronto as pronto import pandas +import math from pdf2image import convert_from_path runID = "" @@ -748,20 +749,16 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ cols = len(table_header) rows = len(table_data) - # how many slides, rows per slide, and start slide index - if(table_max_rows_per_slide is None or rows <= table_max_rows_per_slide): - total_slides_needed = 1 - rows_per_page = rows - start_slide_index = slide_n - else: - total_slides_needed = (rows + table_max_rows_per_slide -1) // table_max_rows_per_slide - rows_per_page = table_max_rows_per_slide - start_slide_index = None + # how many slides, and start slide index + if not table_max_rows_per_slide: + table_max_rows_per_slide = rows + total_slides_needed = math.ceil(rows / table_max_rows_per_slide) + start_slide_index = None if total_slides_needed > 1 else slide_n ppt = Presentation(output_ppt_file) for page_num in range(total_slides_needed): - start_idx = page_num * rows_per_page - end_idx = min(start_idx + rows_per_page, rows) + start_idx = page_num * table_max_rows_per_slide + end_idx = min(start_idx + table_max_rows_per_slide, rows) data_rows = table_data.values.tolist() current_page_data = data_rows[start_idx:end_idx] # use df current_page_rows = len(current_page_data) @@ -790,7 +787,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25)) tf = textbox.text_frame if(if_print_rowNo == True): - if(table_max_rows_per_slide is not None): + if(total_slides_needed > 1): tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")" else: tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")" From 282a54ec29d9bda552fcf74aeafd8623bd709649 Mon Sep 17 00:00:00 
2001 From: Martin Rippin Date: Tue, 3 Feb 2026 14:26:50 +0000 Subject: [PATCH 12/24] feat: round floats in AF_tumor_DNA to 2 decimal places --- Script/PRONTO.py | 3 +++ pronto/pronto.py | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 5fcb630..23c543a 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -745,6 +745,9 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ # add empty columns for missing header columns and move additional columns to the right table_data = pronto.normalize_column_index(table_data, table_header) + # round floats to 2 decimal places + table_data = pronto.set_column_to_2_decimals(table_data, "AF_tumor_DNA") + # determine column and row number cols = len(table_header) rows = len(table_data) diff --git a/pronto/pronto.py b/pronto/pronto.py index c5ff8e1..ca34994 100644 --- a/pronto/pronto.py +++ b/pronto/pronto.py @@ -39,4 +39,11 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list): df[i] = ' ' # combine expected with additional to get all present column indices and rearrange columns accordingly, additional columns are moved to the right all_col_idx = exp_col_idx + add_col_idx - return df[all_col_idx] \ No newline at end of file + return df[all_col_idx] + +def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str): + if col_name in df.columns: + df[col_name] = df[col_name].map('{:.2f}'.format) + else: + logging.info("Column {} not found in dataframe".format(col_name)) + return df \ No newline at end of file From b88f6523cdddd49eb0b6b1a580147f610d20c7d7 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Tue, 3 Feb 2026 14:27:12 +0000 Subject: [PATCH 13/24] test: add unittests for rounding function --- pronto/tests/pronto_test.py | 50 +++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py index 9213355..d8ebbf2 100644 --- 
a/pronto/tests/pronto_test.py +++ b/pronto/tests/pronto_test.py @@ -181,4 +181,54 @@ def test_glob_tsoppi_file(inputs, exception, want): def test_normalize_column_index(inputs, exception, want): with exception: get = pronto.pronto.normalize_column_index(*inputs) + assert want.equals(get) + +@pytest.mark.parametrize( + "inputs, exception, want", + [ + ( + ( + pandas.DataFrame({ + "one": [1, 2], + "two": [3.333, 4.444], + }), + "two", + ), + does_not_raise(), + pandas.DataFrame({ + "one": [1, 2], + "two": ["3.33", "4.44"], + }), + ), + ( + ( + pandas.DataFrame({ + "one": [1, 2], + "two": [3.666, 4.777], + }), + "two", + ), + does_not_raise(), + pandas.DataFrame({ + "one": [1, 2], + "two": ["3.67", "4.78"], + }), + ), + ( + ( + pandas.DataFrame({ + "one": [1, 2], + }), + "two", + ), + does_not_raise(), + pandas.DataFrame({ + "one": [1, 2], + }), + ), + ] +) +def test_set_column_to_2_decimals(inputs, exception, want): + with exception: + get = pronto.pronto.set_column_to_2_decimals(*inputs) assert want.equals(get) \ No newline at end of file From 2ec761f8c21aca43ecf7ee0909b749163fb7cbd3 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Wed, 4 Feb 2026 08:16:54 +0000 Subject: [PATCH 14/24] style: be consistent with using idx for index --- Script/PRONTO.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 23c543a..eed437d 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -756,7 +756,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ if not table_max_rows_per_slide: table_max_rows_per_slide = rows total_slides_needed = math.ceil(rows / table_max_rows_per_slide) - start_slide_index = None if total_slides_needed > 1 else slide_n + start_slide_idx = None if total_slides_needed > 1 else slide_n ppt = Presentation(output_ppt_file) for page_num in range(total_slides_needed): @@ -765,7 +765,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ 
data_rows = table_data.values.tolist() current_page_data = data_rows[start_idx:end_idx] # use df current_page_rows = len(current_page_data) - if(start_slide_index is not None and page_num == 0): + if(start_slide_idx is not None and slide_idx == 0): slide = ppt.slides[slide_n - 1] else: slide = ppt.slides.add_slide(ppt.slide_layouts[6]) From 7f080ea093e3356d94ec2e21ee22db82d8690a3e Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Wed, 4 Feb 2026 08:22:22 +0000 Subject: [PATCH 15/24] chore: rename page_num to slide_idx to be consistent with naming --- Script/PRONTO.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index eed437d..bf57686 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -759,7 +759,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ start_slide_idx = None if total_slides_needed > 1 else slide_n ppt = Presentation(output_ppt_file) - for page_num in range(total_slides_needed): + for slide_idx in range(total_slides_needed): start_idx = page_num * table_max_rows_per_slide end_idx = min(start_idx + table_max_rows_per_slide, rows) data_rows = table_data.values.tolist() @@ -791,7 +791,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ tf = textbox.text_frame if(if_print_rowNo == True): if(total_slides_needed > 1): - tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")" + tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(slide_idx+1) + "/" + str(total_slides_needed) + ")" else: tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")" else: From a81fa30c6c2bf436cac8d9c43ac22ef880a5911b Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 5 Feb 2026 13:56:22 +0000 Subject: [PATCH 16/24] fix: handle rouding if type is string --- pronto/pronto.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pronto/pronto.py 
b/pronto/pronto.py index ca34994..f91a22e 100644 --- a/pronto/pronto.py +++ b/pronto/pronto.py @@ -43,6 +43,8 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list): def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str): if col_name in df.columns: + if df[col_name].dtype != float: + df[col_name] = df[col_name].astype(float) df[col_name] = df[col_name].map('{:.2f}'.format) else: logging.info("Column {} not found in dataframe".format(col_name)) From afdbf800146a0e58ff74a964db06ab3ec3a098f9 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 5 Feb 2026 13:57:10 +0000 Subject: [PATCH 17/24] test: include test case for decimal rounding that contains strings --- pronto/tests/pronto_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py index d8ebbf2..3bbc96d 100644 --- a/pronto/tests/pronto_test.py +++ b/pronto/tests/pronto_test.py @@ -204,7 +204,7 @@ def test_normalize_column_index(inputs, exception, want): ( pandas.DataFrame({ "one": [1, 2], - "two": [3.666, 4.777], + "two": ['3.666', '4.777'], }), "two", ), From 976c0ca1dcd4d4bb8634dced6dcdcf130c178966 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 5 Feb 2026 13:59:45 +0000 Subject: [PATCH 18/24] feat: introduce functions to get table data per slide and add table name to slide --- pronto/pronto.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/pronto/pronto.py b/pronto/pronto.py index f91a22e..94b5809 100644 --- a/pronto/pronto.py +++ b/pronto/pronto.py @@ -2,6 +2,7 @@ import logging import os import pandas +import pptx # get tumor mutational burden label def get_tmb_string(val): @@ -48,4 +49,31 @@ def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str): df[col_name] = df[col_name].map('{:.2f}'.format) else: logging.info("Column {} not found in dataframe".format(col_name)) - return df \ No newline at end of file + return df + +# get data 
fitting on one slide based on slide index and max rows per slide +def get_slide_table_data(df: pandas.DataFrame, slide_idx: int, max_rows: int): + start = slide_idx * max_rows + stop = min(start + max_rows, len(df)) + if start >= len(df): + return [] + table = df.values.tolist() + header = [df.columns.tolist()] + table_data = header + table[start:stop] + return table_data + +# add constructed table name to slide and format the textbox +def add_table_name(shapes: pptx.shapes.shapetree.SlideShapes, table_name: str, left: float, top: float, width: float, height: float, font_size: float, print_row_num: bool, slide_idx: int, total_slides: int, rows: int): + + # add textbox to slide + paragraph = shapes.add_textbox(pptx.util.Inches(left), pptx.util.Inches(top), pptx.util.Inches(width), pptx.util.Inches(height)).text_frame.paragraphs[0] + + # construct table name with optional row number and slide count + part_1 = ", Page {}/{}".format(slide_idx + 1, total_slides) if total_slides > 1 else '' + part_2 = " (N={}{})".format(rows, part_1) if print_row_num else '' + paragraph.text = "{}{}".format(table_name, part_2) + + # font formatting and placement + paragraph.font.size = pptx.util.Pt(font_size) + paragraph.font.bold = True + paragraph.alignment = pptx.enum.text.PP_ALIGN.CENTER From 51628f8cc34c1648b7bb783049ab22edfb2c338c Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 5 Feb 2026 14:00:06 +0000 Subject: [PATCH 19/24] style: add comments to code --- pronto/pronto.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pronto/pronto.py b/pronto/pronto.py index 94b5809..3712a9a 100644 --- a/pronto/pronto.py +++ b/pronto/pronto.py @@ -30,6 +30,7 @@ def glob_tsoppi_file(is_error, root, run_id, *path_units): logging.error("unsuccessful glob strings for {}:\n{}\n{}".format(run_id, glob_string_ous, glob_string_hus)) raise ValueError +# normalize dataframe to expected column indices def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list): # determine current, 
missing and additional column indices curr_col_idx = df.columns.tolist() @@ -42,6 +43,7 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list): all_col_idx = exp_col_idx + add_col_idx return df[all_col_idx] +# set dataframe column format to 2 decimal points def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str): if col_name in df.columns: if df[col_name].dtype != float: From a686731ffa14ce6b471747e011c7d79aea9c922c Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 5 Feb 2026 14:00:34 +0000 Subject: [PATCH 20/24] test: add tests for table data and table name functions --- pronto/tests/pronto_test.py | 186 +++++++++++++++++++++++++++++++++++- 1 file changed, 185 insertions(+), 1 deletion(-) diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py index 3bbc96d..7192c32 100644 --- a/pronto/tests/pronto_test.py +++ b/pronto/tests/pronto_test.py @@ -1,4 +1,5 @@ import pandas +import pptx import pytest import pronto.pronto @@ -231,4 +232,187 @@ def test_normalize_column_index(inputs, exception, want): def test_set_column_to_2_decimals(inputs, exception, want): with exception: get = pronto.pronto.set_column_to_2_decimals(*inputs) - assert want.equals(get) \ No newline at end of file + assert want.equals(get) + +def list_of_lists_equal(list1, list2): + if len(list1) != len(list2): + return False + for sublist1, sublist2 in zip(list1, list2): + print(sublist1, sublist2) + if sublist1 != sublist2: + return False + return True + +@pytest.mark.parametrize( + "inputs, exception, want", + [ + ( + ( + pandas.DataFrame({ + "one": [1, 2], + "two": [3, 4], + }), + 0, + 3, + ), + does_not_raise(), + [ + ["one", "two"], + [1, 3], + [2, 4], + ], + ), + ( + ( + pandas.DataFrame({ + "one": [1, 2, 3 ,4], + "two": [5, 6, 7, 8], + }), + 1, + 2, + ), + does_not_raise(), + [ + ["one", "two"], + [3, 7], + [4, 8], + ], + ), + ( + ( + pandas.DataFrame({ + "one": [1, 2, 3], + "two": [5, 6, 7], + }), + 1, + 2, + ), + does_not_raise(), + [ + ["one", 
"two"], + [3, 7], + ], + ), + ( + ( + pandas.DataFrame({ + "one": [1, 2, 3], + "two": [5, 6, 7], + }), + 2, + 2, + ), + does_not_raise(), + [], + ), + ] +) +def test_get_slide_table_data(inputs, exception, want): + with exception: + get = pronto.pronto.get_slide_table_data(*inputs) + assert list_of_lists_equal(get, want) + +def check_shape(shape, want_left, want_top, want_width, want_height): + assert shape.left == pptx.util.Inches(want_left) + assert shape.top == pptx.util.Inches(want_top) + assert shape.width == pptx.util.Inches(want_width) + assert shape.height == pptx.util.Inches(want_height) + +def check_paragraph(paragraph, want_text, want_font_size, want_bold, want_alignment): + assert paragraph.text == want_text + assert paragraph.font.size.pt == want_font_size + assert paragraph.font.bold == want_bold + assert paragraph.alignment == want_alignment + +@pytest.mark.parametrize( + "inputs, exception, want_shape, want_paragraph", + [ + ( + ( + 'Test', + 0.5, + 0.5, + 4, + 1, + 12, + True, + 0, + 3, + 4, + ), + does_not_raise(), + ( + 0.5, + 0.5, + 4, + 1, + ), + ( + 'Test (N=4, Page 1/3)', + 12.0, + True, + pptx.enum.text.PP_ALIGN.CENTER, + ) + ), + ( + ( + 'Test', + 0.5, + 0.5, + 4, + 1, + 12, + True, + 0, + 1, + 4, + ), + does_not_raise(), + ( + 0.5, + 0.5, + 4, + 1, + ), + ( + 'Test (N=4)', + 12.0, + True, + pptx.enum.text.PP_ALIGN.CENTER, + ) + ), + ( + ( + 'Test', + 0.5, + 0.5, + 4, + 1, + 12, + False, + 0, + 3, + 4, + ), + does_not_raise(), + ( + 0.5, + 0.5, + 4, + 1, + ), + ( + 'Test', + 12.0, + True, + pptx.enum.text.PP_ALIGN.CENTER, + ) + ), + ] +) +def test_add_table_name(inputs, exception, want_shape, want_paragraph): + with exception: + shapes = pptx.Presentation().slides.add_slide(pptx.Presentation().slide_layouts[6]).shapes + pronto.pronto.add_table_name(shapes, *inputs) + check_shape(shapes[0], *want_shape) + check_paragraph(shapes[0].text_frame.paragraphs[0], *want_paragraph) \ No newline at end of file From 
bca5f7e78329d0f996b0075bc219163fa44f3276 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 5 Feb 2026 14:02:19 +0000 Subject: [PATCH 21/24] feat: refactor and simplify insert_table_to_ppt --- Script/PRONTO.py | 60 +++++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index bf57686..c5be538 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -733,7 +733,7 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_ ppt.save(output_ppt_file) -def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide): +def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,print_row_num,table_column_width,table_max_rows_per_slide): # load table data try: @@ -752,53 +752,41 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ cols = len(table_header) rows = len(table_data) - # how many slides, and start slide index + # how many slides are required if not table_max_rows_per_slide: table_max_rows_per_slide = rows total_slides_needed = math.ceil(rows / table_max_rows_per_slide) - start_slide_idx = None if total_slides_needed > 1 else slide_n + # Add data to ppt ppt = Presentation(output_ppt_file) for slide_idx in range(total_slides_needed): - start_idx = page_num * table_max_rows_per_slide - end_idx = min(start_idx + table_max_rows_per_slide, rows) - data_rows = table_data.values.tolist() - current_page_data = data_rows[start_idx:end_idx] # use df - current_page_rows = len(current_page_data) - if(start_slide_idx is not None and slide_idx == 0): - slide = ppt.slides[slide_n - 1] + current_slide_data = pronto.get_slide_table_data(table_data, slide_idx, table_max_rows_per_slide) + if(total_slides_needed == 
1): + shapes = ppt.slides[slide_n - 1].shapes else: - slide = ppt.slides.add_slide(ppt.slide_layouts[6]) - shapes = slide.shapes + shapes = ppt.slides.add_slide(ppt.slide_layouts[6]).shapes + + # create new table on slide left = Inches(left_t) top = Inches(top_t) width = Inches(width_t) height = Inches(height_t) - table_rows = current_page_rows + 1 + table_rows = len(current_slide_data) table = shapes.add_table(table_rows,cols,left,top,width,height).table - for c in range(cols): - if table_column_width: - table.columns[c].width = Inches(table_column_width[c]) - table.cell(0,c).text = table_header[c] - table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size) - - for row_idx, row_data in enumerate(current_page_data, start=1): - for col_idx in range(cols): - table.cell(row_idx,col_idx).text = str(row_data[col_idx]) - table.cell(row_idx,col_idx).text_frame.paragraphs[0].font.size = Pt(font_size) - - textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25)) - tf = textbox.text_frame - if(if_print_rowNo == True): - if(total_slides_needed > 1): - tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(slide_idx+1) + "/" + str(total_slides_needed) + ")" - else: - tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")" - else: - tf.paragraphs[0].text = table_name - tf.paragraphs[0].font.size = Pt(8) - tf.paragraphs[0].font.bold = True - tf.paragraphs[0].alignment = PP_ALIGN.CENTER + + # if table_column_width is provided, set the column width + if len(table_column_width) == cols: + for col_idx, width in enumerate(table_column_width): + table.columns[col_idx].width = Inches(width) + + # fill in the table data and set font size + for row_idx, row in enumerate(table.rows): + for col_idx, cell in enumerate(row.cells): + cell.text = current_slide_data[row_idx][col_idx] + cell.text_frame.paragraphs[0].font.size = Pt(font_size) + + # add table title + pronto.add_table_title(shapes, table_name, left_h, top_h, 
width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows) ppt.save(output_ppt_file) return rows From ce3dcedfcefef7dc3691a2bb3df6a2fa843739a2 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Thu, 12 Feb 2026 12:01:02 +0000 Subject: [PATCH 22/24] fix: use correct function name, thanks @xiaoliz0 --- Script/PRONTO.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index c5be538..2062067 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -786,7 +786,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ cell.text_frame.paragraphs[0].font.size = Pt(font_size) # add table title - pronto.add_table_title(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows) + pronto.add_table_name(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows) ppt.save(output_ppt_file) return rows From bb2daccad7a32f0cd91a175bcfc0558186ef5910 Mon Sep 17 00:00:00 2001 From: Martin Rippin Date: Fri, 13 Feb 2026 12:40:50 +0000 Subject: [PATCH 23/24] fix: handle strings with % and floats in set_column_to_2_decimals --- pronto/pronto.py | 7 +++---- pronto/tests/pronto_test.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pronto/pronto.py b/pronto/pronto.py index 3712a9a..fc22dde 100644 --- a/pronto/pronto.py +++ b/pronto/pronto.py @@ -43,12 +43,11 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list): all_col_idx = exp_col_idx + add_col_idx return df[all_col_idx] -# set dataframe column format to 2 decimal points +# set dataframe column format to 2 decimal points if float type def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str): if col_name in df.columns: - if df[col_name].dtype != float: - df[col_name] = df[col_name].astype(float) - df[col_name] = df[col_name].map('{:.2f}'.format) + if df[col_name].dtype == float: + df[col_name] = 
df[col_name].map('{:.2f}'.format) else: logging.info("Column {} not found in dataframe".format(col_name)) return df diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py index 7192c32..e58028e 100644 --- a/pronto/tests/pronto_test.py +++ b/pronto/tests/pronto_test.py @@ -205,14 +205,14 @@ def test_normalize_column_index(inputs, exception, want): ( pandas.DataFrame({ "one": [1, 2], - "two": ['3.666', '4.777'], + "two": ['21.0%', '0.5%'], }), "two", ), does_not_raise(), pandas.DataFrame({ "one": [1, 2], - "two": ["3.67", "4.78"], + "two": ["21.0%", "0.5%"], }), ), ( From 26af7f02f1307fede380b1adee740e458f4fda6a Mon Sep 17 00:00:00 2001 From: Martin Rippin <74295098+marrip@users.noreply.github.com> Date: Wed, 29 Apr 2026 09:18:53 +0200 Subject: [PATCH 24/24] fix: apply @xiaoliz0 's suggestions Co-authored-by: Xiaoli Zhang <81294502+xiaoliz0@users.noreply.github.com> --- Script/PRONTO.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 2062067..8b07304 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -759,9 +759,10 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ # Add data to ppt ppt = Presentation(output_ppt_file) + total_slides = len(ppt.slides) for slide_idx in range(total_slides_needed): current_slide_data = pronto.get_slide_table_data(table_data, slide_idx, table_max_rows_per_slide) - if(total_slides_needed == 1): + if(total_slides_needed == 1 and slide_n <= total_slides): shapes = ppt.slides[slide_n - 1].shapes else: shapes = ppt.slides.add_slide(ppt.slide_layouts[6]).shapes @@ -782,7 +783,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ # fill in the table data and set font size for row_idx, row in enumerate(table.rows): for col_idx, cell in enumerate(row.cells): - cell.text = current_slide_data[row_idx][col_idx] + cell.text = str(current_slide_data[row_idx][col_idx]) 
cell.text_frame.paragraphs[0].font.size = Pt(font_size) # add table title