Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
9278f15
Add new function to split the table generated in the end of the repor…
xiaoliz0 Jan 9, 2026
813bfdf
Update header information for the table in the end based on the comme…
xiaoliz0 Jan 13, 2026
64d8093
Update codes based on comments from Matin. Merge the two functions of…
xiaoliz0 Jan 20, 2026
7d76914
Merge branch 'main' into develop_issue91
xiaoliz0 Jan 21, 2026
995f7a8
Update codes based on the comments from Martin.
xiaoliz0 Jan 27, 2026
18362bc
Remove the return value for total number of added slides for the tabl…
xiaoliz0 Jan 27, 2026
48d78e2
Merge branch 'develop_issue91' of https://github.com/InPreD/PRONTO in…
xiaoliz0 Jan 27, 2026
bebe60c
chore: add pandas and sort alphabetically
marrip Jan 29, 2026
2d84df7
feat: add function to normalize column indices
marrip Jan 29, 2026
fd2e0ac
test: add unittests for column index normalization function
marrip Jan 29, 2026
09e6aee
feat: use column index normalizing function in main script, add comme…
marrip Jan 29, 2026
22edc4b
feat: make warning on empty file more specific
marrip Jan 29, 2026
7f5509a
feat: simply variable setting via if statement
marrip Jan 29, 2026
282a54e
feat: round floats in AF_tumor_DNA to 2 decimal places
marrip Feb 3, 2026
b88f652
test: add unittests for rounding function
marrip Feb 3, 2026
2ec761f
style: be consistent with using idx for index
marrip Feb 4, 2026
7f080ea
chore: rename page_num to slide_idx to be consistent with naming
marrip Feb 4, 2026
a81fa30
fix: handle rouding if type is string
marrip Feb 5, 2026
afdbf80
test: include test case for decimal rounding that contains strings
marrip Feb 5, 2026
976c0ca
feat: introduce functions to get table data per slide and add table n…
marrip Feb 5, 2026
51628f8
style: add comments to code
marrip Feb 5, 2026
a686731
test: add tests for table data and table name functions
marrip Feb 5, 2026
bca5f7e
feat: refactor and simplify insert_table_to_ppt
marrip Feb 5, 2026
ce3dced
fix: use correct function name, thanks @xiaoliz0
marrip Feb 12, 2026
bb2dacc
fix: handle strings with % and floats in set_column_to_2_decimals
marrip Feb 13, 2026
26af7f0
fix: apply @xiaoliz0 's suggestions
marrip Apr 29, 2026
39705da
Merge pull request #99 from marrip/insert_table_to_ppt
xiaoliz0 Apr 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Config/configure_PRONTO.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ data_path = /data/sample_data/analysis_results/
encoding_sys = utf-8
;Specify the number of columns to filter on (NB: this also determines the number of output tables the script generates):
filter_col_nu = 5
;Specify the maximum number of table rows per slide, starting from the 8th slide in the report. This is used to split long tables across several slides.
table_max_rows_per_slide = 14
;Please modify this for local env if you use MTF files to import the clinical data into meta file. Specify the version of year of the MTF files.
material_file_version = 2026

Expand Down
120 changes: 60 additions & 60 deletions Script/PRONTO.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
from decimal import Decimal
from copy import deepcopy
import pronto.pronto as pronto
import pandas
import math
from pdf2image import convert_from_path

runID = ""
Expand Down Expand Up @@ -731,67 +733,64 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_
ppt.save(output_ppt_file)


def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width):
table_file = open(table_data_file)
lines = table_file.readlines()
if not lines:
def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,print_row_num,table_column_width,table_max_rows_per_slide):

# load table data
try:
table_data = pandas.read_csv(table_file, sep='\t')
except pandas.errors.EmptyDataError:
logging.warning("{} is empty".format(table_file))
return
first_line = lines[0]
rows = len(lines)
first_line_cells = first_line.split('\t')

# add empty columns for missing header columns and move additional columns to the right
table_data = pronto.normalize_column_index(table_data, table_header)

# round floats to 2 decimal places
table_data = pronto.set_column_to_2_decimals(table_data, "AF_tumor_DNA")

# determine column and row number
cols = len(table_header)
header_not_exist_in_table = []
for n in range(len(table_header)):
if_exist = False
if(table_header[n] in first_line_cells):
if_exist = True
if not if_exist:
header_not_exist_in_table.append(n)
rows = len(table_data)

# how many slides are required
if not table_max_rows_per_slide:
table_max_rows_per_slide = rows
total_slides_needed = math.ceil(rows / table_max_rows_per_slide)

# Add data to ppt
ppt = Presentation(output_ppt_file)
try:
slide = ppt.slides[slide_n-1]
except:
slide = ppt.slides.add_slide(ppt.slide_layouts[6])
shapes = slide.shapes
left = Inches(left_t)
top = Inches(top_t)
width = Inches(width_t)
height = Inches(height_t)
table = shapes.add_table(rows,cols,left,top,width,height).table
table_rows = rows-1
col_protein_change_short_position = table_header.index("Protein_change_short")
for c in range(cols):
if table_column_width:
table.columns[c].width = Inches(table_column_width[c])
table.cell(0,c).text = table_header[c]
table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size)

row = 1
for line in open(table_data_file):
if(line != first_line):
line_cells = line.split('\t')
if header_not_exist_in_table:
for num in header_not_exist_in_table:
line_cells.insert(num," ")
if(line_cells[col_protein_change_short_position].endswith('X')):
line_cells[col_protein_change_short_position] = line_cells[col_protein_change_short_position][:-1] + '*'
for j in range(len(line_cells) - 1):
table.cell(row,j).text = str(line_cells[j])
table.cell(row,j).text_frame.paragraphs[0].font.size = Pt(font_size)
row += 1
textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
tf = textbox.text_frame
if(if_print_rowNo == True):
tf.paragraphs[0].text = table_name +" (N=" + str(table_rows) + ")"
else:
tf.paragraphs[0].text = table_name
tf.paragraphs[0].font.size = Pt(8)
tf.paragraphs[0].font.bold = True
tf.paragraphs[0].alignment = PP_ALIGN.CENTER
total_slides = len(ppt.slides)
for slide_idx in range(total_slides_needed):
current_slide_data = pronto.get_slide_table_data(table_data, slide_idx, table_max_rows_per_slide)
if(total_slides_needed == 1 and slide_n <= total_slides):
shapes = ppt.slides[slide_n - 1].shapes
else:
shapes = ppt.slides.add_slide(ppt.slide_layouts[6]).shapes

# create new table on slide
left = Inches(left_t)
top = Inches(top_t)
width = Inches(width_t)
height = Inches(height_t)
table_rows = len(current_slide_data)
table = shapes.add_table(table_rows,cols,left,top,width,height).table

# if table_column_width is provided, set the column width
if len(table_column_width) == cols:
for col_idx, width in enumerate(table_column_width):
table.columns[col_idx].width = Inches(width)

# fill in the table data and set font size
for row_idx, row in enumerate(table.rows):
for col_idx, cell in enumerate(row.cells):
cell.text = str(current_slide_data[row_idx][col_idx])
cell.text_frame.paragraphs[0].font.size = Pt(font_size)

# add table title
pronto.add_table_name(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows)

ppt.save(output_ppt_file)
data_nrows = table_rows
return data_nrows
return rows


def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file):
Expand Down Expand Up @@ -1524,7 +1523,7 @@ def main(argv):
slide6_table_font_size = 7
if_print_rowNo = False
for table_index in slide6_table_ppSlide:
slide6_table_nrows = insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[])
slide6_table_nrows = insert_table_to_ppt(slide6_table_data_file,table_index,slide6_table_name,slide6_header_left,slide6_header_top,slide6_header_width,slide6_table_left,slide6_table_top,slide6_table_width,slide6_table_height,slide6_table_font_size,slide6_table_header,output_ppt_file,if_print_rowNo,[],table_max_rows_per_slide=None)
output_file_preMTB_AppendixTable = output_file_preMTB_table_path + "_preMTBTable_Appendix.txt"
output_table_file_filterResults_AllReporVariants_CodingRegion = output_file_preMTB_table_path + "_AllReporVariants_CodingRegion.txt"
stable_text = update_ppt_variant_summary_table(slide6_table_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file)
Expand All @@ -1544,7 +1543,8 @@ def main(argv):
slide8_table_font_size = 7
if_print_rowNo = True
table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53]
slide8_table_nrows = insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width)
table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide"))
insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide)

# Insert the CNV_overview_plots pictures A2, B3 and C1 into the report.
A2_to_extract=[2]
Expand All @@ -1555,12 +1555,12 @@ def main(argv):
B3_C1_to_extract = [4, 5]
pdf_page_image_to_ppt(CNV_overview_plots_pdf,output_ppt_file,B3_C1_to_extract,width_scale=1,height_scale=0.5)

# Change slides order.
# Change slides order.
ppt = Presentation(output_ppt_file)
slides = ppt.slides._sldIdLst
slides_list = list(slides)
slides.remove(slides_list[7])
slides.insert(12,slides_list[7])
slides.append(slides_list[7])
ppt.save(output_ppt_file)
print("Generate report for " + DNA_sampleID)
ppt_nr += 1
Expand Down
51 changes: 51 additions & 0 deletions pronto/pronto.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import glob
import logging
import os
import pandas
import pptx

# get tumor mutational burden label
def get_tmb_string(val):
Expand All @@ -27,3 +29,52 @@ def glob_tsoppi_file(is_error, root, run_id, *path_units):
else:
logging.error("unsuccessful glob strings for {}:\n{}\n{}".format(run_id, glob_string_ous, glob_string_hus))
raise ValueError

# normalize dataframe to expected column indices
def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
    """Return a copy of ``df`` whose columns start with ``exp_col_idx``.

    Expected columns that are absent from ``df`` are added and filled with a
    single space (' '). Columns of ``df`` that are not expected are kept and
    moved to the right, in the order they appear in ``df``.

    Args:
        df: input table; it is not modified.
        exp_col_idx: expected column names, in the desired order.

    Returns:
        A new DataFrame with columns ``exp_col_idx`` followed by the
        additional columns of ``df``.
    """
    # accept any sequence (e.g. a tuple) so that the concatenation below works
    exp_col_idx = list(exp_col_idx)
    expected = set(exp_col_idx)
    present = set(df.columns)
    # list comprehensions (not set differences) keep the column order
    # deterministic between runs
    miss_col_idx = [col for col in exp_col_idx if col not in present]
    add_col_idx = [col for col in df.columns if col not in expected]
    # work on a copy so the caller's dataframe is left untouched
    result = df.copy()
    # add missing column indices
    for col in miss_col_idx:
        result[col] = ' '
    # expected columns first, additional columns are moved to the right
    return result[exp_col_idx + add_col_idx]

# set dataframe column format to 2 decimal points if float type
def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
    """Format the values of a float column as strings with two decimals.

    The column is only rewritten when its dtype compares equal to ``float``;
    columns of any other dtype are left as they are. If ``col_name`` is not a
    column of ``df``, an info message is logged and nothing is changed.

    Args:
        df: table to update; the column is replaced on this object.
        col_name: name of the column to format.

    Returns:
        The same ``df`` object that was passed in.
    """
    # nothing to format when the column does not exist
    if col_name not in df.columns:
        logging.info("Column {} not found in dataframe".format(col_name))
        return df

    column = df[col_name]
    if column.dtype == float:
        # each float becomes a string such as '0.12'
        df[col_name] = column.map('{:.2f}'.format)
    return df

# get data fitting on one slide based on slide index and max rows per slide
def get_slide_table_data(df: pandas.DataFrame, slide_idx: int, max_rows: int):
    """Return the header plus the rows of ``df`` that belong on one slide.

    Args:
        df: full table.
        slide_idx: index of the slide; slide 0 starts at the first row.
        max_rows: number of data rows placed on one slide.

    Returns:
        A list of lists: the column names first, followed by the data rows
        for this slide. An empty list is returned when the first row of the
        slide lies at or beyond the end of ``df``.
    """
    n_rows = len(df)
    first = slide_idx * max_rows
    # no data left for this slide
    if first >= n_rows:
        return []
    last = min(first + max_rows, n_rows)
    body = df.values.tolist()[first:last]
    # header row is repeated on every slide
    return [df.columns.tolist(), *body]

# add constructed table name to slide and format the textbox
def add_table_name(shapes: pptx.shapes.shapetree.SlideShapes, table_name: str, left: float, top: float, width: float, height: float, font_size: float, print_row_num: bool, slide_idx: int, total_slides: int, rows: int):
    """Add a bold, centered textbox holding the table title to ``shapes``.

    The title is ``table_name``. When ``print_row_num`` is true, " (N=<rows>)"
    is appended; if in addition ``total_slides`` is greater than 1, the page
    is included as " (N=<rows>, Page <slide_idx + 1>/<total_slides>)".

    Args:
        shapes: shape collection the textbox is added to.
        table_name: base title text.
        left, top, width, height: textbox position and size, passed to Inches.
        font_size: title font size, passed to Pt.
        print_row_num: whether to append the row count (and page) suffix.
        slide_idx: index of the current slide; shown as ``slide_idx + 1``.
        total_slides: number of slides the table is spread over.
        rows: number shown after "N=".
    """
    inches = pptx.util.Inches

    # add textbox to slide
    textbox = shapes.add_textbox(inches(left), inches(top), inches(width), inches(height))
    paragraph = textbox.text_frame.paragraphs[0]

    # construct table name with optional row number and slide count
    page_info = ''
    if total_slides > 1:
        page_info = ", Page {}/{}".format(slide_idx + 1, total_slides)
    suffix = ''
    if print_row_num:
        suffix = " (N={}{})".format(rows, page_info)
    paragraph.text = "{}{}".format(table_name, suffix)

    # font formatting and placement
    font = paragraph.font
    font.size = pptx.util.Pt(font_size)
    font.bold = True
    paragraph.alignment = pptx.enum.text.PP_ALIGN.CENTER
Loading
Loading