diff --git a/Config/configure_PRONTO.ini b/Config/configure_PRONTO.ini index 9b7fa28..6634a5e 100644 --- a/Config/configure_PRONTO.ini +++ b/Config/configure_PRONTO.ini @@ -7,20 +7,45 @@ inpred_node = OUS data_path = /data/sample_data/analysis_results/ ;Please modify this if you need to specify file encoding as "ISO-8859-1" or other encoding standards in your local environment to read special Norwegian characters. encoding_sys = utf-8 -;Specify the number of columns you want to do the filtering (NB: this will also make the script to generate the number of output tables): -filter_col_nu = 5 +;Specify the number of top filter sections with highest priority, these sections will be named starting with FILTER0 in this configuration file. (NB: the other filter sections will use the combined results from all FILTER0): +top_filter = 3 +;Specify the number of filter sections with low priority. (NB: this will also make the script to generate the number of output tables): +filter_col_nu = 4 ;Specify the number of max rows of the table per slide starting from the 8th slide in report. This is used to split long tables. table_max_rows_per_slide = 14 ;Please modify this for local env if you use MTF files to import the clinical data into meta file. Specify the version of year of the MTF files. material_file_version = 2026 -[FILTER1] +[FILTER0-1] +;Specify the column name need to be filtered: +filter_column = CPSR_ACMG_class,CPSR_ClinVar_class +;Specify the key word for the filter column. If there are multiple keywords need to be filtered at the same time, please split them with ','. You can use ' && ' to combine the keywords startswith '!' as one filter condition: +key_word = Likely_Pathogenic,Pathogenic +;Specify the column names need to be transfered to new data file. Split the column names with ','. NB: the column contents must be the same as they are in the sheet. 
+columns = Gene_symbol,Change_summary,Genomic_location,DNA_change,cDNA_change,Protein_change_short,Coding_status,Depth_tumor_DNA,AF_tumor_DNA,Depth_normal_DNA,AF_normal_DNA,Depth_tumor_RNA,AF_tumor_RNA,TSO500_LocalApp_class,Class_judgement_comments +;Specify the column name to add in the right end of the table to verify this filter condition. NB: if this value is given, a new column will be added into the tables in the report. Same column name in different filter sections will be merged into one filter column. +filter_column_add = Filter_rescued +;Specify the output result file name. Please note that you type should not exceed 31 characters and not contain any of the characters ':' ' \' '/' '?' '*' '[' or ']': +output_table = pathogenic_germline + +[FILTER0-2] +;Specify the column name need to be filtered: +filter_column = Gene_symbol,Protein_change_short +;Specify the key word for the filter column. If there are multiple keywords need to be filtered at the same time, please split them with ','. You can use ' && ' to combine the keywords startswith '!' as one filter condition: +key_word = TERT +;Specify the column names need to be transfered to new data file. Split the column names with ','. NB: the column contents must be the same as they are in the sheet. +columns = Gene_symbol,Change_summary,Genomic_location,DNA_change,cDNA_change,Protein_change_short,Coding_status,Depth_tumor_DNA,AF_tumor_DNA,Depth_normal_DNA,AF_normal_DNA,Depth_tumor_RNA,AF_tumor_RNA,TSO500_LocalApp_class,Class_judgement_comments +;Specify the column name to add in the right end of the table to verify this filter condition. NB: if this value is given, a new column will be added into the tables in the report. Same column name in different filter sections will be merged into one filter column. +filter_column_add = Filter_rescued +;Specify the output result file name. Please note that you type should not exceed 31 characters and not contain any of the characters ':' ' \' '/' '?' 
'*' '[' or ']': +output_table = TERT + +[FILTER0-3] +;In this filter section, the input data is the small variant table, the same input as the other FILTER0 sections. The results of all FILTER0 sections are then combined into 'workingTable'. ;Specify the column name need to be filtered: filter_column = Class_judgement ;Specify the key word for the filter column. If there are multiple keywords need to be filtered at the same time, please split them with ','. You can use ' && ' to combine the keywords startswith '!' as one filter condition: key_word = LikSOM,SOM,LikGL,incon,include -;Specify if you want to output all columns based on the filter. 'True' means yes, and a default output table 'workingTable' will be generated in the output folder extra_files: -all_col_output = True ;Specify the column names need to be transfered to new data file. Split the column names with ','. NB: the column contents must be the same as they are in the sheet. columns = Gene_symbol,Change_summary,Genomic_location,DNA_change,cDNA_change,Protein_change_short,Coding_status,Depth_tumor_DNA,AF_tumor_DNA,Depth_normal_DNA,AF_normal_DNA,Depth_tumor_RNA,AF_tumor_RNA,TSO500_LocalApp_class,Class_judgement_comments ;Specify the miniment number of Depth_tumor for data to choose: @@ -28,51 +53,47 @@ min_depth_tumor_DNA = 0 ;Specify the output result file name. Please note that you type should not exceed 31 characters and not contain any of the characters ':' ' \' '/' '?' '*' '[' or ']': output_table = preMTBTable_Appendix -[FILTER2] -;In this filter section, the input data is the first table generated from FILTER1. +[FILTER1] +;In this filter section, the input data is the combined table named 'workingTable' generated from all sections of FILTER0. ;Specify the column name need to be filtered: filter_column = Coding_status ;Specify the key word for the filter column. If there are multiple keywords need to be filtered at the same time, please split them with ','. You can use ' && ' to combine the keywords startswith '!' as one filter condition. And '!' 
means NOT: key_word = !x:noncoding_variant && !synonymous_variant -all_col_output = False ;Specify the column names need to be transfered to new data file. Split the column names with ','. NB: the column contents must be the same as they are in the sheet. -columns = Gene_symbol,Ensembl_transcript_ID,Exon_number,Protein_change_short,Coding_status,AF_tumor_DNA +columns = Gene_symbol,Ensembl_transcript_ID,Exon_number,Protein_change_short,Coding_status,AF_tumor_DNA,Filter_rescued ;Specify the output result file name. Please note that you type should not exceed 31 characters and not contain any of the characters ':' ' \' '/' '?' '*' '[' or ']': output_table = sequence_summary -[FILTER3] -;In this filter section, the input data is the first table generated from FILTER1. +[FILTER2] +;In this filter section, the input data is the combined table named 'workingTable' generated from all sections of FILTER0. ;Specify the column name need to be filtered: filter_column = Coding_status ;Specify the key word for the filter column. If there are multiple keywords need to be filtered at the same time, please split them with ','. You can use ' && ' to combine the keywords startswith '!' as one filter condition. And '!' means NOT: key_word = !x:noncoding_variant -all_col_output = False ;Specify the column names need to be transfered to new data file. Split the column names with ','. NB: the column contents must be the same as they are in the sheet. -columns = Gene_symbol,Genomic_location,DNA_change,cDNA_change,Protein_change_short,Coding_status,Depth_tumor_DNA,AF_tumor_DNA +columns = Gene_symbol,Genomic_location,DNA_change,cDNA_change,Protein_change_short,Coding_status,Depth_tumor_DNA,AF_tumor_DNA,Filter_rescued ;Specify the output result file name. Please note that you type should not exceed 31 characters and not contain any of the characters ':' ' \' '/' '?' 
'*' '[' or ']': output_table = AllReporVariants_CodingRegion -[FILTER4] -;In this filter section, the input data is the first table generated from FILTER1. +[FILTER3] +;In this filter section, the input data is the combined table named 'workingTable' generated from all sections of FILTER0. ;Specify the column name need to be filtered: filter_column = Coding_status ;Specify the key word for the filter column. If there are multiple keywords need to be filtered at the same time, please split them with ','. You can use ' && ' to combine the keywords startswith '!' as one filter condition. And '!' means NOT: key_word = !x:noncoding_variant && !synonymous_variant -all_col_output = False ;Specify the column names need to be transfered to new data file. Split the column names with ','. NB: the column contents must be the same as they are in the sheet. columns = Gene_symbol,Protein_change_short,Coding_status,AF_tumor_DNA ;Specify the output result file name. Please note that you type should not exceed 31 characters and not contain any of the characters ':' ' \' '/' '?' '*' '[' or ']': output_table = AllReporVariants_AltProtein -[FILTER5] -;In this filter section, the input data is the first table generated from FILTER1. +[FILTER4] +;In this filter section, the input data is the combined table named 'workingTable' generated from all sections of FILTER0. ;Specify the column name need to be filtered: filter_column = Reporting_judgement ;Specify the key word for the filter column. If there are multiple keywords need to be filtered at the same time, please split them with ','. You can use ' && ' to combine the keywords startswith '!' as one filter condition. And '!' means NOT: key_word = BioMar,VUSpot,AddRes,GLrel -all_col_output = False ;Specify the column names need to be transfered to new data file. Split the column names with ','. NB: the column contents must be the same as they are in the sheet. 
-columns = Gene_symbol,Coding_status,Functional_domain,AF_tumor_DNA,CPSR_ACMG_class,Reporting_judgement +columns = Gene_symbol,Coding_status,Functional_domain,AF_tumor_DNA,CPSR_ACMG_class,Reporting_judgement,Filter_rescued ;Specify the output result file name. Please note that you type should not exceed 31 characters and not contain any of the characters ':' ' \' '/' '?' '*' '[' or ']': output_table = Main_Findings diff --git a/Script/PRONTO.py b/Script/PRONTO.py index 826003b..2996fa2 100755 --- a/Script/PRONTO.py +++ b/Script/PRONTO.py @@ -128,6 +128,7 @@ def read_tsv_col(data_file,filter_column,key_word,columns,MTB_format): Depth_tumor_DNA_col = 12 AF_tumor_DNA_col = 13 for line in open(data_file): + line = line.replace('\n', '') line_cells = line.split('\t') if(line_cells[0] == col0 and not mark): for col in range(len(line_cells)): @@ -178,6 +179,8 @@ def read_tsv_col(data_file,filter_column,key_word,columns,MTB_format): if(appear == False): d += 1 for num in column_mark: + while(len(line_cells) <= num): + line_cells.append('') if(num == Coding_status_col): line_cells[num] = line_cells[num].replace("_variant", "") + '\t' else: @@ -226,6 +229,20 @@ def read_tsv_col(data_file,filter_column,key_word,columns,MTB_format): return data +def add_filter_column_into_table(data_config,filter_column_add): + data = [] + for i, row in enumerate(data_config): + row = [item.replace('\n', '') for item in row] + if(row[-1] == ''): + row.pop() + if(i == 0): + row.append(filter_column_add+'\n') + else: + row.append("Yes\n") + data.append(row) + return data + + def filter_depth_tumor_all_col(data_config,depth_tumor_DNA): data = [] data.append(data_config[0]) @@ -682,7 +699,7 @@ def update_ppt_template_data(inpred_node,ipd_no,ipd_gender,ipd_age,ipd_diagnosis tf11.paragraphs[0].text = pathology_comment + "\n\n" + sample_info_comment.replace("|","\n") tf11.paragraphs[0].font.size = Pt(10) tf11.paragraphs[0].alignment = PP_ALIGN.LEFT - gender_age = "{}/{}y'.format(ipd_gender, age) + 
gender_age = "{}/{}y".format(ipd_gender, age) if(RNA_material_id != ""): ipd_material_id_index = "DNA:" + DNA_material_id + "\nRNA:" + RNA_material_id else: @@ -737,7 +754,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_ # load table data try: - table_data = pandas.read_csv(table_file, sep='\t') + table_data = pandas.read_csv(table_file, sep='\t', keep_default_na=False) except pandas.errors.EmptyDataError: logging.warning("{} is empty".format(table_file)) return @@ -1353,47 +1370,97 @@ def main(argv): tumor_content = '~' + str(int(tumor_content_float*100)) + '%' if(line.startswith('#') and "Size of the target coding region" in line): target_cod_region = float(line.split(':')[1]) - MTB_format = False - for i in range(0,filter_col_nu_config): - j = str(i + 1) - if(j == "2"): + for i in range(0,filter_col_nu_config+1): + filter_section = str(i) + if(filter_section == "0"): + all_data_filter = [] + top_filter = int(cfg.get("INPUT", "top_filter")) + 1 + for top_filter_num in range(1,top_filter): + filter_column = cfg.get("FILTER0-"+str(top_filter_num), "filter_column") + key_word = cfg.get("FILTER0-"+str(top_filter_num), "key_word") + columns = cfg.get("FILTER0-"+str(top_filter_num), "columns") + try: + filter_column_add = cfg.get("FILTER0-"+str(top_filter_num), "filter_column_add") + except: + filter_column_add = "" + try: + filter_min_depth_tumor_DNA = int(cfg.get("FILTER0-"+str(top_filter_num), "min_depth_tumor_DNA")) + except: + filter_min_depth_tumor_DNA = "" + output_table = cfg.get("FILTER0-"+str(top_filter_num), "output_table") + output_table_file_config_pre = output_file_preMTB_table_path + "_" + output_table + "_pre.txt" + output_table_file_config = output_file_preMTB_table_path + "_" + output_table + ".txt" + if(',' in filter_column): + for column in filter_column.split(','): + all_data = read_tsv(data_file_small_variant_table,column,key_word) + + else: + all_data = 
read_tsv(data_file_small_variant_table,filter_column,key_word) + if(filter_column_add != ""): + all_data = add_filter_column_into_table(all_data,filter_column_add) + if(filter_min_depth_tumor_DNA != ""): + all_data = filter_depth_tumor_all_col(all_data,filter_min_depth_tumor_DNA) + write_exl(output_table_file_config_pre,all_data) + clear_blank_line(output_table_file_config_pre,output_table_file_config) + all_data_filter.append(all_data) + + all_data_filter = sum(all_data_filter, []) + for i in range(len(all_data_filter)): + if(i == 0): + header_length = len(all_data_filter[i]) + else: + if(len(all_data_filter[i]) < header_length): + count = header_length - len(all_data_filter[i]) + all_data_filter[i] = [[item.replace('\n', '') for item in cell] for cell in all_data_filter[i]] + all_data_filter[i].pop() + for j in range(1, count): + all_data_filter[i].append(' \t') + all_data_filter[i].append('\n') + + unique_data = [] + for current in all_data_filter: + if(current[-1] == '\n'): + current.pop() + if(current[-1].endswith('\n\t')): + current[-1] = current[-1].replace('\n\t', '\n') + is_appear = False + for existing in unique_data: + if(len(existing) > len(current)): + current_content = current.copy() + current_content[-1] = current[-1].replace('\n', '\t') + for i in range(len(existing) - len(current_content) + 1): + if(existing[i:i+len(current_content)] == current_content): + is_appear = True + # Only print last column for the rescued variants which are not include in Filter0-3. 
+ if(existing[-1] == 'Yes\n'): + existing[-1] = '\n' + break + if is_appear: + break + if(existing == current): + is_appear = True + break + if not is_appear: + unique_data.append(current) + + top_filter_output_file_pre = output_file_preMTB_table_path + "_preMTB_workingTable_pre.txt" + top_filter_output_file = output_file_preMTB_table_path + "_preMTB_workingTable.txt" + write_exl(top_filter_output_file_pre,unique_data) + clear_blank_line(top_filter_output_file_pre,top_filter_output_file) + continue + + if(filter_section == "1"): MTB_format = True - filter_column = cfg.get("FILTER"+j, "filter_column") - key_word = cfg.get("FILTER"+j, "key_word") - all_col_output = cfg.get("FILTER"+j, "all_col_output") - columns = cfg.get("FILTER"+j, "columns") - output_table = cfg.get("FILTER"+j, "output_table") + filter_column = cfg.get("FILTER"+filter_section, "filter_column") + key_word = cfg.get("FILTER"+filter_section, "key_word") + columns = cfg.get("FILTER"+filter_section, "columns") + output_table = cfg.get("FILTER"+filter_section, "output_table") output_table_file_config_pre = output_file_preMTB_table_path + "_" + output_table + "_pre.txt" output_table_file_config = output_file_preMTB_table_path + "_" + output_table + ".txt" if(DNA_normal_sampleID != ""): columns = columns + ",AF_normal_DNA" - if(all_col_output == "True"): - if(j == "1"): - filter1_min_depth_tumor_DNA = int(cfg.get("FILTER"+j, "min_depth_tumor_DNA")) - all_data = read_tsv(data_file_small_variant_table,filter_column,key_word) - if(all_data == []): - all_data_config_DepthTumor_DNA = "" - else: - all_data_config_DepthTumor_DNA = filter_depth_tumor_all_col(all_data,filter1_min_depth_tumor_DNA) - output_file_preMTB_workingTable_pre = output_file_preMTB_table_path + "_preMTB_workingTable_pre.txt" - output_file_preMTB_workingTable = output_file_preMTB_table_path + "_preMTB_workingTable.txt" - write_exl(output_file_preMTB_workingTable_pre,all_data_config_DepthTumor_DNA) - 
clear_blank_line(output_file_preMTB_workingTable_pre,output_file_preMTB_workingTable) - else: - all_data = read_tsv(output_file_preMTB_workingTable,filter_column,key_word) - write_exl(output_table_file_config_pre,all_data) - clear_blank_line(output_table_file_config_pre,output_table_file_config) - if(j == "1"): - filter1_min_depth_tumor_DNA = int(cfg.get("FILTER"+j, "min_depth_tumor_DNA")) - data = read_tsv_col(data_file_small_variant_table,filter_column,key_word,columns,MTB_format) - if(data == []): - data_DepthTumor_DNA = "" - else: - data_DepthTumor_DNA = filter_depth_tumor_cols(data,filter1_min_depth_tumor_DNA) - write_exl(output_table_file_config_pre,data_DepthTumor_DNA) - else: - data = read_tsv_col(output_file_preMTB_workingTable,filter_column,key_word,columns,MTB_format) - write_exl(output_table_file_config_pre,data) + all_data = read_tsv_col(top_filter_output_file,filter_column,key_word,columns,MTB_format) + write_exl(output_table_file_config_pre,all_data) clear_blank_line(output_table_file_config_pre,output_table_file_config) MTB_format = False @@ -1442,10 +1509,10 @@ def main(argv): TMB_DURP_coding_file = output_path + DNA_sampleID + "_TMB_DURP_coding.txt" TMB_DRUP_filter_key_word = cfg.get("TMB", "TMB_DRUP_filter_key_word") - TMB_coding_data = read_tsv(output_file_preMTB_workingTable,TMB_filter_column,TMB_filter_key_word) + TMB_coding_data = read_tsv(top_filter_output_file,TMB_filter_column,TMB_filter_key_word) write_exl(TMB_coding_file_pre,TMB_coding_data) clear_blank_line(TMB_coding_file_pre,TMB_coding_file) - TMB_DRUP_coding_data = read_tsv(output_file_preMTB_workingTable,TMB_filter_column,TMB_DRUP_filter_key_word) + TMB_DRUP_coding_data = read_tsv(top_filter_output_file,TMB_filter_column,TMB_DRUP_filter_key_word) write_exl(TMB_DURP_coding_file_pre,TMB_DRUP_coding_data) clear_blank_line(TMB_DURP_coding_file_pre,TMB_DURP_coding_file) @@ -1491,7 +1558,7 @@ def main(argv): 
insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_path,RNA_image_path,output_ppt_file) # Insert tables into PP file: - slide8_table_header = ["Gene_symbol", "Genomic_coordinates_in_hg19_build", "Ensembl_transcript_ID", "Exon_number", "Protein_change_short", "HGVS_syntax", "Change_summary", "Coding_status", "Read_depth(variant reads/total reads)", "AF_tumor_DNA"] + slide8_table_header = ["Gene_symbol", "Genomic_coordinates_in_hg19_build", "Ensembl_transcript_ID", "Exon_number", "Protein_change_short", "HGVS_syntax", "Change_summary", "Coding_status", "Read_depth(variant reads/total reads)", "AF_tumor_DNA", "Filter_rescued"] if(DNA_normal_sampleID != ""): slide6_table_header = ["Gene_symbol", "Protein_change_short", "Coding_status", "AF_tumor_DNA", "AF_normal_DNA"] else: @@ -1524,15 +1591,15 @@ def main(argv): slide8_header_left = 0.25 slide8_header_top = 0.27 slide8_header_width = 8.98 - slide8_table_left = 0.3 + slide8_table_left = 0.15 slide8_table_top = 0.55 slide8_table_width = 9.19 slide8_table_height = 2.81 slide8_table_font_size = 7 if_print_rowNo = True - table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53] + table8_column_width = [0.54, 0.96, 0.96, 0.51, 0.73, 1.12, 2.26, 0.79, 0.81, 0.53, 0.53] table_max_rows_per_slide = int(cfg.get("INPUT", "table_max_rows_per_slide")) - insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide) + _ = 
insert_table_to_ppt(slide8_table_data_file,slide8_table_ppSlide,slide8_table_name,slide8_header_left,slide8_header_top,slide8_header_width,slide8_table_left,slide8_table_top,slide8_table_width,slide8_table_height,slide8_table_font_size,slide8_table_header,output_ppt_file,if_print_rowNo,table8_column_width,table_max_rows_per_slide) # Insert the CNV_overveiw_plots pictures A2, B3 and C1 into report. A2_to_extract=[2] @@ -1577,8 +1644,6 @@ def main(argv): move_ipd_material_file = shutil.move(ipd_material_file_new, extra_path) if os.path.exists(ipd_material_file_2023): move_ipd_material_file = shutil.move(ipd_material_file_2023, extra_path) - if os.path.exists(InPreD_clinical_tsoppi_data_file): - DNA_if_generate_report = "-" update_clinical_tsoppi_file(InPreD_clinical_tsoppi_data_file,DNA_sampleID,DNA_if_generate_report,ipd_birth_year,ipd_clinical_diagnosis,ipd_gender,ipd_consent,DNA_material_id,ipd_collection_year,requisition_hospital,extraction_hospital,tumor_content_nr,batch_nr,str(sample_material),sample_type,str(tumor_type),str_TMB_DRUP,TMB_TSO500,MSI_TSO500,pipline,pathology_comment,sample_info_comment) if(RNA_sampleID != ""): RNA_if_generate_report = "-" diff --git a/test_data/ous/test_files_for_PRONTO_newFunctions/251114_A02134_0115_BHCJCKDRX7_variant_summary.tsv b/test_data/ous/test_files_for_PRONTO_newFunctions/251114_A02134_0115_BHCJCKDRX7_variant_summary.tsv new file mode 100644 index 0000000..df9d2bf --- /dev/null +++ b/test_data/ous/test_files_for_PRONTO_newFunctions/251114_A02134_0115_BHCJCKDRX7_variant_summary.tsv @@ -0,0 +1,28 @@ +# Field formats: +# - TMB: TMB (elligible_variants) +# - MSI: percentage_of_unstable_MSI_sites (unstable_MSI_site_count/usable_MSI_site_count) +# - gene_amplifications: gene (fold_change) +# - splice_variants: gene[affected_exon] (breakpoint_gene_1-breakpoint_gene_2) splice_supporting_reads/reference_reads +# - fusions: gene_1-gene_2 (breakpoint_gene_1-breakpoint_gene_2) 
fusion_supporting_reads/ref_reads_gene_1/ref_reads_gene_2 +sample_id TMB MSI gene_amplifications splice_variants fusions +IPD2091-D01-N02-A28_P 0.8 (1) 1.04 (1/96) NA NA NA +IPD2142-D01-d01-A15_P 7.8 (10) 0.88 (1/114) EGFR (1.509)|ERCC2 (1.597)|ERCC1 (1.545) NA NA +IPD2142-R03-d01-A15_P NA NA NA NA NA +IPD2203-D01-d01-A09_P 10.2 (13) 4.17 (5/120) EGFR (1.578)|MET (1.580)|BRCA2 (1.424)|LAMP1 (1.493) NA NA +IPD2203-R03-d01-A09_P NA NA NA NA NA +IPD2216-R03-P01-A12_P NA NA NA NA NA +IPD2222-D01-R01-A18_P 3.1 (4) 3.28 (4/122) MET (1.545)|MYC (1.482)|FGFR2 (1.463)|CCNE1 (1.495)|AKT2 (1.449) NA NA +IPD2222-R03-R01-A18_P NA NA NA NA NA +IPD2223-D01-P01-A08_P 3.1 (4) 2.83 (3/106) MYCL (1.435)|MYCN (236.886)|PIK3CA (1.559)|EGFR (2.110)|CDK6 (1.924)|MET (2.086)|BRAF (1.943)|FGF23 (1.511)|KRAS (1.476)|CDK4 (95.292)|AKT2 (1.706) NA NA +IPD2223-R03-P01-A08_P NA NA NA NA CDK4/PDE3A (chr12:1-chr12:1) 15/31604/396 +IPD2224-D01-P01-A12_P 4.7 (6) 0.00 (0/111) FGFR1 (1.791) NA NA +IPD2224-R03-P01-A12_P NA NA NA NA NA +IPD2225-D01-P01-A08_P 14.9 (19) 4.13 (5/121) NA NA NA +IPD2225-R03-P01-A08_P NA NA NA NA NA +IPD2226-D01-P01-A09_P 8.7 (11) 1.85 (2/108) MDM4 (1.455)|FGF3 (1.948)|FGF9 (1.840)|BRCA2 (1.631)|FGF14 (2.021)|LAMP1 (1.844) NA NA +IPD2226-R03-P01-A09_P NA NA NA NA NA +IPD2227-D01-X01-A08_P 0.8 (1) 0.83 (1/120) NA NA NA +IPD2234-D01-P01-F25_P 2.4 (3) 2.68 (3/112) FGFR4 (3.370)|NRG1 (1.711)|FGFR1 (1.788)|MYC (1.703)|FGF23 (1.631)|KRAS (1.539)|ERBB3 (1.517)|CDK4 (1.604)|ERBB2 (1.715)|BRCA1 (1.657)|RPS6KB1 (1.603)|ERCC2 (2.189) NA NA +IPD2234-R03-P01-F25_P NA NA NA NA NA +IPD2243-D01-P01-A28_P 10.2 (13) 5.88 (7/119) NA NA NA +IPD2243-R03-P01-A28_P NA NA NA NA NA diff --git a/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-D01-P01-A08_CNV_overview_plots.pdf b/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-D01-P01-A08_CNV_overview_plots.pdf new file mode 100644 index 0000000..acffab8 Binary files /dev/null and 
b/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-D01-P01-A08_CNV_overview_plots.pdf differ diff --git a/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-D01-P01-A08_sample_QC_plot.pdf b/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-D01-P01-A08_sample_QC_plot.pdf new file mode 100644 index 0000000..8c72b21 Binary files /dev/null and b/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-D01-P01-A08_sample_QC_plot.pdf differ diff --git a/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-D01-P01-A08_small_variant_table_forQC.tsv b/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-D01-P01-A08_small_variant_table_forQC.tsv new file mode 100644 index 0000000..d789be3 --- /dev/null +++ b/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-D01-P01-A08_small_variant_table_forQC.tsv @@ -0,0 +1,118 @@ +# [IPD2225-D01-P01-A08] Version string: 2.2.0.12/0.3.2:22-06-07/1.0.0/1.0.0/hg19/TR +# [IPD2225-D01-P01-A08] Variants included in this table satisfied at least one of the following criteria: +# [IPD2225-D01-P01-A08] - ""absent_in_normal"": (only applicable when a matched normal sample [MNS] is available) the variant site had non-zero coverage in the MNS and the VAF ratio between the tumor sample and the MNS was >=5.0" +# [IPD2225-D01-P01-A08] - ""ClinVar_classification"": the variant falls into one of the following ClinVar classes: Pathogenic, Likely_Pathogenic" +# [IPD2225-D01-P01-A08] - ""CPSR_classification"": the variant falls into one of the following CPSR classes (based on refined ACMG criteria): Pathogenic, Likely_Pathogenic" +# [IPD2225-D01-P01-A08] - Required variant class (as determined by the Illumina TSO500 Local App pipeline) - any of: +# [IPD2225-D01-P01-A08] - ""SOM"": the variant wasn't included in any class based on TSO500 LocalApp filtering, it was therefore deemed to be of somatic origin" +# [IPD2225-D01-P01-A08] - ""GL_P"": the variant didn't pass the AF-based ""germline variant proximity"" filter, which 
looks for (5+) known germline variants with similar AFs located on the same chromosome" +# [IPD2225-D01-P01-A08] - Required variant tier (as determined by the PCGR annotation) - any of: +# [IPD2225-D01-P01-A08] - ""TIER_1"": variant of strong clinical significance (a PCGR-assigned tier)" +# [IPD2225-D01-P01-A08] - ""TIER_2"": variant of potential clinical significance (a PCGR-assigned tier)" +# [IPD2225-D01-P01-A08] - Required protein change effect (in any gene transcript overlapping the variant); the format of this inclusion tag is ""protein_changes:X"", where X is a comma-separated list of variant-associated high-impact coding consequences from the following list:" +# [IPD2225-D01-P01-A08] - ""frameshift_variant""" +# [IPD2225-D01-P01-A08] - ""start_lost""" +# [IPD2225-D01-P01-A08] - ""stop_lost""" +# [IPD2225-D01-P01-A08] - ""stop_gained""" +# [IPD2225-D01-P01-A08] - ""splice_acceptor_variant""" +# [IPD2225-D01-P01-A08] - ""splice_donor_variant""" +# [IPD2225-D01-P01-A08] - In case of PCGR reporting an ""intergenic_variant"", protein-coding variant effects reported by LocalApp's Nirvana (except for ""synonymous_variant"", ""stop_retained_variant"", ""start_retained_variant"", ""transcript_variant"", ""coding_sequence_variant"" or ""intron_variant"") will be labeled with the following inclusion tag:" +# [IPD2225-D01-P01-A08] - ""intergenic_PCGR_coding_Nirvana""" +# [IPD2225-D01-P01-A08] The ""Inclusion_criteria"" column lists all the inclusion criteria satisfied by given variant." +# [IPD2225-D01-P01-A08] Please note that variants present on InPreD's small variant blacklist are recorded in a separate table despite fulfilling at least one of the listed inclusion criteria. 
+# [IPD2225-D01-P01-A08] The variants might be flagged by any number of the following custom filters, which point to properties potentially relevant during the quality control process: +# [IPD2225-D01-P01-A08] - ""normal_origin"": the variant was seen only in the MNS (more details below)" +# [IPD2225-D01-P01-A08] - ""germline_population"": the variant was seen in at least one of the 1000 genomes/gnomAD (sub-)populations with allelic frequency >=0.001" +# [IPD2225-D01-P01-A08] - ""recurrent_germline"": according to the variant InPreD recurrence information, the variant was seen with VAFs >= 0.35 in at least 10 % of investigated normal samples (requires at least 20 callable normal samples)" +# [IPD2225-D01-P01-A08] - ""recurrent_artifact"": according to the variant InPreD recurrence information, the variant was seen with VAFs >= [VAF in the processed sample] in at least 10 % of all the investigated tumor samples (based on recurrence table VAF tiers, not exact VAF values; requires at least 20 callable tumor samples)" +# [IPD2225-D01-P01-A08] - ""LOW_TUMOR_DP"": the read depth at the variant site in the tumor sample was <20" +# [IPD2225-D01-P01-A08] - ""LOW_TUMOR_VAF"": the variant allelic fraction in the tumor sample was <0.02" +# [IPD2225-D01-P01-A08] - ""HIGH_TUMOR_VAF"": the variant allelic fraction in the tumor sample was >0.98" +# [IPD2225-D01-P01-A08] - ""LOW_VAF_RATIO"": (only applicable when a MNS is available) the ratio between variant allelic fractions in the tumor sample and the MNS was <5.0" +# [IPD2225-D01-P01-A08] - ""HET_VAF_RANGE"": the variant allelic fraction in the tumor sample was within the range of 0.5 +/-0.15" +# [IPD2225-D01-P01-A08] - ""NO_NORMAL_COVERAGE"": (only applicable when a MNS is available) no coverage data was reported for the variant in the MNS" +# [IPD2225-D01-P01-A08] The ""Filters"" column lists all the filters failed by given variant." 
+# [IPD2225-D01-P01-A08] The ""Class_judgement"" [CJ] and ""Class_judgement_comments"" [CJC] columns are pre-filled in order the speed up the manual QC process, but their values should always be checked. A variant can be auto-excluded using the following rules:" +# [IPD2225-D01-P01-A08] - filters ""germline_population"", ""recurrent_germline"", ""recurrent_artifact"", ""LOW_TUMOR_DP"" and ""LOW_TUMOR_VAF"" will serve as exclusion criteria (also, any of these filter values will be copied to the CJC column when encountered);" +# [IPD2225-D01-P01-A08] - [exception] if a matched control is available and does not contain given variant, encountered ""germline_population"" and ""recurrent_germline"" filters will not be used as exclusion criteria; the CJC column will in that case instead contain value ""known_germline_variant_absent_in_matched_control"";" +# [IPD2225-D01-P01-A08] - ""no_recurrence_data"" flag in the CJC column is displayed for variants absent in the input variant recurrence table;" +# [IPD2225-D01-P01-A08] - if a matched control is available and the ""LOW_VAF_RATIO"" filter is being applied; the CJC column will include value ""variant_in_normal"";" +# [IPD2225-D01-P01-A08] - if a given variant has exactly the coding status combination of ""synonymous_variant""and ""splice_region_variant"" in the primary transcript while no RNA support has been observed for the variant, the CJC column will include value ""synonymous_splice_region_variant_with_no_RNA_coverage""." +# [IPD2225-D01-P01-A08] CPSR is run on matched normal samples when possible. 
Variants not 'found' in the DNA tumor sample (e.g., not fulfilling the required variant class criteria for that sample) will still be present in this table as long as they fulfill any of the following inclusion criteria: +# [IPD2225-D01-P01-A08] - ""protein_changes:X"" (as described above);" +# [IPD2225-D01-P01-A08] - ""intergenic_PCGR_coding_Nirvana"" (as described above);" +# [IPD2225-D01-P01-A08] - ""ClinVar_classification"" (as described above);" +# [IPD2225-D01-P01-A08] - ""CPSR_classification"" (as described above)." +# [IPD2225-D01-P01-A08] The following filters are applicable to the normal-only variants (in addition, the ""normal_origin"" filter will always be applied):" +# [IPD2225-D01-P01-A08] - ""germline_population"" (as described above);" +# [IPD2225-D01-P01-A08] - ""recurrent_germline"" (as described above);" +# [IPD2225-D01-P01-A08] - ""recurrent_artifact"" (as described above)." +# [IPD2225-D01-P01-A08] In case of normal-only variants, all the listed filters also serve as exclusion criteria. Normal-only variants will always be auto-excluded (they should not contribute to the TMB). +# [IPD2225-D01-P01-A08] The CJC column comment ""synonymous_splice_region_variant_with_no_RNA_coverage"" is replaced by comment ""synonymous_splice_region_variant_with_unknown_RNA_status"" in case of normal-only variants." 
+# [IPD2225-D01-P01-A08] The ""Coding_transcript_consequences"" field has the following format:" +# [IPD2225-D01-P01-A08] - the transcripts are grouped by sets of associated coding variant consequences (e.g., a group of transcripts with ""downstream_gene_variant"" consequence and a group of transcripts with ""splice_region_variant,synonymous_variant"" consequences);" +# [IPD2225-D01-P01-A08] - the transcript groups are separated by semicolons and prefixed with a comma-separated list of the associated consequences;" +# [IPD2225-D01-P01-A08] - individual transcript records within each group are separated by the pipe symbol (""|"") and contain the following elements separated by colons: a source tag (""R"" tag for RefSeq, ""E"" for Ensembl), gene symbol, the ""Canonical"" tag where applicable, the transcript ID and the cDNA and protein change codes;" +# [IPD2225-D01-P01-A08] Size of the target coding region (in millions ot bases): 1.27 +# [IPD2225-D01-P01-A08] Specified tumor purity (as a fraction between 0 and 1): 0.5 +# [IPD2225-D01-P01-A08] The ""TNC"" tier corresponds to PCGR-assigned tier ""NONCODING""." +# [IPD2225-D01-P01-A08] The ""TNA"" tier is assigned to variants seen only in the MNS (these haven't been annotated by PCGR)." +# [IPD2225-D01-P01-A08] ID of the matched normal DNA sample: NA +# [IPD2225-D01-P01-A08] ID of the matched tumor RNA sample: IPD2225-R03-P01-A08 +# [IPD2225-D01-P01-A08] ""Gene_CN"" column format: [High_confidence/non-High_confidence]_[Tumor_CN]/[Adjusted_Tumor_CN]_[Normal_CN]" +# [IPD2225-D01-P01-A08] ""Gene_predisposition"" column format: [category]_[note]_[associated_tumor_type]" +# [IPD2225-D01-P01-A08] - category 1: any age, any tumor type +# [IPD2225-D01-P01-A08] - category 2: any age, associated tumor type only +# [IPD2225-D01-P01-A08] - category 3: age < 30, any tumor type +# [IPD2225-D01-P01-A08] - category 4: age < 30, associated tumor type only +# [IPD2225-D01-P01-A08] - note a: Renal tumours to be excluded. 
+# [IPD2225-D01-P01-A08] - note b: MUTYH should be included for germline-focussed tumour analysis but reporting and germline follow-up testing should only be performed on detection of two pathogenic variants. +# [IPD2225-D01-P01-A08] - note c: Brain tumours to be excluded. +# [IPD2225-D01-P01-A08] ""TSO500_recurrence"" column format: [tumor_recurrence_summary]|[normal_recurrence_summary]|[any_sample_recurrence_summary]" +# [IPD2225-D01-P01-A08] The recurrence summary values are written in format ""X:A+B+C+D=M/N""" +# [IPD2225-D01-P01-A08] - X: sample type, one of ""T"" = tumor, ""N"" = normal, ""A"" = any" +# [IPD2225-D01-P01-A08] - A: number of samples of type X, in which given variant was seen with VAF < 0.01 +# [IPD2225-D01-P01-A08] - B: number of samples of type X, in which given variant was seen with 0.01 <= VAF < 0.05 +# [IPD2225-D01-P01-A08] - C: number of samples of type X, in which given variant was seen with 0.05 <= VAF < 0.35 +# [IPD2225-D01-P01-A08] - D: number of samples of type X, in which given variant was seen with 0.35 <= VAF +# [IPD2225-D01-P01-A08] - M: number of samples of type X, in which given variant was seen with any VAF +# [IPD2225-D01-P01-A08] - N: number of investigated samples of type X, in which given variant was callable (i.e., the variant site had coverage >= 20) +# [IPD2225-D01-P01-A08] Information regarding genes interrogated by the FoundationOne Liquid CDx assay (F1LCDx targets): +# [IPD2225-D01-P01-A08] - retrieved from F1LCDx Technical Specs (document version from July 2021; section ""Information Provided as a Professional Service"")" +# [IPD2225-D01-P01-A08] - the following optional elements (separated by a semicolon) might be included in the target information string: +# [IPD2225-D01-P01-A08] - a gene alias not compatible with PCGR (e.g., ""aka[FAM123B]"");" +# [IPD2225-D01-P01-A08] - a list of introns and/or a list of exons (in any order, separated by a comma) investigated at base sensitivity (e.g., 
""all_exons,introns[7-10]"");" +# [IPD2225-D01-P01-A08] - a list of introns and/or a list of exons (in any order, separated by a comma) investigated at increased sensitivity - jointly prefixed by ""IS:"" (e.g., ""IS:exons[20-29],introns[18-19]"");" +# [IPD2225-D01-P01-A08] - other, special tags, as listed in the source document (e.g., ""ncRNA"" in case of the TERC gene, or ""IS:promoter"" in case of the TERT gene)." +# [IPD2225-D01-P01-A08] - E.g., the target information string of BRAF states ""all_exons,introns[7-10];IS:exons[11-18]"", meaning that introns 7-10 and all exons were investigated at base sensitivity, while exons 11-18 were investigated at increased sensitivity." +Sample_ID Gene_symbol Ensembl_transcript_ID RefSeq_mRNA Genomic_location DNA_change cDNA_change Exon_number Protein_change_short Protein_change Change_summary Coding_status Depth_tumor_DNA AF_tumor_DNA Depth_normal_DNA AF_normal_DNA Depth_tumor_RNA AF_tumor_RNA COSMIC_ID dbSNP_rs_ID ClinVar_variation_ID Global_AF_gnomAD Global_AF_1KG NFE_AF_gnomAD Germline_population_AFs Mutation_hotspot TCGA_frequency ICGC_PCAWG_occurrence TSO500_recurrence Putative_driver Signaling_pathway Gene_predisposition Gene_CN Tier CPSR_ACMG_class CPSR_ClinVar_class TSO500_LocalApp_class Functional_domain Drugs Coding_transcript_consequences Inclusion_criteria Filters F1LCDx_targets IGV_QC Run_QC Class_judgement Class_judgement_comments Reporting_judgement Reporting_judgement_comments +IPD2225-D01-P01-A08 CHEK2 ENST00000382580 NM_001005735 22:29091856 AG>A c.1229del exon12 T410fs Thr410MetfsTer15 c.1229del:exon12:T410fs:Thr410MetfsTer15 frameshift_variant 285 0.042 NA NA 0 NA COSV60419771 rs555607708 128042 0,002044 1 0,002543 GLOBAL_AF_1KG:0.0010000|GLOBAL_AF_GNOMAD:0.0020440|ASJ_AF_GNOMAD:0.0015960|EUR_AF_1KG:0.0040000|FIN_AF_GNOMAD:0.0087310|NFE_AF_GNOMAD:0.0025430|OTH_AF_GNOMAD:0.0016460 NA NA NA T:1+0+0+4=5/1124|N:0+0+0+0=0/94|A:1+0+0+4=5/1218 NA TP53 NA [HC]_1.89/1.78_NA TIER_3 Likely_Benign VUS GL_DB 
Protein_kinase_domain NA "frameshift_variant:E:CHEK2:ENSP00000416721.1:p.(Thr111MetfsTer15)|E:CHEK2:ENSP00000329012.5:p.(Thr338MetfsTer15)|E:CHEK2:ENSP00000372021.1:p.(Thr276MetfsTer15)|R:CHEK2:Canonical:NP_009125.1:p.(Thr367MetfsTer15)|R:CHEK2:NP_001244316.1:p.(Thr146MetfsTer15)|R:CHEK2:NP_001005735.1:p.(Thr410MetfsTer15)|R:CHEK2:NP_665861.1:p.(Thr338MetfsTer15)|E:CHEK2:ENSP00000442458.1:p.(Thr146MetfsTer15)|E:CHEK2:ENSP00000385747.1:p.(Thr367MetfsTer15)|E:CHEK2:ENSP00000329178.6:p.(Thr367MetfsTer15)|E:CHEK2:ENSP00000386087.1:p.(Thr367MetfsTer15)|E:CHEK2:Canonical:ENSP00000372023.2:p.(Thr410MetfsTer15)|E:CHEK2:ENSP00000384919.1:p.(Thr276MetfsTer15)|E:CHEK2:ENSP00000384835.1:p.(Thr338MetfsTer15);3_prime_UTR_variant:E:CHEK2:ENST00000382566.1;downstream_gene_variant:E:CHEK2:ENST00000447421.1|E:CHEK2:ENST00000425190.2" protein_changes:frameshift_variant germline_population IS:all_exons OK QC done exclude germline_population . Needs to be changed to include after manuell QC as requested. Some reads have slightly lower mapping quality, but looks OK +IPD2225-D01-P01-A08 DDX41 ENST00000507955 NA 5:176943944 C>T c.3G>A exon1 M1? Met1? c.3G>A:exon1:M1?:Met1? start_lost 744 0.48 NA NA 0 NA NA rs141601766 224637 8.15E-05 4,00E-04 0.000156 EUR_AF_1KG:0.0020000 NA NA NA T:136+0+0+0=136/1127|N:3+0+0+0=3/94|A:139+0+0+0=139/1221 NA NA NA [non-HC]_2.17/3.134_NA TIER_4 VUS Likely_Pathogenic GL_DB NA NA "upstream_gene_variant:E:DDX41:Canonical:ENST00000330503.7;start_lost:R:DDX41:NP_057306.2:p.?|R:DDX41:Canonical:NP_057306.2:p.?|E:DDX41:ENSP00000422753.1:p.?;5_prime_UTR_variant:R:DDX41:NM_001321732.1|R:DDX41:NM_001321830.1;downstream_gene_variant:E:FAM193B:Canonical:ENST00000514747.1|R:FAM193B:NM_001190946.1|R:FAM193B:Canonical:NM_001190946.2|E:FAM193B:ENST00000443375.2|E:FAM193B:ENST00000329540.5|E:FAM193B:ENST00000524677.1" ClinVar_classification|protein_changes:start_lost HET_VAF_RANGE|germline_population NA . . 
exclude germline_population,variant_in_more_than_10%_of_tumor_samples_(12.07%)_with_VAF_under_0.01 . Should be presented in the report. +IPD2225-D01-P01-A08 TERT ENST00000310581 NM_198253 5:1295228 G>A NA NA NA NA NA x:noncoding_variant 15 0.467 NA NA 0 NA NA rs1242535815 1299388 NA NA NA NA NA NA BOCA-UK/Primary/2/61/0.0328|BTCA-SG/Primary/1/12/0.0833|CLLE-ES/Primary/1/90/0.0111|LINC-JP/Primary/10/28/0.3571|MELA-AU/Metastatic/7/60/0.1167|ORCA-IN/Primary/1/13/0.0769|PBCA-DE/Primary/5/230/0.0217|RECA-EU/Primary/2/74/0.027 T:0+9+9+29=47/457|N:0+0+0+0=0/22|A:0+9+9+29=47/479 NA NA NA NA TIER_2 Likely_Benign Pathogenic GL_P NA NA upstream_gene_variant:E:TERT:Canonical:ENST00000310581.5|R:TERT:NM_001193376.1|R:TERT:Canonical:NM_198253.2|E:TERT:ENST00000296820.5|E:TERT:ENST00000334602.6|E:TERT:ENST00000508104.2 tier|ClinVar_classification|class LOW_TUMOR_DP|HET_VAF_RANGE IS:promoter . . exclude LOW_TUMOR_DP . Should be presented in the report. +IPD2225-D01-P01-A08 TP53 ENST00000269305 NM_000546 17:7577120 C>T c.818G>A exon8 R273H Arg273His c.818G>A:exon8:R273H:Arg273His missense_variant 219 0.393 NA NA 950 0.877 COSV52660980|COSV52664805|COSV52676050|COSV52728930 rs28934576 12366 1.59E-05 2,00E-04 2.64E-05 EUR_AF_1KG:0.0010000 TP53|R273|0 Uterine_Carcinosarcoma:_5.3%_(3/57)|Rectum_Adenocarcinoma:_3.6%_(5/137)|Colon_Adenocarcinoma:_3%_(12/399)|Esophageal_Carcinoma:_2.7%_(5/184)|Uterine_Corpus_Endometrial_Carcinoma:_2.3%_(12/530)|Ovarian_Serous_Cystadenocarcinoma:_2.1%_(9/436)|Brain_Lower_Grade_Glioma:_2%_(10/509)|Head_and_Neck_Squamous_Cell_Carcinoma:_1.4%_(7/508)|Stomach_Adenocarcinoma:_1.4%_(6/437)|Breast_Invasive_Carcinoma:_1.2%_(12/986)|Mesothelioma:_1.2%_(1/82)|Pancreatic_Adenocarcinoma:_1.1%_(2/180)|Glioblastoma_Multiforme:_0.8%_(3/393)|Lung_Squamous_Cell_Carcinoma:_0.6%_(3/492)|Liver_Hepatocellular_Carcinoma:_0.5%_(2/364)|Sarcoma:_0.4%_(1/237)|Lung_Adenocarcinoma:_0.2%_(1/567) 
BTCA-SG/Primary/2/12/0.1667|CMDI-UK/Primary/1/30/0.0333|ESAD-UK/Primary/4/97/0.0412|MALY-DE/Primary/3/100/0.03|OV-AU/Primary/5/60/0.0833|PACA-AU/Primary/3/90/0.0333|PACA-CA/Primary/4/140/0.0286|PRAD-CA/Primary/1/110/0.0091 T:283+2+4+8=297/1125|N:7+0+1+0=8/94|A:290+2+5+8=305/1219 TP53:p.R273H:ENST00000359597&ENST00000269305&ENST00000455263&ENST00000420246&ENST00000445888:CTAT_POPULATION|CTAT_CANCER|3D_STRUCTURE TP53 4_c_NS [non-HC]_1.782/1.378_NA TIER_2 Pathogenic Pathogenic GL_P P53_DNA-binding_domain NA "missense_variant:E:TP53:ENSP00000352610.4:p.(Arg273His)|R:TP53:NP_001119587.1:p.(Arg141His)|R:TP53:NP_001263626.1:p.(Arg114His)|R:TP53:NP_001263628.1:p.(Arg114His)|R:TP53:NP_001119589.1:p.(Arg141His)|R:TP53:NP_001263627.1:p.(Arg114His)|R:TP53:NP_001119588.1:p.(Arg141His)|E:TP53:Canonical:ENSP00000269305.4:p.(Arg273His)|R:TP53:NP_001263625.1:p.(Arg234His)|R:TP53:NP_001119590.1:p.(Arg234His)|R:TP53:NP_001119585.1:p.(Arg273His)|R:TP53:Canonical:NP_000537.3:p.(Arg273His)|R:TP53:NP_001263689.1:p.(Arg234His)|R:TP53:NP_001263690.1:p.(Arg234His)|R:TP53:NP_001263624.1:p.(Arg234His)|R:TP53:NP_001119586.1:p.(Arg273His)|R:TP53:NP_001119584.1:p.(Arg273His)|E:TP53:ENSP00000398846.2:p.(Arg273His)|E:TP53:ENSP00000391127.2:p.(Arg273His)|E:TP53:ENSP00000391478.2:p.(Arg273His)|R:TP53:XP_005256835.1:p.(Arg253His)|E:TP53:ENSP00000425104.1:p.(Arg141His);upstream_gene_variant:E:TP53:ENST00000576024.1;downstream_gene_variant:E:TP53:ENST00000514944.1|E:TP53:ENST00000508793.1|E:TP53:ENST00000604348.1|E:TP53:ENST00000503591.1" ClinVar_classification|tier|CPSR_classification|class HET_VAF_RANGE|germline_population IS:all_exons . . exclude germline_population,variant_in_more_than_10%_of_tumor_samples_(25.16%)_with_VAF_under_0.01 . Should be presented in the report. 
+IPD2225-D01-P01-A08 TERT ENST00000310581 NM_198253 5:1295228 G>A NA NA NA NA NA x:noncoding_variant 15 0.467 NA NA 0 NA NA rs1242535815 1299388 NA NA NA NA NA NA BOCA-UK/Primary/2/61/0.0328|BTCA-SG/Primary/1/12/0.0833|CLLE-ES/Primary/1/90/0.0111|LINC-JP/Primary/10/28/0.3571|MELA-AU/Metastatic/7/60/0.1167|ORCA-IN/Primary/1/13/0.0769|PBCA-DE/Primary/5/230/0.0217|RECA-EU/Primary/2/74/0.027 T:0+9+9+29=47/457|N:0+0+0+0=0/22|A:0+9+9+29=47/479 NA NA NA NA TIER_2 Likely_Benign Pathogenic GL_P NA NA upstream_gene_variant:E:TERT:Canonical:ENST00000310581.5|R:TERT:NM_001193376.1|R:TERT:Canonical:NM_198253.2|E:TERT:ENST00000296820.5|E:TERT:ENST00000334602.6|E:TERT:ENST00000508104.2 tier|ClinVar_classification|class LOW_TUMOR_DP|HET_VAF_RANGE IS:promoter OK QC done exclude LOW_TUMOR_DP . Should be presented in the report. +IPD2225-D01-P01-A08 TP53 ENST00000269305 NM_000546 17:7577547 C>T c.734G>A exon7 G245D Gly245Asp c.734G>A:exon7:G245D:Gly245Asp missense_variant 756 0.198 NA NA 1030 0.538 COSV52666323|COSV52667838|COSV52745465 rs121912656 12355 4,00E-06 NA 0 NA TP53|G245|8.55e-208 Uterine_Carcinosarcoma:_1.8%_(1/57)|Ovarian_Serous_Cystadenocarcinoma:_0.9%_(4/436)|Rectum_Adenocarcinoma:_0.7%_(1/137)|Pancreatic_Adenocarcinoma:_0.6%_(1/180)|Head_and_Neck_Squamous_Cell_Carcinoma:_0.4%_(2/508)|Breast_Invasive_Carcinoma:_0.3%_(3/986)|Liver_Hepatocellular_Carcinoma:_0.3%_(1/364)|Bladder_Urothelial_Carcinoma:_0.2%_(1/412)|Brain_Lower_Grade_Glioma:_0.2%_(1/509) ESAD-UK/Primary/1/97/0.0103|LIRI-JP/Primary/1/250/0.004|OV-AU/Primary/1/60/0.0167 T:202+1+2+1=206/1125|N:13+0+0+0=13/94|A:215+1+2+1=219/1219 TP53:p.G245D:ENST00000413465&ENST00000359597&ENST00000269305&ENST00000455263&ENST00000420246&ENST00000445888:CTAT_POPULATION|CTAT_CANCER|3D_STRUCTURE TP53 4_c_NS [non-HC]_2.026/2.052_NA TIER_3 Likely_Pathogenic Likely_Pathogenic SOM P53_DNA-binding_domain NA 
"missense_variant:E:TP53:ENSP00000410739.2:p.(Gly245Asp)|E:TP53:ENSP00000352610.4:p.(Gly245Asp)|R:TP53:NP_001119587.1:p.(Gly113Asp)|R:TP53:NP_001263626.1:p.(Gly86Asp)|R:TP53:NP_001263628.1:p.(Gly86Asp)|R:TP53:NP_001119589.1:p.(Gly113Asp)|R:TP53:NP_001263627.1:p.(Gly86Asp)|R:TP53:NP_001119588.1:p.(Gly113Asp)|E:TP53:Canonical:ENSP00000269305.4:p.(Gly245Asp)|R:TP53:NP_001263625.1:p.(Gly206Asp)|R:TP53:NP_001119590.1:p.(Gly206Asp)|R:TP53:NP_001119585.1:p.(Gly245Asp)|R:TP53:Canonical:NP_000537.3:p.(Gly245Asp)|R:TP53:NP_001263689.1:p.(Gly206Asp)|R:TP53:NP_001263690.1:p.(Gly206Asp)|R:TP53:NP_001263624.1:p.(Gly206Asp)|R:TP53:NP_001119586.1:p.(Gly245Asp)|R:TP53:NP_001119584.1:p.(Gly245Asp)|E:TP53:ENSP00000398846.2:p.(Gly245Asp)|E:TP53:ENSP00000391127.2:p.(Gly245Asp)|E:TP53:ENSP00000391478.2:p.(Gly245Asp)|R:TP53:XP_005256835.1:p.(Gly225Asp)|E:TP53:ENSP00000425104.1:p.(Gly113Asp)|E:TP53:ENSP00000423862.1:p.(Gly152Asp);upstream_gene_variant:E:TP53:ENST00000576024.1;downstream_gene_variant:E:TP53:ENST00000508793.1|E:TP53:ENST00000604348.1|E:TP53:ENST00000503591.1" ClinVar_classification|class|CPSR_classification NA IS:all_exons OK QC done include variant_in_more_than_10%_of_tumor_samples_(17.96%)_with_VAF_under_0.01 . . 
+IPD2225-D01-P01-A08 RB1 ENST00000267163 NM_000321 13:48953730 C>T c.1333C>T exon14 R445X Arg445Ter c.1333C>T:exon14:R445X:Arg445Ter stop_gained,splice_region_variant 370 0.181 NA NA 0 NA COSV57295313 rs3092891 13071 4,00E-06 NA 8.9E-06 NA RB1|R445|0.0101 Glioblastoma_Multiforme:_0.5%_(2/393)|Sarcoma:_0.4%_(1/237)|Bladder_Urothelial_Carcinoma:_0.2%_(1/412) NA T:259+22+1+0=282/1121|N:0+2+0+0=2/94|A:259+24+1+0=284/1215 NA CC 3_NA_NA [non-HC]_2.038/2.076_NA TIER_3 Pathogenic Pathogenic SOM Retinoblastoma-associated_protein_A_domain NA splice_region_variant,stop_gained:R:RB1:Canonical:NP_000312.2:p.(Arg445Ter)|E:RB1:Canonical:ENSP00000267163.4:p.(Arg445Ter) ClinVar_classification|protein_changes:stop_gained|class|CPSR_classification NA IS:all_exons OK QC done include variant_in_more_than_10%_of_tumor_samples_(23.1%)_with_VAF_under_0.01 . . +IPD2225-D01-P01-A08 PTEN ENST00000371953 NM_000314 10:89720798 GTACT>G c.955_958del exon8 T319X Thr319Ter c.955_958del:exon8:T319X:Thr319Ter frameshift_variant 490 0.061 NA NA 0 NA NA rs146650273 71118 NA NA NA NA NA Uterine_Corpus_Endometrial_Carcinoma:_2.6%_(14/530)|Glioblastoma_Multiforme:_1%_(4/393)|Esophageal_Carcinoma:_0.5%_(1/184)|Stomach_Adenocarcinoma:_0.5%_(2/437)|Breast_Invasive_Carcinoma:_0.3%_(3/986)|Cervical_Squamous_Cell_Carcinoma_and_Endocervical_Adenocarcinoma:_0.3%_(1/289)|Kidney_Renal_Clear_Cell_Carcinoma:_0.3%_(1/336) BRCA-EU/Primary/2/76/0.0263 T:3+0+3+0=6/1125|N:0+0+0+0=0/94|A:3+0+3+0=6/1219 NA PI3K NA [HC]_1.824/1.648_NA TIER_3 Pathogenic Pathogenic SOM C2_domain_of_PTEN_tumour-suppressor_protein NA frameshift_variant:E:PTEN:Canonical:ENSP00000361021.3:p.(Thr319Ter)|R:PTEN:NP_000305.3:p.(Thr319Ter)|R:PTEN:Canonical:NP_000305.3:p.(Thr319Ter)|R:PTEN:NP_001291647.1:p.(Thr122Ter)|R:PTEN:NP_001291646.2:p.(Thr492Ter) ClinVar_classification|protein_changes:frameshift_variant|class|CPSR_classification NA IS:all_exons OK QC done include . . . 
+IPD2225-D01-P01-A08 MSH2 ENST00000233146 NM_000251 2:47702189 CAAT>C c.1786_1788del exon12 N596del Asn596del c.1786_1788del:exon12:N596del:Asn596del inframe_deletion 670 0.564 NA NA 180 0.333 NA rs63749831 1757 4,00E-06 NA 8.8E-06 NA NA NA NA T:0+0+0+2=2/1127|N:0+0+0+0=0/94|A:0+0+0+2=2/1221 NA NA 1_NA_NA [non-HC]_2.052/2.104_NA TIER_3 Likely_Pathogenic Pathogenic GL_P MutS_domain_III NA inframe_deletion:E:MSH2:Canonical:ENSP00000233146.2:p.(Asn596del)|E:MSH2:ENSP00000442697.1:p.(Asn530del)|R:MSH2:NP_001245210.1:p.(Asn530del)|R:MSH2:Canonical:NP_000242.1:p.(Asn596del)|R:MSH2:XP_005264390.1:p.(Asn546del)|R:MSH2:XP_005264389.1:p.(Asn596del)|E:MSH2:ENSP00000384199.1:p.(Asn596del) ClinVar_classification|class|CPSR_classification HET_VAF_RANGE introns[5],all_exons OK QC done include . . . +IPD2225-D01-P01-A08 LZTR1 ENST00000215739 NA 22:21348266 G>A c.1407G>A exon13 W469X Trp469Ter c.1407G>A:exon13:W469X:Trp469Ter stop_gained 463 0.607 NA NA 0 NA NA rs777243508 549753 0.000138 NA 9,00E-06 FIN_AF_GNOMAD:0.0014920 NA NA NA T:132+0+0+1=133/1127|N:4+0+0+0=4/94|A:136+0+0+1=137/1221 NA WNT NA [non-HC]_1.658/1.316_NA TIER_3 VUS Pathogenic GL_DB BTB/POZ_domain NA "stop_gained:E:LZTR1:Canonical:ENSP00000215739.8:p.(Trp469Ter)|R:LZTR1:Canonical:NP_006758.2:p.(Trp469Ter)|E:LZTR1:ENSP00000374006.3:p.(Trp450Ter);upstream_gene_variant:E:LZTR1:ENST00000415817.1" ClinVar_classification|protein_changes:stop_gained HET_VAF_RANGE|germline_population NA OK QC done exclude germline_population,variant_in_more_than_10%_of_tumor_samples_(11.71%)_with_VAF_under_0.01 . Should be presented in the report. 
+IPD2225-D01-P01-A08 HNF1A ENST00000257555 NM_000545 12:121432116 G>GC c.863_864insC exon4 P289fs Pro289AlafsTer28 c.863_864insC:exon4:P289fs:Pro289AlafsTer28 frameshift_variant 594 0.051 NA NA 0 NA COSV57461342 rs766191969 1328238 0.0003501 NA 0.0006016 NA NA NA NA T:86+454+11+1=552/1127|N:4+50+3+0=57/94|A:90+504+14+1=609/1221 NA NA NA [non-HC]_1.992/1.984_NA TIER_4 Pathogenic Pathogenic BL Hepatocyte_nuclear_factor_1_(HNF-1)|beta_isoform_C_terminus NA frameshift_variant:R:HNF1A:XP_005253989.1:p.(Pro172AlafsTer28)|R:HNF1A:XP_005253988.1:p.(Pro289AlafsTer28)|E:HNF1A:Canonical:ENSP00000257555.4:p.(Pro289AlafsTer28)|E:HNF1A:ENSP00000476181.1:p.(Pro289AlafsTer28)|R:HNF1A:NP_001293108.1:p.(Pro289AlafsTer28)|R:HNF1A:Canonical:NP_000536.5:p.(Pro289AlafsTer28)|E:HNF1A:ENSP00000475300.1:p.(Pro289AlafsTer28)|E:HNF1A:ENSP00000439721.1:p.(Pro172AlafsTer28)|E:HNF1A:ENSP00000443112.1:p.(Pro289AlafsTer28)|R:HNF1A:NP_000536.5:p.(Pro289AlafsTer28)|E:HNF1A:ENSP00000438804.1:p.(Pro289AlafsTer28) ClinVar_classification|protein_changes:frameshift_variant|CPSR_classification NA all_exons . . include BL_class . . +IPD2225-D01-P01-A08 GATA3 ENST00000379328 NM_001002295 10:8115788 G>T c.1137G>T exon6 V379V Val379= c.1137G>T:exon6:V379V:Val379= synonymous_variant 1019 0.583 NA NA 0 NA NA NA NA NA NA NA NA NA NA NA T:138+0+0+1=139/1127|N:5+0+0+0=5/94|A:143+0+0+1=144/1221 NA NA NA [non-HC]_2.168/2.336_NA TNC Likely_Benign NA GL_P NA NA NA class HET_VAF_RANGE all_exons . . include variant_in_more_than_10%_of_tumor_samples_(12.24%)_with_VAF_under_0.01 . . +IPD2225-D01-P01-A08 PREX2 ENST00000288368 NM_024870 8:68993013 AT>GC c.1818_1819delinsGC exon17 GL606-607GL GlyLeu606= c.1818_1819delinsGC:exon17:GL606-607GL:GlyLeu606= synonymous_variant 425 0.555 NA NA 0 NA NA rs368406603 NA NA NA NA NA NA NA NA T:7+5+87+794=893/1127|N:1+0+1+83=85/94|A:8+5+88+877=978/1221 NA NA NA [non-HC]_2.138/2.276_NA TNC NA NA GL_P NA NA NA class recurrent_germline|HET_VAF_RANGE NA . . exclude recurrent_germline . . 
+IPD2225-D01-P01-A08 DNMT3A ENST00000264709 NM_175629 2:25505146 G>A NA NA NA NA NA x:noncoding_variant 774 0.016 NA NA 0 NA COSV99262851 rs970486301 NA NA NA NA NA NA Uterine_Corpus_Endometrial_Carcinoma:_0.2%_(1/530) NA T:299+1+0+0=300/1127|N:8+0+0+0=8/94|A:307+1+0+0=308/1221 NA NA NA [non-HC]_1.938/1.876_NA TNC NA NA SOM NA Decitabine|Azacitidine 3_prime_UTR_variant:R:DNMT3A:NM_001320892.1|E:DNMT3A:ENST00000406659.3|R:DNMT3A:NM_175630.1 class LOW_TUMOR_VAF all_exons . . exclude LOW_TUMOR_VAF,variant_in_more_than_10%_of_tumor_samples_(26.53%)_with_VAF_under_0.01 . . +IPD2225-D01-P01-A08 ETS1 ENST00000392668 NM_001143820 11:128331011 G>A NA NA NA NA NA x:noncoding_variant 619 0.095 NA NA 1176 0.035 NA rs1029758313 NA NA NA NA NA NA NA NA T:427+7+0+0=434/1127|N:6+0+0+0=6/94|A:433+7+0+0=440/1221 NA NA NA [non-HC]_2.336/2.672_NA TNC NA NA SOM NA NA "3_prime_UTR_variant:E:ETS1:ENST00000345075.4|E:ETS1:ENST00000535549.1|R:ETS1:NM_001162422.1|R:ETS1:NM_005238.3|R:ETS1:NM_001330451.1|R:ETS1:Canonical:NM_001143820.1|R:ETS1:XM_005271428.1|E:ETS1:Canonical:ENST00000392668.4|E:ETS1:ENST00000526145.2;downstream_gene_variant:E:ETS1:ENST00000531611.1|E:ETS1:ENST00000319397.6" class NA NA . . include variant_in_more_than_10%_of_tumor_samples_(37.89%)_with_VAF_under_0.01 . . 
+IPD2225-D01-P01-A08 TP53 ENST00000269305 NM_000546 17:7578457 C>G c.473G>C exon5 R158P Arg158Pro c.473G>C:exon5:R158P:Arg158Pro missense_variant 557 0.657 NA NA NA NA COSV52676395|COSV52678258|COSV52680378|COSV53545833 rs587782144 246118 NA NA NA NA TP53|R158|8.56e-57 Glioblastoma_Multiforme:_0.3%_(1/393)|Lung_Adenocarcinoma:_0.2%_(1/567) PACA-CA/Primary/1/140/0.0071 T:17+0+0+0=17/1126|N:2+0+0+0=2/94|A:19+0+0+0=19/1220 TP53:p.R158P:ENST00000413465&ENST00000359597&ENST00000269305&ENST00000455263&ENST00000420246&ENST00000445888&ENST00000508793:CTAT_CANCER|3D_STRUCTURE TP53 4_c_NS [non-HC]_1.692/1.614_NA TIER_3 Likely_Pathogenic Likely_Pathogenic SOM P53_DNA-binding_domain NA "missense_variant:E:TP53:ENSP00000410739.2:p.(Arg158Pro)|E:TP53:ENSP00000352610.4:p.(Arg158Pro)|R:TP53:NP_001119587.1:p.(Arg26Pro)|R:TP53:NP_001119589.1:p.(Arg26Pro)|R:TP53:NP_001119588.1:p.(Arg26Pro)|E:TP53:Canonical:ENSP00000269305.4:p.(Arg158Pro)|R:TP53:NP_001263625.1:p.(Arg119Pro)|R:TP53:NP_001119590.1:p.(Arg119Pro)|R:TP53:NP_001119585.1:p.(Arg158Pro)|R:TP53:Canonical:NP_000537.3:p.(Arg158Pro)|R:TP53:NP_001263689.1:p.(Arg119Pro)|R:TP53:NP_001263690.1:p.(Arg119Pro)|R:TP53:NP_001263624.1:p.(Arg119Pro)|R:TP53:NP_001119586.1:p.(Arg158Pro)|R:TP53:NP_001119584.1:p.(Arg158Pro)|E:TP53:ENSP00000398846.2:p.(Arg158Pro)|E:TP53:ENSP00000391127.2:p.(Arg158Pro)|E:TP53:ENSP00000391478.2:p.(Arg158Pro)|R:TP53:XP_005256835.1:p.(Arg138Pro)|E:TP53:ENSP00000425104.1:p.(Arg26Pro)|E:TP53:ENSP00000423862.1:p.(Arg65Pro)|E:TP53:ENSP00000424104.1:p.(Arg158Pro);5_prime_UTR_variant:R:TP53:NM_001276697.1|R:TP53:NM_001276699.1|R:TP53:NM_001276698.1;upstream_gene_variant:E:TP53:ENST00000576024.1;downstream_gene_variant:E:TP53:ENST00000604348.1|E:TP53:ENST00000503591.1" class|ClinVar_classification|CPSR_classification NA IS:all_exons OK QC done include . . . 
+IPD2225-D01-P01-A08 SMARCA4 ENST00000429416 NM_001128844 19:11132513 C>T c.2729C>T exon20 T910M Thr910Met c.2729C>T:exon20:T910M:Thr910Met missense_variant 607 0.633 NA NA NA NA COSV60785486|COSV60802491 rs1238758086 438790 0 NA 0 NA SMARCA4|T910|3.38e-26 Uveal_Melanoma:_1.2%_(1/80)|Esophageal_Carcinoma:_1.1%_(2/184)|Thymoma:_0.8%_(1/123)|Stomach_Adenocarcinoma:_0.5%_(2/437)|Kidney_Renal_Clear_Cell_Carcinoma:_0.3%_(1/336)|Bladder_Urothelial_Carcinoma:_0.2%_(1/412) ESAD-UK/Primary/1/97/0.0103|PBCA-DE/Primary/1/230/0.0043 T:387+1+1+0=389/1127|N:15+0+0+0=15/94|A:402+1+1+0=404/1221 SMARCA4:p.T910M:ENST00000358026&ENST00000344626&ENST00000429416&ENST00000541122&ENST00000589677&ENST00000444061&ENST00000590574&ENST00000413806&ENST00000450717:CTAT_POPULATION|CTAT_CANCER NA NA [non-HC]_1.706/1.632_NA TIER_3 Likely_Pathogenic VUS GL_P SNF2-related_domain NA missense_variant:E:SMARCA4:Canonical:ENSP00000350720.2:p.(Thr910Met)|R:SMARCA4:NP_003063.2:p.(Thr910Met)|R:SMARCA4:NP_001122316.1:p.(Thr910Met)|R:SMARCA4:XP_005260086.1:p.(Thr910Met)|R:SMARCA4:XP_005260087.1:p.(Thr910Met)|R:SMARCA4:XP_005260084.1:p.(Thr910Met)|R:SMARCA4:XP_005260088.1:p.(Thr910Met)|R:SMARCA4:XP_005260082.1:p.(Thr910Met)|R:SMARCA4:XP_005260090.1:p.(Thr910Met)|R:SMARCA4:XP_005260091.1:p.(Thr910Met)|R:SMARCA4:XP_005260085.1:p.(Thr910Met)|R:SMARCA4:XP_005260093.1:p.(Thr914Met)|R:SMARCA4:XP_005260089.1:p.(Thr910Met)|R:SMARCA4:XP_005260092.1:p.(Thr910Met)|E:SMARCA4:ENSP00000343896.4:p.(Thr910Met)|E:SMARCA4:ENSP00000395654.1:p.(Thr910Met)|E:SMARCA4:ENSP00000445036.2:p.(Thr910Met)|E:SMARCA4:ENSP00000464778.1:p.(Thr910Met)|E:SMARCA4:ENSP00000392837.2:p.(Thr910Met)|R:SMARCA4:Canonical:NP_001122321.1:p.(Thr910Met)|E:SMARCA4:ENSP00000466963.1:p.(Thr910Met)|R:SMARCA4:XP_005260083.1:p.(Thr910Met)|E:SMARCA4:ENSP00000414727.2:p.(Thr910Met)|E:SMARCA4:ENSP00000397783.2:p.(Thr910Met)|R:SMARCA4:NP_001122319.1:p.(Thr910Met)|R:SMARCA4:NP_001122317.1:p.(Thr910Met)|R:SMARCA4:NP_001122320.1:p.(Thr910Met)|R:SMARCA4:NP_001122318.1:
p.(Thr910Met) class|CPSR_classification HET_VAF_RANGE all_exons OK QC done include variant_in_more_than_10%_of_tumor_samples_(34.34%)_with_VAF_under_0.01 . . +IPD2225-D01-P01-A08 CDKN2A ENST00000498124 NM_001195132 9:21975017 C>T NA NA NA NA NA x:noncoding_variant 866 1 NA NA NA NA COSV58720858 rs3814960 873184 NA 0,4878 NA GLOBAL_AF_1KG:0.4878000|AFR_AF_1KG:0.1793000|AMR_AF_1KG:0.4424000|EAS_AF_1KG:0.6657000|EUR_AF_1KG:0.6243000|SAS_AF_1KG:0.6135000 NA NA NA T:11+3+50+943=1007/1123|N:2+0+0+81=83/94|A:13+3+50+1024=1090/1217 NA CC NA [non-HC]_2.678/2.848_NA TIER_2 Likely_Benign Benign GL_DB NA NA "upstream_gene_variant:R:CDKN2A:XM_005251343.1|R:CDKN2A:NM_058197.4|E:CDKN2A:ENST00000579122.1|E:CDKN2A:ENST00000578845.2|E:CDKN2A:ENST00000498124.1|E:CDKN2A:ENST00000479692.2|E:CDKN2A:ENST00000497750.1;5_prime_UTR_variant:R:CDKN2A:NM_001195132.1|R:CDKN2A:Canonical:NM_000077.4|E:CDKN2A:ENST00000304494.5|R:CDKN2A:XM_005251341.1|E:CDKN2A:ENST00000446177.1" tier recurrent_germline|HIGH_TUMOR_VAF|germline_population IS:all_exons . QC done exclude germline_population,recurrent_germline . . +IPD2225-D01-P01-A08 ZNF703 ENST00000331569 NM_025069 8:37555623 CA>C c.1205del exon2 H402fs His402ProfsTer8 c.1205del:exon2:H402fs:His402ProfsTer8 frameshift_variant 95 0.095 NA NA NA NA NA rs780099353 NA 0,000016 NA 0,0000215 NA NA NA NA T:4+27+7+0=38/1124|N:0+2+1+0=3/93|A:4+29+8+0=41/1217 NA NA NA [non-HC]_1.846/1.808_NA TIER_3 NA NA SOM NA NA frameshift_variant:E:ZNF703:Canonical:ENSP00000332325.4:p.(His402ProfsTer8)|R:ZNF703:Canonical:NP_079345.1:p.(His402ProfsTer8)|R:ZNF703:NP_079345.1:p.(His402ProfsTer8) class|protein_changes:frameshift_variant NA all_exons . . include . . . 
+IPD2225-D01-P01-A08 AMER1 ENST00000330258 NA X:63410564 C>A c.2603G>T exon2 S868I Ser868Ile c.2603G>T:exon2:S868I:Ser868Ile missense_variant 298 0.604 NA NA NA NA NA NA NA NA NA NA NA NA NA NA T:86+2+0+0=88/1126|N:3+0+0+0=3/94|A:89+2+0+0=91/1220 NA WNT NA [non-HC]_0.85/0.812_NA TIER_3 VUS NA SOM NA NA missense_variant:E:AMER1:Canonical:ENSP00000329117.3:p.(Ser868Ile)|R:AMER1:Canonical:NP_689637.3:p.(Ser868Ile)|R:AMER1:XP_005262298.1:p.(Ser591Ile) class HET_VAF_RANGE "aka[FAM123B];all_exons" . . include . . . +IPD2225-D01-P01-A08 LRP1B ENST00000389484 NM_018557 2:141641474 C>A c.4081G>T exon25 V1361L Val1361Leu c.4081G>T:exon25:V1361L:Val1361Leu missense_variant 817 0.442 NA NA NA NA COSV67290963 NA NA NA NA NA NA NA NA NA T:28+1+0+0=29/1127|N:1+0+0+0=1/94|A:29+1+0+0=30/1221 NA NA NA [non-HC]_2.256/2.32_NA TIER_3 NA NA SOM Low-density_lipoprotein_receptor_repeat_class_B NA missense_variant:E:LRP1B:Canonical:ENSP00000374135.3:p.(Val1361Leu)|R:LRP1B:Canonical:NP_061027.2:p.(Val1361Leu)|E:LRP1B:ENSP00000413239.1:p.(Val506Leu) class HET_VAF_RANGE NA . . include . . . 
+IPD2225-D01-P01-A08 STAT3 ENST00000264657 NM_001384992|NM_001369512|NM_001369513|NM_001384985|NM_139276 17:40490785 A>G c.514T>C exon6 F172L Phe172Leu c.514T>C:exon6:F172L:Phe172Leu missense_variant 345 0.293 NA NA NA NA NA NA NA NA NA NA NA NA NA NA T:21+0+0+0=21/1127|N:3+0+0+0=3/94|A:24+0+0+0=24/1221 NA NA NA [non-HC]_1.77/1.712_NA TIER_3 VUS NA SOM STAT_protein|all-alpha_domain NA missense_variant:R:STAT3:XP_005257676.1:p.(Phe267Leu)|R:STAT3:XP_005257670.1:p.(Phe267Leu)|R:STAT3:XP_005257677.1:p.(Phe267Leu)|R:STAT3:XP_005257673.1:p.(Phe267Leu)|R:STAT3:XP_005257672.1:p.(Phe267Leu)|E:STAT3:Canonical:ENSP00000264657.4:p.(Phe172Leu)|R:STAT3:XP_005257671.1:p.(Phe266Leu)|R:STAT3:XP_005257674.1:p.(Phe266Leu)|R:STAT3:XP_005257675.1:p.(Phe266Leu)|R:STAT3:NP_998827.1:p.(Phe172Leu)|R:STAT3:Canonical:NP_644805.1:p.(Phe172Leu)|R:STAT3:NP_003141.2:p.(Phe172Leu)|E:STAT3:ENSP00000467000.1:p.(Phe172Leu)|E:STAT3:ENSP00000373923.3:p.(Phe74Leu)|E:STAT3:ENSP00000467985.1:p.(Phe172Leu)|E:STAT3:ENSP00000384943.3:p.(Phe172Leu) class NA all_exons . . include . . . +IPD2225-D01-P01-A08 ARID2 ENST00000334344 NM_152641 12:46231389 CTGATGTGCTGCT>C c.1232_1243del exon10 D411_L414del Asp411_Leu414del c.1232_1243del:exon10:D411_L414del:Asp411_Leu414del inframe_deletion 537 0.073 NA NA 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA [non-HC]_2.07/2.35_NA TIER_3 VUS NA SOM NA NA inframe_deletion:E:ARID2:Canonical:ENSP00000335044.6:p.(Asp411_Leu414del)|R:ARID2:NP_001334768.1:p.(Asp411_Leu414del)|R:ARID2:Canonical:NP_689854.2:p.(Asp411_Leu414del)|R:ARID2:NP_689854.2:p.(Asp411_Leu414del)|E:ARID2:ENSP00000415650.1:p.(Asp262_Leu265del)|E:ARID2:ENSP00000397307.1:p.(Asp40_Leu43del) class NA NA . . include no_recurrence_data . . 
+IPD2225-D01-P01-A08 ARID1A ENST00000324856 NM_006015 1:27102086 G>A c.5012G>A exon19 R1671Q Arg1671Gln c.5012G>A:exon19:R1671Q:Arg1671Gln missense_variant 575 0.11 NA NA 0 NA NA rs773047184 NA 4,00E-06 NA 8.8E-06 NA NA NA NA T:241+3+0+0=244/1127|N:6+0+0+0=6/94|A:247+3+0+0=250/1221 NA NA NA [non-HC]_2.054/2.27_NA TIER_3 VUS NA SOM NA NA "missense_variant:R:ARID1A:NP_624361.1:p.(Arg1454Gln)|R:ARID1A:Canonical:NP_006006.3:p.(Arg1671Gln)|E:ARID1A:Canonical:ENSP00000320485.7:p.(Arg1671Gln)|R:ARID1A:XP_005246062.1:p.(Arg1670Gln)|E:ARID1A:ENSP00000387636.2:p.(Arg1454Gln)|E:ARID1A:ENSP00000363267.2:p.(Arg1288Gln)|E:ARID1A:ENSP00000390317.2:p.(Arg568Gln);5_prime_UTR_variant:E:ARID1A:ENST00000540690.1" class NA all_exons . . include variant_in_more_than_10%_of_tumor_samples_(21.38%)_with_VAF_under_0.01 . . +IPD2225-D01-P01-A08 AXIN2 ENST00000307078 NM_004655 17:63534410 C>G c.1111G>C exon5 A371P Ala371Pro c.1111G>C:exon5:A371P:Ala371Pro missense_variant 530 0.021 NA NA 0 NA NA NA NA NA NA NA NA NA NA NA T:28+1+0+0=29/1127|N:1+0+0+0=1/94|A:29+1+0+0=30/1221 NA WNT NA [non-HC]_1.858/1.29_NA TIER_3 VUS NA SOM NA NA missense_variant:R:AXIN2:XP_005257775.1:p.(Ala371Pro)|R:AXIN2:XP_005257774.1:p.(Ala371Pro)|R:AXIN2:XP_005257773.1:p.(Ala371Pro)|R:AXIN2:Canonical:NP_004646.3:p.(Ala371Pro)|R:AXIN2:XP_005257776.1:p.(Ala371Pro)|E:AXIN2:Canonical:ENSP00000302625.5:p.(Ala371Pro)|E:AXIN2:ENSP00000364854.5:p.(Ala371Pro) class NA NA . . include . . . 
+IPD2225-D01-P01-A08 ERBB3 ENST00000267101 NM_001982 12:56488226 G>A c.1745G>A exon15 G582E Gly582Glu c.1745G>A:exon15:G582E:Gly582Glu missense_variant 607 0.094 NA NA 0 NA COSV57263177 NA NA NA NA NA NA NA NA GACA-CN/Primary/1/32/0.0312 T:93+2+0+0=95/1127|N:3+0+0+0=3/94|A:96+2+0+0=98/1221 NA RTKRAS NA [HC]_2.28/3.4_NA TIER_3 NA NA SOM Growth_factor_receptor_domain_IV Patritumab|Vandetanib "missense_variant:E:ERBB3:Canonical:ENSP00000267101.3:p.(Gly582Glu)|R:ERBB3:Canonical:NP_001973.2:p.(Gly582Glu)|E:ERBB3:ENSP00000408340.2:p.(Gly523Glu)|E:ERBB3:ENSP00000448946.2:p.(Gly66Glu);upstream_gene_variant:E:ERBB3:ENST00000553131.1|E:ERBB3:ENST00000549832.1" class NA "all_exons;IS:exons[3,6-8,10,12,20-21,23-25]" . . include . . . +IPD2225-D01-P01-A08 ERBB3 ENST00000267101 NM_001982 12:56489568 G>A c.2033G>A exon17 R678K Arg678Lys c.2033G>A:exon17:R678K:Arg678Lys missense_variant 353 0.099 NA NA 0 NA COSV57246339 NA NA NA NA NA NA NA Lung_Adenocarcinoma:_0.2%_(1/567) NA T:79+1+0+0=80/1127|N:5+0+0+0=5/94|A:84+1+0+0=85/1221 NA RTKRAS NA [HC]_2.28/3.4_NA TIER_3 NA NA SOM NA Patritumab|Vandetanib "missense_variant:E:ERBB3:Canonical:ENSP00000267101.3:p.(Arg678Lys)|R:ERBB3:Canonical:NP_001973.2:p.(Arg678Lys)|E:ERBB3:ENSP00000399178.2:p.(Arg35Lys)|E:ERBB3:ENSP00000408340.2:p.(Arg619Lys)|E:ERBB3:ENSP00000448946.2:p.(Arg162Lys);upstream_gene_variant:E:ERBB3:ENST00000553131.1|E:ERBB3:ENST00000549832.1" class NA "all_exons;IS:exons[3,6-8,10,12,20-21,23-25]" . . include . . . 
+IPD2225-D01-P01-A08 FAT1 ENST00000441802 NM_005245 4:187524452 C>T c.11228G>A exon19 C3743Y Cys3743Tyr c.11228G>A:exon19:C3743Y:Cys3743Tyr missense_variant 928 0.13 NA NA 0 NA COSV71675006 NA NA NA NA NA NA NA NA ORCA-IN/Primary/1/13/0.0769 T:156+3+0+0=159/1127|N:5+0+0+0=5/94|A:161+3+0+0=164/1221 NA HIPPO NA [non-HC]_2.13/2.65_NA TIER_3 VUS NA SOM NA NA missense_variant:R:FAT1:Canonical:NP_005236.2:p.(Cys3743Tyr)|E:FAT1:Canonical:ENSP00000406229.2:p.(Cys3743Tyr)|R:FAT1:XP_005262892.1:p.(Cys3743Tyr)|R:FAT1:XP_005262891.1:p.(Cys3743Tyr) class NA NA . . include variant_in_more_than_10%_of_tumor_samples_(13.84%)_with_VAF_under_0.01 . . +IPD2225-D01-P01-A08 KMT2B ENST00000222270 NA 19:36214632 A>AG c.3059dup exonNA R1021fs Arg1021ProfsTer14 c.3059dup:exonNA:R1021fs:Arg1021ProfsTer14 frameshift_variant,splice_region_variant 441 0.995 NA NA NA NA NA rs11373774 NA 1 NA 1 GLOBAL_AF_GNOMAD:1.0000000|AFR_AF_1KG:1.0000000|AFR_AF_GNOMAD:1.0000000|AMR_AF_1KG:1.0000000|AMR_AF_GNOMAD:1.0000000|ASJ_AF_GNOMAD:1.0000000|EAS_AF_1KG:1.0000000|EAS_AF_GNOMAD:1.0000000|EUR_AF_1KG:1.0000000|FIN_AF_GNOMAD:0.9999000|NFE_AF_GNOMAD:1.0000000|OTH_AF_GNOMAD:1.0000000|SAS_AF_1KG:1.0000000|SAS_AF_GNOMAD:1.0000000 NA NA NA T:0+0+0+1127=1127/1127|N:0+0+0+94=94/94|A:0+0+0+1221=1221/1221 NA NA NA [non-HC]_2.872/3.09_NA TIER_4 NA NA BL NA NA "downstream_gene_variant:E:KMT2B:ENST00000341701.1;splice_acceptor_variant:E:KMT2B:ENSP00000398837.1:p.(Arg1021ProfsTer14);frameshift_variant,splice_region_variant:E:KMT2B:Canonical:ENSP00000222270.6:p.(Arg1021ProfsTer14)|R:KMT2B:NP_055542.1:p.(Arg1021ProfsTer14)|R:KMT2B:Canonical:NP_055542.1:p.(Arg1021ProfsTer14)|R:KMT2B:XP_005259503.1:p.(Arg262ProfsTer14)" protein_changes:frameshift_variant,splice_acceptor_variant recurrent_germline|HIGH_TUMOR_VAF|germline_population NA . QC done exclude germline_population,recurrent_germline,BL_class . . 
+IPD2225-D01-P01-A08 PRKDC ENST00000314191 NM_006904 8:48805816 A>AG c.3729dup exonNA R1244fs Arg1244ProfsTer41 c.3729dup:exonNA:R1244fs:Arg1244ProfsTer41 frameshift_variant,splice_region_variant,splice_polypyrimidine_tract_variant,intron_variant 833 0.987 NA NA NA NA NA rs11411516 NA 1 NA 1 GLOBAL_AF_GNOMAD:1.0000000|AFR_AF_1KG:1.0000000|AFR_AF_GNOMAD:0.9999000|AMR_AF_1KG:1.0000000|AMR_AF_GNOMAD:0.9999000|ASJ_AF_GNOMAD:1.0000000|EAS_AF_1KG:1.0000000|EAS_AF_GNOMAD:1.0000000|EUR_AF_1KG:1.0000000|FIN_AF_GNOMAD:1.0000000|NFE_AF_GNOMAD:1.0000000|OTH_AF_GNOMAD:1.0000000|SAS_AF_1KG:1.0000000|SAS_AF_GNOMAD:1.0000000 NA NA NA T:0+0+0+1127=1127/1127|N:0+0+0+94=94/94|A:0+0+0+1221=1221/1221 NA NA NA [non-HC]_3.482/3.852_NA TIER_4 NA NA BL NA NA "frameshift_variant,splice_region_variant:E:PRKDC:Canonical:ENSP00000313420.2:p.(Arg1244ProfsTer41)|E:PRKDC:ENSP00000345182.3:p.(Arg1244ProfsTer41);frameshift_variant:R:PRKDC:NM_001081640.1:c.3729dup(p.(Ter1244=))|R:PRKDC:Canonical:NM_006904.6:c.3729dup(p.(Ter1244=))" protein_changes:frameshift_variant recurrent_germline|HIGH_TUMOR_VAF|germline_population NA . QC done exclude germline_population,recurrent_germline,BL_class . . diff --git a/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-R03-P01-A08_sample_QC_plot.pdf b/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-R03-P01-A08_sample_QC_plot.pdf new file mode 100644 index 0000000..8af9f76 Binary files /dev/null and b/test_data/ous/test_files_for_PRONTO_newFunctions/IPD2225-R03-P01-A08_sample_QC_plot.pdf differ diff --git a/test_data/ous/test_files_for_PRONTO_newFunctions/InPreD_PRONTO_metadata.txt b/test_data/ous/test_files_for_PRONTO_newFunctions/InPreD_PRONTO_metadata.txt new file mode 100644 index 0000000..7fd2177 --- /dev/null +++ b/test_data/ous/test_files_for_PRONTO_newFunctions/InPreD_PRONTO_metadata.txt @@ -0,0 +1,24 @@ +# Information contained within this file is intended for use in automated creation of InPreD mol-MDT report (the PRONTO image). 
+# Each data row is meant for describing the meta-information of a single DNA or RNA sample sequenced with the TSO500 assay and post-processed with the TSOPPI package. +# - If TSOPPI is run with matching DNA and RNA, information from both DNA and RNA will be printed into the mol-MDT report. +"# - Please use ""-"" to represent any missing values." +# - Tabulator values should be used for separating individual columns. +# - The meta-information will be used as-is. +"# Overview of individual columns and their expected values (columns marked with ""[REQ]"" are required for mol-MDT report creation; columns without this tag are optional):" +"# - [REQ] ""Sample_id"": sample identifier as used in TSOPPI post-processing; the ID should follow the InPreD sample ID nomenclature;" +"# - [REQ] ""Sequencing_run_id"": sequencing run identifier as used in TSOPPI post-processing;" +"# - [REQ] ""Create_report"": mol-MDT reports will only be created for DNA samples with value ""Y"" in this column; expected values for DNA [""Y"", ""N""]; and for RNA ""-"";" +"# - ""Year_of_birth"": patient's year of birth; in integer format;" +"# - ""Year_of_diagnosis"": year of disease diagnosis; in integer format;" +"# - ""Clinical_diagnosis"": disease diagnosis (not limited to the PCGR tumor site classification); in string format;" +"# - ""Sex"": patient sex; expected values: [""M"", ""F""];" +"# - ""Study_id"": sample/patient study identifier, e.g., an IMPRESS id; in string format;" +"# - ""Sample_material_id"": sample material identifier as documented in the pathology system; in string format;" +"# - ""Year_of_sample_collection"": year of sample material collection; in integer format;" +"# - ""Requisition_hospital"": hospital requiring the InPreD sample analysis; in string format;" +"# - ""Extraction_hospital"": hospital undertaking the sample extraction; in string format." 
+"# - ""Tumor_content_pathology"": tumor content in percent estimated by the pathologist; not necessarily the value used in TSOPPI and written into the mol-MDT report;" +"# - ""Batch"": batch information from the wet lab." +Sample_id Sequencing_run_id Create_report Year_of_birth Year_of_diagnosis Clinical_diagnosis Sex Study_id Sample_material_id Year_of_sample_collection Requisition_hospital Extraction_hospital Tumor_content_pathology Batch Molecular_pathology_comment Sample_info_comment +IPD2225-D01-P01-A08 251114_A02134_0115_BHCJCKDRX7 Y 1960 - - M MRARE-X-xxxx 25materialX - OUS OUS 50 201 Comment_pathy Comment_sample +IPD2225-R03-P01-A08 251114_A02134_0115_BHCJCKDRX7 - 1960 - - M MRARE-X-xxxx 25materialX - OUS OUS 50 201 Comment_pathy Comment_sample diff --git a/test_data/ous/test_files_for_PRONTO_newFunctions/InPreD_PRONTO_metadata_tsoppi.txt b/test_data/ous/test_files_for_PRONTO_newFunctions/InPreD_PRONTO_metadata_tsoppi.txt new file mode 100644 index 0000000..ac32cec --- /dev/null +++ b/test_data/ous/test_files_for_PRONTO_newFunctions/InPreD_PRONTO_metadata_tsoppi.txt @@ -0,0 +1,24 @@ +# Information contained within this file is intended for use in automated creation of InPreD mol-MDT report (the PRONTO image). +# Each data row is meant for describing the meta-information of a single DNA or RNA sample sequenced with the TSO500 assay and post-processed with the TSOPPI package. +# - If TSOPPI is run with matching DNA and RNA, information from both DNA and RNA will be printed into the mol-MDT report. +"# - Please use ""-"" to represent any missing values." +# - Tabulator values should be used for separating individual columns. +# - The meta-information will be used as-is. 
+"# Overview of individual columns and their expected values (columns marked with ""[REQ]"" are required for mol-MDT report creation; columns without this tag are optional):" +"# - [REQ] ""Sample_id"": sample identifier as used in TSOPPI post-processing; the ID should follow the InPreD sample ID nomenclature;" +"# - [REQ] ""Sequencing_run_id"": sequencing run identifier as used in TSOPPI post-processing;" +"# - [REQ] ""Create_report"": mol-MDT reports will only be created for DNA samples with value ""Y"" in this column; expected values for DNA [""Y"", ""N""]; and for RNA ""-"";" +"# - ""Year_of_birth"": patient's year of birth; in integer format;" +"# - ""Year_of_diagnosis"": year of disease diagnosis; in integer format;" +"# - ""Clinical_diagnosis"": disease diagnosis (not limited to the PCGR tumor site classification); in string format;" +"# - ""Sex"": patient sex; expected values: [""M"", ""F""];" +"# - ""Study_id"": sample/patient study identifier, e.g., an IMPRESS id; in string format;" +"# - ""Sample_material_id"": sample material identifier as documented in the pathology system; in string format;" +"# - ""Year_of_sample_collection"": year of sample material collection; in integer format;" +"# - ""Requisition_hospital"": hospital requiring the InPreD sample analysis; in string format;" +"# - ""Extraction_hospital"": hospital undertaking the sample extraction; in string format." +"# - ""Tumor_content_pathology"": tumor content in percent estimated by the pathologist; not necessarily the value used in TSOPPI and written into the mol-MDT report;" +"# - ""Batch"": batch information from the wet lab." 
+Sample_id Sequencing_run_id Create_report Year_of_birth Year_of_diagnosis Clinical_diagnosis Sex Study_id Sample_material_id Year_of_sample_collection Requisition_hospital Extraction_hospital Tumor_content_pathology Batch Material Sample_type Cancer_type_PCGR TMB_inpred_in_house TMB_TSO500 MSI_TSO500 Version_string_TSOPPI_reported_on_mol-MDT Molecular_pathology_comment Sample_info_comment +IPD2225-D01-P01-A08 251114_A02134_0115_BHCJCKDRX7 - 1960 - - M MRARE-X-xxxx 25materialX - OUS OUS 50 201 Archived FFPE Primary tumor naive CNS/Brain 17 14.9 (19) 4.13 (5/121) 2.2.0.12/0.3.2:22-06-07/1.0.0/1.0.0/hg19/TR Comment_pathy Comment_sample +IPD2225-R03-P01-A08 251114_A02134_0115_BHCJCKDRX7 - 1960 - - M MRARE-X-xxxx 25materialX - OUS OUS 50 201 Archived FFPE Primary tumor naive CNS/Brain - - - - Comment_pathy Comment_sample diff --git a/test_data/ous/test_files_for_PRONTO_newFunctions/output/IPD2225-D01-P01-A08_MTB_report.pptx b/test_data/ous/test_files_for_PRONTO_newFunctions/output/IPD2225-D01-P01-A08_MTB_report.pptx new file mode 100644 index 0000000..6fc451c Binary files /dev/null and b/test_data/ous/test_files_for_PRONTO_newFunctions/output/IPD2225-D01-P01-A08_MTB_report.pptx differ diff --git a/test_data/ous/test_files_for_PRONTO_newFunctions/sample_list.tsv b/test_data/ous/test_files_for_PRONTO_newFunctions/sample_list.tsv new file mode 100644 index 0000000..7d6b237 --- /dev/null +++ b/test_data/ous/test_files_for_PRONTO_newFunctions/sample_list.tsv @@ -0,0 +1,3 @@ +#sample_type sample_ID sample_pair_ID sample_output_ID sample_host_system_run_directory +DNA_tumor IPD2225-D01-P01-A08 IPD2225-D01-P01-A08_P IPD2225-D01-P01-A08 /data/sample_data/analysis_results/251114_A02134_0115_BHCJCKDRX7_TSO_500_LocalApp_results +RNA_tumor IPD2225-R03-P01-A08 IPD2225-R03-P01-A08_P IPD2225-R03-P01-A08 /data/sample_data/analysis_results/251114_A02134_0115_BHCJCKDRX7_TSO_500_LocalApp_results