From b8bcae5b6c60d8fe04e1fcfa5c6e0a7b8a329d31 Mon Sep 17 00:00:00 2001 From: Jorge Zamora Date: Mon, 28 Jul 2014 17:06:57 +0100 Subject: [PATCH 1/7] displays pvalues for all the covariates in the model --- script/run_deseq.R | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/script/run_deseq.R b/script/run_deseq.R index f55bf9a..53a5a4c 100644 --- a/script/run_deseq.R +++ b/script/run_deseq.R @@ -10,6 +10,7 @@ sizeFactorsFile <- Args[7] qcPdfFile <- Args[8] filterPercentile <- as.numeric( Args[9] ) + # Get data and samples countData <- read.table( countFile, header=TRUE, row.names=1 ) samples <- read.table( samplesFile, header=TRUE, row.names=1 ) @@ -42,7 +43,7 @@ if (filterPercentile) { # Create DESeqDataSet (with design according to number of factors) dds <- DESeqDataSetFromMatrix(countData, samples, design = ~ condition) if (numFactors == 2) { - design(dds) <- formula(~ group + condition) + design(dds) <- formula(~ group * condition) } # Ensure control level (usually "sibling") is first level (i.e. before "mutant") @@ -51,14 +52,35 @@ colData(dds)$condition <- factor(colData(dds)$condition, # Differential expression analysis dds <- DESeq(dds) -res <- results(dds) + +# Find the condition term (does not contain '.' and constains 'condition') +non_interaction_terms = which(grepl("\\.",results_names,perl =T) == FALSE) +condition_terms_index = which(grepl("condition",results_names) == TRUE) +condition_terms_index = intersect(non_interaction_terms,condition_terms) +res <- results(dds,name=results_names[condition_terms_index] ) + +# create matrix with pvalues of all terms in the model +results_names = resultsNames(dds) +pvalues_matrix = matrix(ncol=0,nrow=nrow(dds)) +col_names<-c(); +for (i in 1:length(results_names) ) +{ + equation_term = results_names[i]; + results = results(dds, name= equation_term) + palues = as.matrix(results$pvalue) + padj = as.matrix(results$padj) + pvalues_matrix = cbind(pvalues_matrix,palues) + pvalues_matrix = cbind(pvalues_matrix,padj) + col_names<-c(col_names,paste(equation_term,"_pvalue",sep=""),paste(equation_term,"_pvalue_adjusted",sep="")) +} +colnames(pvalues_matrix)=col_names; + + # Write output -out <- data.frame(pvalue=res$pvalue, padj=res$padj, row.names=rownames(res)) -write.table( out, file=outputFile, col.names=FALSE, row.names=TRUE, - quote=FALSE, sep="\t" ) -write.table( sizeFactors( dds ), file=sizeFactorsFile, col.names=FALSE, - row.names=FALSE, quote=FALSE, sep="\t" ) +out <- data.frame(pvalues_matrix, row.names=rownames(res)) +write.table( out, file=outputFile, col.names=TRUE, row.names=TRUE, quote=FALSE, sep="\t" ) +write.table( sizeFactors( dds ), file=sizeFactorsFile, col.names=FALSE,row.names=FALSE, quote=FALSE, sep="\t" ) # Data transformations for QC rld <- rlogTransformation(dds, blind=TRUE) From aea2b6c462f3ce581ab574509ef7b18247b98bab Mon Sep 17 00:00:00 2001 From: Jorge Zamora Date: Wed, 6 Aug 2014 11:26:36 +0100 Subject: [PATCH 2/7] reports LTR against intercept only nmodel --- script/run_deseq.R | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/script/run_deseq.R b/script/run_deseq.R index 53a5a4c..8440e44 100644 --- a/script/run_deseq.R +++ b/script/run_deseq.R @@ -53,32 +53,16 @@ colData(dds)$condition <- factor(colData(dds)$condition, # Differential expression analysis dds <- DESeq(dds) -# Find the condition term (does not contain '.' and constains 'condition') -non_interaction_terms = which(grepl("\\.",results_names,perl =T) == FALSE) -condition_terms_index = which(grepl("condition",results_names) == TRUE) -condition_terms_index = intersect(non_interaction_terms,condition_terms) -res <- results(dds,name=results_names[condition_terms_index] ) - -# create matrix with pvalues of all terms in the model -results_names = resultsNames(dds) -pvalues_matrix = matrix(ncol=0,nrow=nrow(dds)) -col_names<-c(); -for (i in 1:length(results_names) ) -{ - equation_term = results_names[i]; - results = results(dds, name= equation_term) - palues = as.matrix(results$pvalue) - padj = as.matrix(results$padj) - pvalues_matrix = cbind(pvalues_matrix,palues) - pvalues_matrix = cbind(pvalues_matrix,padj) - col_names<-c(col_names,paste(equation_term,"_pvalue",sep=""),paste(equation_term,"_pvalue_adjusted",sep="")) -} -colnames(pvalues_matrix)=col_names; - - +# LRT against intercept only nmodel +dds <- DESeq(dds, test="LRT", full= ~ group * condition, reduced=~1) +LRTPvalue = mcols(dds)[1:nrow(dds), ]$LRTPvalue +LRTPvalue_adj = p.adjust(LRTPvalue, "BH") +LRTPvalue_matrix = matrix(ncol=0,nrow=nrow(dds)) +LRTPvalue_matrix = cbind(LRTPvalue_matrix,LRTPvalue) +LRTPvalue_matrix = cbind(LRTPvalue_matrix,LRTPvalue_adj) # Write output -out <- data.frame(pvalues_matrix, row.names=rownames(res)) +out <- data.frame(LRTPvalue_matrix, row.names=rownames(res)) write.table( out, file=outputFile, col.names=TRUE, row.names=TRUE, quote=FALSE, sep="\t" ) write.table( sizeFactors( dds ), file=sizeFactorsFile, col.names=FALSE,row.names=FALSE, quote=FALSE, sep="\t" ) From 17c554ed0065d71c1faaaabb9b899fea0f652744 Mon Sep 17 00:00:00 2001 From: Jorge Zamora Date: Wed, 6 Aug 2014 15:56:40 +0100 Subject: [PATCH 3/7] added LTR test --- script/run_deseq.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/script/run_deseq.R b/script/run_deseq.R index 8440e44..8f53086 100644 --- a/script/run_deseq.R +++ b/script/run_deseq.R @@ -10,7 +10,6 @@ sizeFactorsFile <- Args[7] qcPdfFile <- Args[8] filterPercentile <- as.numeric( Args[9] ) - # Get data and samples countData <- read.table( countFile, header=TRUE, row.names=1 ) samples <- read.table( samplesFile, header=TRUE, row.names=1 ) @@ -43,7 +42,7 @@ if (filterPercentile) { # Create DESeqDataSet (with design according to number of factors) dds <- DESeqDataSetFromMatrix(countData, samples, design = ~ condition) if (numFactors == 2) { - design(dds) <- formula(~ group * condition) + design(dds) <- formula(~ group + condition) } # Ensure control level (usually "sibling") is first level (i.e. before "mutant") @@ -52,6 +51,7 @@ colData(dds)$condition <- factor(colData(dds)$condition, # Differential expression analysis dds <- DESeq(dds) +<<<<<<< HEAD # LRT against intercept only nmodel dds <- DESeq(dds, test="LRT", full= ~ group * condition, reduced=~1) @@ -65,6 +65,16 @@ LRTPvalue_matrix = cbind(LRTPvalue_matrix,LRTPvalue_adj) out <- data.frame(LRTPvalue_matrix, row.names=rownames(res)) write.table( out, file=outputFile, col.names=TRUE, row.names=TRUE, quote=FALSE, sep="\t" ) write.table( sizeFactors( dds ), file=sizeFactorsFile, col.names=FALSE,row.names=FALSE, quote=FALSE, sep="\t" ) +======= +res <- results(dds) + +# Write output +out <- data.frame(pvalue=res$pvalue, padj=res$padj, row.names=rownames(res)) +write.table( out, file=outputFile, col.names=FALSE, row.names=TRUE, + quote=FALSE, sep="\t" ) +write.table( sizeFactors( dds ), file=sizeFactorsFile, col.names=FALSE, + row.names=FALSE, quote=FALSE, sep="\t" ) +>>>>>>> parent of b8bcae5... displays pvalues for all the covariates in the model # Data transformations for QC rld <- rlogTransformation(dds, blind=TRUE) From 0e7557c5046cea48e15a9f77d80654cf88a0840b Mon Sep 17 00:00:00 2001 From: Jorge Zamora Date: Wed, 6 Aug 2014 16:07:35 +0100 Subject: [PATCH 4/7] had to resolve conflicts --- script/run_deseq.R | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/script/run_deseq.R b/script/run_deseq.R index 8f53086..4593b57 100644 --- a/script/run_deseq.R +++ b/script/run_deseq.R @@ -51,7 +51,7 @@ colData(dds)$condition <- factor(colData(dds)$condition, # Differential expression analysis dds <- DESeq(dds) -<<<<<<< HEAD + # LRT against intercept only nmodel dds <- DESeq(dds, test="LRT", full= ~ group * condition, reduced=~1) @@ -65,16 +65,6 @@ LRTPvalue_matrix = cbind(LRTPvalue_matrix,LRTPvalue_adj) out <- data.frame(LRTPvalue_matrix, row.names=rownames(res)) write.table( out, file=outputFile, col.names=TRUE, row.names=TRUE, quote=FALSE, sep="\t" ) write.table( sizeFactors( dds ), file=sizeFactorsFile, col.names=FALSE,row.names=FALSE, quote=FALSE, sep="\t" ) -======= -res <- results(dds) - -# Write output -out <- data.frame(pvalue=res$pvalue, padj=res$padj, row.names=rownames(res)) -write.table( out, file=outputFile, col.names=FALSE, row.names=TRUE, - quote=FALSE, sep="\t" ) -write.table( sizeFactors( dds ), file=sizeFactorsFile, col.names=FALSE, - row.names=FALSE, quote=FALSE, sep="\t" ) ->>>>>>> parent of b8bcae5... displays pvalues for all the covariates in the model # Data transformations for QC rld <- rlogTransformation(dds, blind=TRUE) From 7fba6d04ee3fdbd4401625e04253f8ab3de329c1 Mon Sep 17 00:00:00 2001 From: Jorge Zamora Date: Wed, 6 Aug 2014 16:11:03 +0100 Subject: [PATCH 5/7] added interactions --- script/run_deseq.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/run_deseq.R b/script/run_deseq.R index 4593b57..3cbaced 100644 --- a/script/run_deseq.R +++ b/script/run_deseq.R @@ -42,7 +42,7 @@ if (filterPercentile) { # Create DESeqDataSet (with design according to number of factors) dds <- DESeqDataSetFromMatrix(countData, samples, design = ~ condition) if (numFactors == 2) { - design(dds) <- formula(~ group + condition) + design(dds) <- formula(~ group *condition) } # Ensure control level (usually "sibling") is first level (i.e. before "mutant") From 04c62b29df889a698138344c627cfc25302cf0b8 Mon Sep 17 00:00:00 2001 From: Jorge Zamora Date: Thu, 7 Aug 2014 12:10:38 +0100 Subject: [PATCH 6/7] call to resultsNames(dds) was in wrong place --- script/run_deseq.R | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/script/run_deseq.R b/script/run_deseq.R index 3cbaced..cd0ca71 100644 --- a/script/run_deseq.R +++ b/script/run_deseq.R @@ -10,6 +10,7 @@ sizeFactorsFile <- Args[7] qcPdfFile <- Args[8] filterPercentile <- as.numeric( Args[9] ) + # Get data and samples countData <- read.table( countFile, header=TRUE, row.names=1 ) samples <- read.table( samplesFile, header=TRUE, row.names=1 ) @@ -42,7 +43,7 @@ if (filterPercentile) { # Create DESeqDataSet (with design according to number of factors) dds <- DESeqDataSetFromMatrix(countData, samples, design = ~ condition) if (numFactors == 2) { - design(dds) <- formula(~ group *condition) + design(dds) <- formula(~ group * condition) } # Ensure control level (usually "sibling") is first level (i.e. before "mutant") @@ -52,17 +53,33 @@ colData(dds)$condition <- factor(colData(dds)$condition, # Differential expression analysis dds <- DESeq(dds) +# Find the condition term (does not contain '.' and constains 'condition') +results_names = resultsNames(dds) +non_interaction_terms = which(grepl("\\.",results_names,perl =T) == FALSE) +condition_terms_index = which(grepl("condition",results_names) == TRUE) +condition_terms_index = intersect(non_interaction_terms,condition_terms) +res <- results(dds,name=results_names[condition_terms_index] ) + +# create matrix with pvalues of all terms in the model + +pvalues_matrix = matrix(ncol=0,nrow=nrow(dds)) +col_names<-c(); +for (i in 1:length(results_names) ) +{ + equation_term = results_names[i]; + results = results(dds, name= equation_term) + palues = as.matrix(results$pvalue) + padj = as.matrix(results$padj) + pvalues_matrix = cbind(pvalues_matrix,palues) + pvalues_matrix = cbind(pvalues_matrix,padj) + col_names<-c(col_names,paste(equation_term,"_pvalue",sep=""),paste(equation_term,"_pvalue_adjusted",sep="")) +} +colnames(pvalues_matrix)=col_names; + -# LRT against intercept only nmodel -dds <- DESeq(dds, test="LRT", full= ~ group * condition, reduced=~1) -LRTPvalue = mcols(dds)[1:nrow(dds), ]$LRTPvalue -LRTPvalue_adj = p.adjust(LRTPvalue, "BH") -LRTPvalue_matrix = matrix(ncol=0,nrow=nrow(dds)) -LRTPvalue_matrix = cbind(LRTPvalue_matrix,LRTPvalue) -LRTPvalue_matrix = cbind(LRTPvalue_matrix,LRTPvalue_adj) # Write output -out <- data.frame(LRTPvalue_matrix, row.names=rownames(res)) +out <- data.frame(pvalues_matrix, row.names=rownames(res)) write.table( out, file=outputFile, col.names=TRUE, row.names=TRUE, quote=FALSE, sep="\t" ) write.table( sizeFactors( dds ), file=sizeFactorsFile, col.names=FALSE,row.names=FALSE, quote=FALSE, sep="\t" ) From e9ea85c2c5e8e64bd747dcf50cc589bd9015a70f Mon Sep 17 00:00:00 2001 From: Jorge Zamora Date: Thu, 7 Aug 2014 12:25:02 +0100 Subject: [PATCH 7/7] fixed bug in the selection of the main condition term --- script/run_deseq.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/script/run_deseq.R b/script/run_deseq.R index cd0ca71..fd2e68b 100644 --- a/script/run_deseq.R +++ b/script/run_deseq.R @@ -54,14 +54,12 @@ colData(dds)$condition <- factor(colData(dds)$condition, dds <- DESeq(dds) # Find the condition term (does not contain '.' and constains 'condition') -results_names = resultsNames(dds) +results_names=resultsNames(dds) non_interaction_terms = which(grepl("\\.",results_names,perl =T) == FALSE) -condition_terms_index = which(grepl("condition",results_names) == TRUE) -condition_terms_index = intersect(non_interaction_terms,condition_terms) -res <- results(dds,name=results_names[condition_terms_index] ) +all_condition_terms_index = which(grepl("condition",results_names) == TRUE) +condition_terms_index = intersect(non_interaction_terms,all_condition_terms_index) # create matrix with pvalues of all terms in the model - pvalues_matrix = matrix(ncol=0,nrow=nrow(dds)) col_names<-c(); for (i in 1:length(results_names) )