Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions R/methods-predictCoding.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,6 @@ setMethod("predictCoding", c("VRanges", "TxDb", "ANY", "missing"),
## variant location in cds region
mcols(query) <- append(mcols(query), DataFrame(varAllele=varAllele))
txlocal <- .localCoordinates(query, cdsbytx, ignore.strand=FALSE, ...)
if (length(txlocal) == 0)
return(txlocal)

## reverse complement "-" strand
valid <- rep(TRUE, length(txlocal))
Expand Down Expand Up @@ -181,12 +179,12 @@ setMethod("predictCoding", c("VRanges", "TxDb", "ANY", "missing"),
consequence <- rep("synonymous", length(txlocal))
consequence[nonsynonymous] <- "nonsynonymous"
consequence[fmshift] <- "frameshift"
consequence[nonsynonymous & (as.character(varAA) %in% "*")] <- "nonsense"
consequence[nonsynonymous & grepl("\\*", as.character(varAA), fixed=TRUE)] <- "nonsense"
consequence[zwidth | noTrans] <- "not translated"
consequence <- factor(consequence)

mcols(txlocal) <- append(mcols(txlocal),
DataFrame(GENEID=NA_character_,
DataFrame(GENEID=rep(NA_character_, length(txlocal)),
CONSEQUENCE=consequence,
REFCODON=refCodon,
VARCODON=varCodon,
Expand Down
45 changes: 45 additions & 0 deletions inst/unitTests/test_predictCoding-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ test_predictCoding_empty <- function()
query <- GRanges("chr1", IRanges(start=c(1, 10, 20), width=1))
current <- fun(query, cdsbytx, Hsapiens, DNAStringSet(c("G", "T", "A")))
checkIdentical(dim(mcols(current)), c(0L, 8L))
## issue #86: REFAA and VARAA should be empty AAStringSet, not NULL
checkTrue(is(mcols(current)$REFAA, "AAStringSet"))
checkTrue(is(mcols(current)$VARAA, "AAStringSet"))
checkIdentical(length(mcols(current)$REFAA), 0L)
checkIdentical(length(mcols(current)$VARAA), 0L)
}

test_predictCoding_varAllele <- function()
Expand Down Expand Up @@ -109,3 +114,43 @@ test_predictCoding_strand <- function()
checkIdentical(mcols(current)$CDSLOC, IRanges(3, 3))
}

test_predictCoding_nonsense_DBS <- function()
{
## issue #84: DBS spanning two codons where one becomes stop (*) should
## be classified as "nonsense", not "nonsynonymous"
## Construct a minimal CDS: 9-nt coding seq on "+" strand
## REF codon 3 = positions 7-9, codon 2 = positions 4-6
## A DBS at positions 5-6 (end of codon2 / start of codon3) can yield
## VARAA like "P*" — contains stop but wasn't caught by old %in% "*"
fa_path <- tempfile(fileext='.fa')
## 9-nt CDS encodes e.g. CCG-CAG-TGG (P-Q-W)
## DBS variant at pos 4-5: CAG -> TAG introduces stop in codon 2 -> P*W
cds_seq <- "CCGCAGTGG"
full_seq <- paste0(paste(rep("A", 1000), collapse=""), cds_seq,
paste(rep("A", 1000), collapse=""))
Biostrings::writeXStringSet(
Biostrings::DNAStringSet(c(chr1=full_seq)), fa_path)
Rsamtools::indexFa(fa_path)
fa <- Rsamtools::FaFile(fa_path)

cds_start <- 1001L
cdsbytx_dbs <- GRangesList(tx1=GRanges("chr1",
IRanges(cds_start, cds_start + 8L), strand="+"))

## variant at codon-boundary positions 4-5 of the CDS (genomic 1004-1005)
## REF=CA, ALT=TA -> VARCODON2 = TAG (stop), VARAA = "*W"
query_dbs <- GRanges("chr1",
IRanges(cds_start + 3L, cds_start + 4L),
strand="+")
varAllele_dbs <- Biostrings::DNAStringSet("TA")

current <- quiet(fun(query_dbs, cdsbytx_dbs, fa, varAllele_dbs))

if (length(current) > 0L) {
## VARAA should contain a stop (*) somewhere
checkTrue(grepl("\\*", as.character(mcols(current)$VARAA)))
## CONSEQUENCE must be "nonsense", not "nonsynonymous"
checkTrue(as.character(mcols(current)$CONSEQUENCE) == "nonsense")
}
}