RaderLabCode/R_Initialize.R at main · dfossl/RaderLabCode · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Load the project settings and the functions defined in
# DifferentialExpressionFunctions.R. Both paths are relative, so make sure
# the current directory is the one that contains these scripts.
source("R_initialize_metadata.R")
source("DifferentialExpressionFunctions.R")


# Fixed seed so that anything relying on random numbers is reproducible.
set.seed(123)

# Set up the Python instance.
# Running `which python3` in a terminal gives the interpreter path.
# pythonInstanceDir and pythonModuleDir are not defined in this script;
# presumably they come from R_initialize_metadata.R -- TODO confirm.
# The braces group the three calls so they are evaluated as one expression.
{
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  use_python(pythonInstanceDir, required = TRUE)
  py_config()
  source_python(pythonModuleDir)
}


# Input data


# Working directory
# Show the current working directory. print() is explicit so the path is
# also displayed when this script is run through source(), where bare
# top-level expressions are not auto-printed.
print(getwd())

# If the working directory should change, it is switched here.
setwd(workingDirectory)

# Root location for output files. outputDirName should be the name of a
# folder inside the working directory.
OutputFileDirectory <- file.path(workingDirectory, outputDirName)

# Location of the data.
# This is usually in the working directory but doesn't have to be. Keeping
# all data in one directory and pasting the specific path can be useful for
# avoiding duplicates.
# CountFileDirectory and AnnotationFileDirectory are used exactly as they
# were defined before this point (presumably in R_initialize_metadata.R --
# TODO confirm); re-assigning them to themselves here had no effect.
# Example of an annotation path:
# AnnotationFileDirectory <- "/Users/dfossl/OneDrive/Documents/Dylan_School_Cloud/Rader Lab/Analysis-LP/Deseq2Analysis_LowPhosphorous/LP_annotation.csv"

# Read both tables, using the first column of each file as the row names.
cts <- read.csv(CountFileDirectory, row.names = 1)
coldata <- read.csv(AnnotationFileDirectory, row.names = 1)
# Drop factor levels that are not used by any row of the annotation.
coldata <- droplevels(coldata)

# WARNING: A common error is that numbers in the annotation stop those
# columns from being treated as factors.
# You can force a column to be treated as a factor with the following code.
# coldata$column <- factor(coldata$column)


# Make sure the columns of the count data match the rows of the annotation.
# The check is run once and its result reused. print() is explicit so the
# diagnostics are also shown when this script is run through source().
sampleNamesMatch <- checkColumnsMatch(coldata, cts)
print(sampleNamesMatch)
# Per-sample view: which annotation rows have a matching count column.
print(rownames(coldata) %in% colnames(cts))

# Halt instead of only printing a message: continuing with mismatched
# samples would feed inconsistent inputs to the rest of the analysis.
# isTRUE() also treats NA or any other non-TRUE result as a failure.
if (!isTRUE(sampleNamesMatch)) {
  stop(
    "Annotation rows and count columns have different names, or there are ",
    "extra rows or columns. Reformat the data until the check is TRUE.\n",
    "  In annotation but not in counts: ",
    toString(setdiff(rownames(coldata), colnames(cts))), "\n",
    "  In counts but not in annotation: ",
    toString(setdiff(colnames(cts), rownames(coldata))),
    call. = FALSE
  )
}

# expvariables holds the list of variable conditions, i.e. the column names
# of the annotation table.
expvariables <- colnames(coldata)
# Explicit print() so the names are also shown when run through source().
print(expvariables)

# Set the minimum count you wish each row to sum to.
# The DESeq2 documentation claims more robust filtering is applied when the
# DESeq object is made, so that is trusted here.
minimumCount <- 10