forked from ewongma/pyEVA
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathPlotData.py
More file actions
148 lines (137 loc) · 6.66 KB
/
Copy pathPlotData.py
File metadata and controls
148 lines (137 loc) · 6.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import glob
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
import os
import shutil
from LoadCSV import LoadCSV
class PlotData:
def __init__(self, level: int):
self.files = []
self.level = level
self._find_all_xml("input/*.mz*ML")
def _find_all_xml(self, path: str):
for name in glob.glob(path):
self.files.append(name)
def plot_eic(self):
if len(self.files) == 1:
robjects.r('''
library(dplyr)
library(xcms)
library(ggplot2)
library(scales)
peak_smooth <- function(x,level){
n <- level
if(length(x) < 2*n){
return(x)
} else if(length(unique(x))==1){
return(x)
} else{
y <- vector(length=length(x))
for(i in 1:n){
y[i] <- sum(c((n-i+2):(n+1),n:1)*x[1:(i+n)])/sum(c((n-i+2):(n+1),n:1))
}
for(i in (n+1):(length(y)-n)){
y[i] <- sum(c(1:(n+1),n:1)*x[(i-n):(i+n)])/sum(c(1:(n+1),n:1))
}
for(i in (length(y)-n+1):length(y)){
y[i] <- sum(c(1:n,(n+1):(n+i-length(x)+1))*x[(i-n):length(x)])/sum(c(1:n,(n+1):(n+i-length(x)+1)))
}
return(y)
}
}
oneObject <- function(featureTable, input.files, level) {
xraw <- xcmsRaw(toString(input.files), profstep=0, mslevel = 1)
plot.matrix <- featureTable
plotmz.tol <- 0.01
plotrt.tol <- 60
if(nrow(plot.matrix)!=0){
for(k in 1:nrow(plot.matrix)){
rt.lower.limit <- plot.matrix$rt[k] - plotrt.tol
rt.upper.limit <- plot.matrix$rt[k] + plotrt.tol
mass.lower.limit <- plot.matrix$mz[k] - plotmz.tol
mass.upper.limit <- plot.matrix$mz[k] + plotmz.tol
mzRange <- as.double(cbind(mass.lower.limit, mass.upper.limit))
RTRange <- as.integer(cbind(rt.lower.limit, rt.upper.limit))
eeic <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRange) #extracted EIC object
points <- cbind(xraw@scantime[eeic$scan], peak_smooth(eeic$intensity, level))
png(file = paste0(rownames(plot.matrix)[k], ".png"),
width = 480, height = 480)
eic <- plot(points, type="l", main=" ", xlab="Seconds",
ylab="Intensity", xlim=RTRange)
dev.off()
}
}
}
''')
pandas2ri.activate()
one_object = robjects.r['oneObject']
one_object(LoadCSV().featureTable, self.files, self.level)
else:
robjects.r('''
library(dplyr)
library(xcms)
library(ggplot2)
library(scales)
peak_smooth <- function(x,level){
n <- level
if(length(x) < 2*n){
return(x)
} else if(length(unique(x))==1){
return(x)
} else{
y <- vector(length=length(x))
for(i in 1:n){
y[i] <- sum(c((n-i+2):(n+1),n:1)*x[1:(i+n)])/sum(c((n-i+2):(n+1),n:1))
}
for(i in (n+1):(length(y)-n)){
y[i] <- sum(c(1:(n+1),n:1)*x[(i-n):(i+n)])/sum(c(1:(n+1),n:1))
}
for(i in (length(y)-n+1):length(y)){
y[i] <- sum(c(1:n,(n+1):(n+i-length(x)+1))*x[(i-n):length(x)])/sum(c(1:n,(n+1):(n+i-length(x)+1)))
}
return(y)
}
}
moreObject <- function(featureTable, input.files, level) {
xraws <- list()
for(j in 1:length(input.files)){
xraws[[j]] <- xcmsRaw(toString(input.files[j]),profstep=0, mslevel = 1)
}
plot.matrix <- featureTable
plotmz.tol <- 0.01
plotrt.tol <- 60
if(nrow(plot.matrix)!=0){
for(k in 1:nrow(plot.matrix)){
rt.lower.limit <- plot.matrix$rt[k] - plotrt.tol
rt.upper.limit <- plot.matrix$rt[k] + plotrt.tol
mass.lower.limit <- plot.matrix$mz[k] - plotmz.tol
mass.upper.limit <- plot.matrix$mz[k] + plotmz.tol
mzRange <- as.double(cbind(mass.lower.limit, mass.upper.limit))
RTRange <- as.integer(cbind(rt.lower.limit, rt.upper.limit))
eeic <- rawEIC(xraws[[plot.matrix$sample[k]]], mzrange=mzRange, rtrange=RTRange) #extracted EIC object
points <- cbind(xraws[[plot.matrix$sample[k]]]@scantime[eeic$scan], peak_smooth(eeic$intensity, level))
png(file = paste0(rownames(plot.matrix)[k], ".png"),
width = 480, height = 480)
eic <- plot(points, type="l", main=" ", xlab="Seconds",
ylab="Intensity", xlim=RTRange)
dev.off()
}
}
}
''')
pandas2ri.activate()
more_object = robjects.r['moreObject']
more_object(LoadCSV().featureTable, self.files, self.level)
# all the eic images will be stored under classifier/dataset/test and will overwrite
print("EICs are plotting now")
if len(os.listdir('classifier/EICplots')) != 0:
print("The test folder is not empty; re-running will replace all the existing EICs")
for image in glob.glob('classifier/EICplots/*.png'):
os.remove(image)
for image in glob.glob("*.png"):
new_path = "classifier/EICplots/" + image
shutil.move(image, new_path)
if __name__ == '__main__':
userinput = int(input("Enter a smoothing level (e.g., 0, 1, or 2. ‘0’ means no smoothing.):\n"))
a = PlotData(level=userinput)
a.plot_eic()