-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFirst_layout.R
More file actions
139 lines (93 loc) · 4.1 KB
/
First_layout.R
File metadata and controls
139 lines (93 loc) · 4.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
library(Seurat)
library(ggplot2)
library(dplyr)
library(reshape2)
library(gridExtra)
# Load the 10x Genomics count data for the PBMC data set.
pbmc.data <- Read10X(data.dir = "data/filtered_gene_bc_matrices/hg19/")

# Write the counts out as a dense table and read them straight back as a plain
# matrix (`fd`) for the hand-rolled QC further down.
#  * as.matrix(): write.table() only accepts a matrix or data frame
#    (presumably Read10X() returns a sparse Matrix object here - confirm).
#  * sep = "\t": the file is read back with sep = "\t", so it has to be
#    written with the same separator.
#  * one shared path: the table is read from the file that was just written.
# A Seurat object is not created here; it is built with the QC thresholds in
# the next step.
matrix_path <- "PBMC_full_matrix.txt"
write.table(as.matrix(pbmc.data), matrix_path, sep = "\t")
fd <- as.matrix(
  read.delim(matrix_path, header = TRUE, row.names = 1, sep = "\t")
)
# Build the Seurat object from the raw (non-normalised) counts, applying the
# min.cells / min.features thresholds at construction time.
pbmc <- CreateSeuratObject(
  counts = pbmc.data,
  project = "pbmc3k",
  min.cells = 3,
  min.features = 200
)
pbmc

# Percentage of counts per cell coming from features whose name matches "^MT-".
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")

# Keep cells with 200 < nFeature_RNA < 2500 and percent.mt < 5.
pbmc <- subset(
  pbmc,
  subset = percent.mt < 5 & nFeature_RNA < 2500 & nFeature_RNA > 200
)

# Violin plots of the three QC metrics for the cells that were kept.
VlnPlot(
  pbmc,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
  ncol = 3
)
# Hand-computed QC metrics from the dense matrix `fd`; its row names are
# matched against "^MT-" and its columns are summarised one by one.
#   features : number of non-zero rows per column
#   nCount   : column totals
#   perc.mt  : percentage of each column total coming from "^MT-" rows
#   min.cls  : number of non-zero columns per row
features <- colSums(fd > 0)
nCount <- colSums(fd)

# drop = FALSE keeps the subset a matrix even when exactly one row matches,
# which would otherwise collapse to a vector and break the column sums.
mt_rows <- grep("^MT-", rownames(fd))
perc.mt <- 100 * colSums(fd[mt_rows, , drop = FALSE]) / nCount

min.cls <- rowSums(fd > 0)

# All metrics in one data frame, plus a long-format copy and one single-column
# data frame per metric for plotting.
qc.df <- data.frame(features = features, nCount = nCount, perc.mt = perc.mt)
qc.df.mlt <- melt(qc.df)
qc.feat <- data.frame(features = features)
qc.nCount <- data.frame(nCount = nCount)
qc.perc.mt <- data.frame(perc.mt = perc.mt)
# Build a violin + jittered-point plot for a single-metric data frame.
# The data frame is melted to long format so the metric name ends up in
# `variable` (used for the x axis and the facet strip) and its values in
# `value`.
qc_violin <- function(metric_df) {
  long_df <- melt(metric_df)
  ggplot(long_df, aes(x = variable, y = value)) +
    geom_violin() +
    geom_jitter(shape = ".", position = position_jitter(0.2)) +
    facet_grid(rows = vars(variable), scales = "free")
}

# One plot object per QC metric; they are only stored here, not drawn.
p <- qc_violin(qc.feat)
q <- qc_violin(qc.nCount)
r <- qc_violin(qc.perc.mt)
####
# One violin plot per column of qc.df, drawn side by side in a single row.
fill_color <- "lightcoral"

plots <- lapply(names(qc.df), function(variable) {
  # .data[[variable]] looks the column up in the plot's own data by name,
  # instead of embedding a copy of an external vector in the aesthetic.
  ggplot(qc.df, aes(x = 1, y = .data[[variable]], fill = fill_color)) +
    geom_violin() +
    geom_jitter(shape = ".", position = position_jitter(0.2)) +
    labs(x = NULL, y = variable) +
    # The fill is mapped to a constant, so the manual scale assigns it the
    # single colour and titles the legend with the metric name.
    scale_fill_manual(values = fill_color, name = variable) +
    theme_minimal() +
    # The x position is a dummy value, so its axis text and ticks are hidden.
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
})

# Arrange the plots in a single row. grid.arrange() lives in gridExtra; the
# call is namespace-qualified so it does not depend on what is attached.
gridExtra::grid.arrange(grobs = plots, ncol = length(plots))
# Scatter plots of nCount against perc.mt and against features, using the
# same colour as the violin plots.
p1 <- ggplot(data = qc.df, mapping = aes(x = nCount, y = perc.mt)) +
  geom_point(colour = fill_color)
p2 <- ggplot(data = qc.df, mapping = aes(x = nCount, y = features)) +
  geom_point(colour = fill_color)

# NOTE(review): combining two ggplots with `|` looks like patchwork syntax, and
# patchwork is not attached in this script - confirm its methods are available
# in the session (e.g. registered through another loaded package).
p1 | p2
## make the same plots as seurat scatters
# Subset the QC data frame with the same strict thresholds used for the Seurat
# object and for the SCAP pipeline in this script
# (200 < features < 2500, perc.mt < 5).
# Base subsetting is used so the row names of qc.df are guaranteed to survive;
# which() discards rows whose comparison is NA.
keep_cells <- which(
  qc.df$features > 200 & qc.df$features < 2500 & qc.df$perc.mt < 5
)
d.filt <- qc.df[keep_cells, , drop = FALSE]

# Keep rows of fd that are non-zero in more than 2 columns, and the columns
# that passed the filter above. drop = FALSE keeps the result a matrix even if
# only one row or column remains.
count.filt <- fd[min.cls > 2, rownames(d.filt), drop = FALSE]
#make a list
#######
# Run the same workflow through the SCAP package.
library(SCAP)

pbmc.data <- readRDS("/home/shamit/Git/R_programming_2/PBMC_data.rds")

# Build the SCAP object from the dense matrix and add the "^MT-" percentage.
pbmc <- pbmc.data %>%
  as.matrix() %>%
  CreateMySCO() %>%
  CalcMitoPct("^MT-")
#MakeQCPlots(pbmc)

# Filter, normalise and select highly variable genes.
pbmc <- pbmc %>%
  FilterData(
    sub = "features > 200 & features < 2500 & perc.mt < 5",
    min.cells = 3
  ) %>%
  NormaliseData(10000) %>%
  FindHVGs(2000)
PlotHVGs(pbmc)

# Scale, compute PCs, cluster and embed; both the clustering and the UMAP are
# given nPC = 10.
pbmc <- pbmc %>%
  ScaleData() %>%
  CalcPCs() %>%
  ClusterCells(nPC = 10, nK = 30, res = 1) %>%
  MakeUMAP(nPC = 10)
# Manual version of the clustering step: PCA on the scaled HVG matrix, a
# k-nearest-neighbour graph on the leading PCs, then Louvain communities.
n_pcs <- 10L
n_neighbours <- 30L

dd <- pbmc@data.scale[pbmc@hvgs, ]
dd.pr <- prcomp(t(dd), center = FALSE)$x

# nn.idx has one row per row of dd.pr and one column per neighbour.
snn <- RANN::nn2(dd.pr[, seq_len(n_pcs)], k = n_neighbours)$nn.idx

# Binary adjacency matrix: entry [i, j] is 1 when j is listed among the
# neighbours of i. It is filled in one vectorised assignment with a two-column
# (row, column) index matrix, so it relies on neither row names nor a loop.
# NOTE(review): the query set equals the data set, so each point is presumably
# returned as its own nearest neighbour (a 1 on the diagonal) - confirm that
# self-links are intended.
n_cells <- nrow(dd.pr)
adjacency_matrix <- matrix(0L, n_cells, n_cells)
rownames(adjacency_matrix) <- colnames(adjacency_matrix) <- colnames(dd)
neighbour_index <- cbind(
  rep(seq_len(n_cells), times = ncol(snn)),
  as.vector(snn)
)
adjacency_matrix[neighbour_index] <- 1L

#check that rows add to k
sum(adjacency_matrix[1, ]) == n_neighbours
table(rowSums(adjacency_matrix))

#clus <- leiden(adjacency_matrix)
# The neighbour relation is not symmetric (j can be a neighbour of i without
# the reverse holding). mode = "max" joins i and j when either direction is
# set, which states explicitly how the asymmetric matrix is made undirected.
# igraph is not attached in this script, hence the explicit namespace.
knn_graph <- igraph::graph_from_adjacency_matrix(
  adjacency_matrix,
  mode = "max"
)
clus <- igraph::cluster_louvain(knn_graph, resolution = 1.2)$membership
# First two PCs for every row of dd.pr, with cluster 1 overdrawn in red.
# drop = FALSE keeps a single-member cluster as a 1 x 2 matrix; without it the
# subset collapses to a length-2 vector and points() would plot it as two
# unrelated points.
plot(dd.pr[, 1:2])
points(dd.pr[clus == 1, 1:2, drop = FALSE], col = "red")

# UMAP on the first 10 PCs. The umap package is not attached in this script,
# hence the explicit namespace; the embedding is read from `$layout`.
ump <- umap::umap(dd.pr[, 1:10])
#ump.gex <- umap(t(pbmc@data.scale[pbmc@hvgs,]))
plot(ump$layout)

# One terrain colour per cluster id. Cluster 1 is first marked in red and is
# then repainted with its palette colour by the loop below.
tcols <- terrain.colors(max(clus))
points(ump$layout[clus == 1, 1:2, drop = FALSE], col = "red")
for (i in seq_len(max(clus))) {
  points(ump$layout[clus == i, 1:2, drop = FALSE], col = tcols[i])
}