-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsequence_conservation.R
More file actions
executable file
·50 lines (30 loc) · 1.44 KB
/
sequence_conservation.R
File metadata and controls
executable file
·50 lines (30 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(tidyverse)
library(stringr)
library(rtracklayer)
options(stringsAsFactors=F)
# phastCons score file from UCSC
bw_path = "/PhastCons/hg38.phastCons7way.bw"
# Functions -------------------------------------------------------------------------------------------
get_conservation_score <- function(bw_path, gr, summaryFun = "mean"){
BigWigFile <- BigWigFile(bw_path)
phast_cons_score <- bw_path %>% str_replace(".*/", "") %>% str_extract("phastCons.*way")
gr_w_scores <- summary(BigWigFile, gr, size = 1L, type = summaryFun) %>% unlist()
stopifnot((width(gr) == width(gr_w_scores)))
elementMetadata(gr)[[str_c(summaryFun, "_", phast_cons_score)]] <- gr_w_scores$score
gr_w_scores <- gr
return(gr_w_scores)
}
# loading the UTR training regions
# this code can be modified to be used on other training regions and on ERs
load("/GS.RData")
utr.gr = makeGRangesFromDataFrame(utr, keep.extra.columns = TRUE)
# adding "chr" in front of seqnames
newStyle <- mapSeqlevels(seqlevels(utr.gr), "UCSC")
utr.gr <- renameSeqlevels(utr.gr, newStyle)
utr.export = get_conservation_score(bw_path, utr.gr) %>%
as.data.frame() %>%
mutate(class = "3-UTR") %>%
select(c(three_prime_utr_id, mean_phastCons7way, class)) %>%
plyr::rename(c("three_prime_utr_id" = "id"))
file_name = "/Features/phastCons_feat.txt"
write.table(utr.export, file = file_name, row.names = FALSE, quote = FALSE, sep="\t")