-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaakomp_plot.R
More file actions
86 lines (73 loc) · 2.37 KB
/
aakomp_plot.R
File metadata and controls
86 lines (73 loc) · 2.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env Rscript
# aaKomp CDF plot script
# Written by Johnathan Wong
suppressPackageStartupMessages({
library(ggplot2)
library(dplyr)
library(readr)
})
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 3) {
cat("Usage: Rscript aakomp_plot.R <input_file> <target_value> <score_file>\n")
quit(status = 1)
}
in_file <- args[1]
target_line <- as.numeric(args[2])
score_file <- args[3]
base <- basename(in_file)
# Read the score value from the score file
score <- tryCatch({
as.numeric(readLines(score_file, warn = FALSE)[1])
}, error = function(e) {
cat(sprintf("Could not read score from %s\n", score_file))
quit(status = 1)
})
if (is.na(score)) {
cat(sprintf("Invalid numeric score in %s\n", score_file))
quit(status = 1)
}
# Read input file (skipping header)
data <- suppressWarnings(read_delim(
in_file,
delim = "\t", escape_double = FALSE,
col_names = FALSE, trim_ws = TRUE,
skip = 1, show_col_types = FALSE
))
if (ncol(data) < 9) {
cat(sprintf("File %s does not have enough columns\n", base))
quit(status = 1)
}
colnames(data)[6] <- "ratio"
colnames(data)[9] <- "id"
vals <- data %>%
filter(!is.na(ratio)) %>%
group_by(id) %>%
summarise(max_ratio = max(ratio), .groups = "drop") %>%
arrange(max_ratio) %>%
mutate(cdf = row_number() / n())
if (nrow(vals) == 0) {
cat(sprintf("No valid entries in %s\n", base))
quit(status = 1)
}
plot <- ggplot(vals, aes(x = max_ratio, y = cdf)) +
geom_step(direction = "hv", linewidth = 1) +
geom_vline(xintercept = target_line, linetype = "dotted", color = "red", linewidth = 1) +
annotate("text", x = target_line, y = 0.2, angle = 90, vjust = -0.5,
label = "Targeted miBF rescue threshold", color = "red", size = 5) +
annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5,
label = sprintf("aaKomp score = %.2f", score), size = 5) +
labs(
title = paste("aaKomp Cumulative Distribution Function (CDF) –", base),
x = expression(italic(k)*"-mer completeness ratio"),
y = "CDF"
) +
theme_minimal() +
theme(
panel.border = element_rect(color = "black", fill = NA),
axis.ticks = element_line(color = "black"),
axis.title = element_text(size = 20),
axis.text = element_text(size = 16),
plot.title = element_text(size = 20)
)
# Save the plot
ggsave(filename = sprintf("%s_cdf.png", in_file), plot = plot, width = 13.5, height = 9, dpi = 300)