-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathprep.sh
More file actions
executable file
·43 lines (36 loc) · 1.63 KB
/
prep.sh
File metadata and controls
executable file
·43 lines (36 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/bash
# Usage: prep.sh INPUT TARGET OUTPUT MISMATCHES
#
# Positional arguments:
#   INPUT       compressed/uncompressed FASTQ files
#   TARGET      target file, target file format is based on the FASTX Barcode
#               Splitter barcode file format
#               (http://hannonlab.cshl.edu/fastx_toolkit/commandline.html#fastx_barcode_splitter_usage)
#   OUTPUT      directory that receives every file written by this script
#   MISMATCHES  number of allowed mismatches per target sequence

# Fail early with a usage message instead of running the tools with empty
# paths when an argument is missing.
if [[ $# -lt 4 ]]; then
  printf 'Usage: %s INPUT TARGET OUTPUT MISMATCHES\n' "${0##*/}" >&2
  exit 2
fi

# read arguments
INPUT="$1"
TARGET="$2"
OUTPUT="$3"
MISMATCHES="$4"

# -p tolerates an already existing directory (re-runs); abort when the
# directory cannot be created because every later step writes into it.
mkdir -p "$OUTPUT" || exit 1
# FASTQ -> FASTA, trimmed to 62 bp; only reads of 62 bp are kept
zcat -f "$INPUT" \
  | fastq_to_fasta -rnv -Q33 \
  | fastx_trimmer -l 62 -v \
  | fastx_clipper -a X -l 62 -n -v \
  > "${OUTPUT}/trimmed.fasta"
# join UMI subsequences: header lines pass through unchanged, sequence lines
# are rewritten as bases 1-4, then bases 59-62, then bases 5-58
awk '
  /^>/ { print; next }
  { printf "%s%s%s\n", substr($0, 1, 4), substr($0, 59, 4), substr($0, 5, 54) }
' "${OUTPUT}/trimmed.fasta" > "${OUTPUT}/umi_joined.fasta"
# demultiplex by locus; the splitter writes its files under "$OUTPUT"/loci/
# with a ".fasta" suffix
# -p keeps a re-run from failing on an existing directory.
mkdir -p "$OUTPUT"/loci
# The input is supplied by redirection (no extra cat process) and MISMATCHES
# is quoted so it always reaches the splitter as exactly one argument.
fastx_barcode_splitter.pl --bcfile "$TARGET" --prefix "$OUTPUT"/loci/ --suffix ".fasta" \
  --eol --mismatches "$MISMATCHES" --partial 0 < "$OUTPUT"/umi_joined.fasta
# trim target sequence (54 bp), remove UMI with N
# -p keeps a re-run from failing on an existing directory.
mkdir -p "$OUTPUT"/umis
for i in "$OUTPUT"/loci/*.fasta; do
  # An unmatched glob is passed through literally; skip it instead of
  # creating a file that is literally named "*.umi.fasta".
  [[ -e "$i" ]] || continue
  locus="$(basename "$i" ".fasta")"
  if [[ -s "$i" ]]; then
    fastx_trimmer -t 54 -v -i "$i" | fastx_clipper -a X -l 6 -v > "$OUTPUT"/umis/"$locus".umi.fasta
  else
    # Empty locus file: report it on stderr and still create an empty output
    # file for this locus. The path is passed as a printf argument so it is
    # never word-split or glob-expanded.
    printf '%s file is empty\n' "$i" >&2
    touch "$OUTPUT"/umis/"$locus".umi.fasta
  fi
done
# merge identical UMIs
# -p keeps a re-run from failing on an existing directory.
mkdir -p "$OUTPUT"/merged
for i in "$OUTPUT"/umis/*.umi.fasta; do
  # An unmatched glob is passed through literally; skip it instead of
  # creating a file that is literally named "*.merged.fasta".
  [[ -e "$i" ]] || continue
  locus="$(basename "$i" ".umi.fasta")"
  if [[ -s "$i" ]]; then
    fastx_collapser -v -i "$i" > "$OUTPUT"/merged/"$locus".merged.fasta
  else
    # Empty UMI file: report it on stderr and still create an empty output
    # file for this locus. The path is passed as a printf argument so it is
    # never word-split or glob-expanded.
    printf '%s file is empty\n' "$i" >&2
    touch "$OUTPUT"/merged/"$locus".merged.fasta
  fi
done