Skip to content

Commit f0b1ad6

Browse files
committed
UTILS: add helper script to create raw files from rawtf files for REPLAY datasets
1 parent 5af618d commit f0b1ad6

File tree

2 files changed

+138
-0
lines changed

2 files changed

+138
-0
lines changed

UTILS/rawTF2raw/README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
The script `generate_rawtf_indices.sh` provides functions to extract a list of consecutive TFs from a list of `*rawtf*.tf` files and create the corresponding `*.raw` files, e.g. for REPLAY datasets at P2.
2+
3+
Input parameters for sourcing the script:
4+
- param1: list with input `*rawtf*.tf` files
5+
- param2: directory to store output `*.raw` files
6+
- param3: number of TFs to store as `*.raw` files
7+
- param4: tfCounter id of first TF to process, e.g. at least 3500 to skip ITS ROF rate ramp up
8+
- param5: number of input Blocks to be expected per TF to select only TFs with all inputs / detectors present
9+
- if number of inputs is irrelevant, it can be set to 0 to be ignored
10+
11+
Available functions by sourcing the script:
12+
- `check_tfs_per_file`
13+
- print the average number of TFs per file from a small subset of rawtf files from the input file list
14+
- `sort_tfs`
15+
- sort the TFs from the input file list in continuous order and save the corresponding timeslice ids in the order they appear in the input file list
16+
- if nBlocks (parameter 5) is not 0, then there is an additional check on the number of requested inputs defined by nBlocks
17+
- outputs:
18+
- tf-reader_*.log: full log output from o2-raw-tf-reader-workflow which is used to grep for the timeslice and tfCounter ids
19+
- tfids_*.txt: list with timeslice and tfCounter ids, sorted by tfCounter
20+
- timeslices_*.txt: sorted list of \$nTFs timeslice indices to be used for raw data creation
21+
- `create_raw_files`
22+
- use sorted list of timeslice ids created with sort_tfs as input to create *.raw files for those timeslices
23+
- the final command (for reference) and the full log output is written to \$outputDir.log
24+
25+
Example usage:
26+
```
27+
# source functions and set input / output parameters
28+
# in this case: process 125 TFs, start at tfCounter id 3500, check `*rawtf*.tf` files for number of inputs and only use TFs with (in this case) 14 inputs to ensure all detectors from this run are present
29+
source $O2DPG_ROOT/UTILS/rawTF2raw/generate_rawtf_indices.sh rawtflist_LHC25ab_563041.txt 2025-05-19-pp-750khz-replay-LHC25ab-563041 125 3500 14
30+
31+
# create input list of timeslice IDs to be processed for `*.raw` file creation (`timeslices_*.txt`)
32+
# timeslice IDs from this list correspond to \$nTFs consecutive tfCounter ids
33+
# intermediate outputs from o2-raw-tf-reader-workflow and the sorted list of all tfCounter ids will also be stored (`tf-reader_*.log` and `tfids_*.txt`)
34+
sort_tfs
35+
36+
# create `*.raw` files for timeslices in `timeslices_*.txt` created in the previous step
37+
create_raw_files
38+
```
39+
40+
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/bin/bash
2+
3+
# source $O2DPG_ROOT/UTILS/rawTF2raw/generate_rawtf_indices.sh
4+
5+
print_help() {
6+
cat <<EOF
7+
Script to source functions to extract list of consecutive timeframes from a list of rawtf files and create the corresponding raw data files for the selected TFs
8+
9+
Functions:
10+
- check_tfs_per_file
11+
- print the average number of TFs per file from a small subset of rawtf files from the input file list
12+
- sort_tfs
13+
- sort the TFs from the input file list in continuous order and save the corresponding timeslice ids in the order they appear in the input file list
14+
- if nBlocks (parameter 5) is not 0, then there is an additional check on the number of requested inputs defined by nBlocks
15+
- outputs:
16+
- tf-reader_*.log: full log output from o2-raw-tf-reader-workflow which is used to grep for the timeslice and tfCounter ids
17+
- tfids_*.txt: list with timeslice and tfCounter ids, sorted by tfCounter
18+
- timeslices_*.txt: sorted list of \$nTFs timeslice indices to be used for raw data creation
19+
- create_raw_files
20+
- use sorted list of timeslice ids created with sort_tfs as input to create *.raw files for those timeslices
21+
- the final command (for reference) and the full log output is written to \$outputDir.log
22+
23+
Parameters:
24+
- param1: rawtf input file list
25+
- param2: output directory
26+
- param3: number of TFs to process
27+
- param4: tfCounter id of first TF to process
28+
- param5: number of Blocks to be expected per TF to select only TFs with all inputs / detectors present
29+
- if number of inputs is irrelevant, it can be set to 0 to be ignored
30+
31+
Example usage:
32+
33+
source $O2DPG_ROOT/UTILS/rawTF2raw/generate_rawtf_indices.sh rawtflist_LHC25ab_563041.txt 2025-05-19-pp-750khz-replay-LHC25ab-563041 125 3500 14
34+
sort_tfs
35+
create_raw_files
36+
37+
EOF
38+
39+
return
40+
}
41+
# when sourced without arguments, print the usage text
[[ $# == 0 ]] && print_help

# input parameters
rawtfFileList=$1
outputDir=$2
nTFs=$3
firstTF=${4:-3500}   # default 3500: skip the ITS ROF rate ramp-up at the start of the run
nBlocks=${5:-0}      # 0 = do not filter TFs by their number of input blocks

# add # TFs to output directory name (plain string append, no echo|sed needed)
outputDir="${outputDir}-${nTFs}tf"

# example parameter values (for reference):
# rawtfFileList=rawtflist_LHC24ak_553146.txt
# outputDir=$(date +"%Y-%m-%d")-pp-500kHz-replay-LHC24ak_553146_500tf
# nTFs=500
# firstTF=3500
# nBlocks=15

# output file names, tagged with the part of the list name after "rawtflist_"
# (tag computed once instead of repeating the same echo|sed|awk pipeline three times)
runTag=${rawtfFileList%.txt}
if [[ ${runTag} == *rawtflist_* ]]; then
  runTag=${runTag#*rawtflist_}
else
  runTag=""   # no "rawtflist_" prefix -> empty tag, as with the previous awk -F extraction
fi
tfreader_log=tf-reader_${runTag}.log
tfs_sorted=tfids_${runTag}_sorted.txt
timeslices_sorted=timeslices_${runTag}_sorted.txt

# export env variables
## print processing time info
export DPL_REPORT_PROCESSING=1
# sourced functions
69+
#######################################
# Print the average number of TFs per file, estimated from a small subset of
# the rawtf input file list.
# Globals:   rawtfFileList (read)
# Arguments: $1 - number of files to sample (default 10)
# Outputs:   one summary line on stdout with the TF count and the average
#######################################
check_tfs_per_file() {
  local nFiles=${1:-10}
  local nSampledTFs
  # count the TFs the reader reports for the first ${nFiles} files;
  # the file list is joined into a comma-separated string for --input-data.
  # NB: use a local counter so the global nTFs (number of TFs to process,
  # read later by sort_tfs) is not clobbered — the original overwrote it.
  nSampledTFs=$(o2-raw-tf-reader-workflow --raw-only-det all --shm-segment-size 16000000000 --input-data "$(head -n "${nFiles}" "${rawtfFileList}" | sed -z 's/\n/,/g')" -b --run | grep 'loops were sent' | awk -F' ' '{print $3}')
  # the value is scaled by 10000; the sed pair inserts a decimal point before
  # the last four digits (second expression handles short values)
  echo "${nSampledTFs} TFs found in ${nFiles} files: $(echo $((nSampledTFs * 10000 / nFiles)) | sed -e 's/....$/.&/;t' -e 's/.$/.0&/') TFs per file"
}
74+
75+
sort_tfs() {
76+
[[ ! -f ${tfreader_log} ]] && time o2-raw-tf-reader-workflow --raw-only-det all --shm-segment-size 16000000000 --input-data ${rawtfFileList} -b --run > ${tfreader_log}
77+
if [ "0$nBlocks" -eq "00" ]; then
78+
grep 'tf-reader.*Done processing' ${tfreader_log} | sed 's/,//g' | awk '{print $5,$6,$7,$9}' | sort -t ':' -k 3 -h >${tfs_sorted}
79+
else
80+
grep 'tf-reader' ${tfreader_log} | grep -v -e 'Executing sh' -e 'Resuming reading' -e 'Processing file' | grep "Block:${nBlocks}" -A 6 | grep 'Done processing' | sed 's/,//g' | awk '{print $5,$6,$7,$9}' | sort -t ':' -k 3 -h >${tfs_sorted}
81+
fi
82+
firstTFtmp=${firstTF}
83+
while true; do
84+
firstLine=$(grep -nr tfCounter:${firstTFtmp} ${tfs_sorted} | awk -F ':' '{print $1}')
85+
[[ ! -z ${firstLine} ]] && break
86+
firstTFtmp=$((firstTFtmp+1))
87+
done
88+
tail -n +${firstLine} ${tfs_sorted} | head -n ${nTFs} | awk '{print $1}' | sort -V | sed -z -e 's/timeslice://g ; s/\n/,/g ; s/,$//g' >${timeslices_sorted}
89+
}
90+
91+
# creation of raw data
#######################################
# Create *.raw files for the timeslices selected by sort_tfs.
# Globals:   rawtfFileList, outputDir, timeslices_sorted (read)
# Outputs:   *.raw files in ${outputDir}; the selected timeslice id list, the
#            executed command (for reference) and the full workflow log are
#            written to ${outputDir}.log
#######################################
create_raw_files() {
  # keep the id list out of the sourcing shell's environment, and read the
  # timeslice file only once instead of cat-ing it twice
  local LID
  LID=$(< "${timeslices_sorted}")
  mkdir -p "${outputDir}"
  # first tee (without -a) starts a fresh log; record the id list in a form
  # that can be copy-pasted back into a shell
  echo "LID=\"${LID}\"" | tee ${outputDir}.log
  # log the command itself with a literal $LID placeholder for reference
  echo "o2-raw-tf-reader-workflow --raw-only-det all --shm-segment-size 16000000000 --input-data ${rawtfFileList} --select-tf-ids " '$LID' " | o2-raw-data-dump-workflow --tof-input-uncompressed --shm-segment-size 16000000000 --fatal-on-deadbeef --output-directory ${outputDir} --dump-verbosity 1 --run | tee -a ${outputDir}.log" | tee -a ${outputDir}.log
  o2-raw-tf-reader-workflow --raw-only-det all --shm-segment-size 16000000000 --input-data ${rawtfFileList} --select-tf-ids "$LID" | o2-raw-data-dump-workflow --tof-input-uncompressed --shm-segment-size 16000000000 --fatal-on-deadbeef --output-directory ${outputDir} --dump-verbosity 1 --run | tee -a ${outputDir}.log
}

0 commit comments

Comments
 (0)