Skip to content

Commit a0e472a

Browse files
Doing the check on unmerged and then merged AODs in the parallelized sessions (#1698)
1 parent 1af58ff commit a0e472a

File tree

1 file changed

+86
-97
lines changed

1 file changed

+86
-97
lines changed

DATA/production/configurations/asyncReco/async_pass.sh

Lines changed: 86 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,51 @@
1212

1313
#######################################
# Helper for run_AOD_merging: performs the check / merge / re-check sequence
# for the AO2D.root file located in the CURRENT directory.
# Globals:
#   O2DPG_ROOT                (read) base path of the readAO2Ds.C macro
#   ALIEN_JDL_DONOTMERGEAODS  (read) merging is done when unset, empty or 0
# Arguments:
#   $1 - name of the AOD directory (used for the list file name and in logs)
# Outputs:
#   progress messages on stdout; in the current directory: list_$1.list,
#   checkAO2D.log, merging.log, checkAO2D_merged.log and, on failure,
#   validation_error.message
# Returns:
#   0 when there is no AO2D.root or every executed step succeeded, otherwise
#   the exit code of the first failing step
#######################################
_run_AOD_merging_in_cwd() {
  local aod_dir=$1
  # local and initialised, so that a stale exitcode of the caller can never be
  # returned when nothing is executed
  local exitcode=0
  local timeStartCheck timeEndCheck timeUsedCheck
  local timeStartCheckMergedAOD timeEndCheckMergedAOD timeUsedCheckMergedAOD

  # nothing to check or merge in this directory
  [[ -f "AO2D.root" ]] || return 0

  printf '%s\n' "AO2D.root" > "list_${aod_dir}.list"

  # first check: AO2D with un-merged DFs
  echo "Checking AO2Ds with un-merged DFs in $aod_dir"
  timeStartCheck=$(date +%s)
  time root -l -b -q "${O2DPG_ROOT}/DATA/production/common/readAO2Ds.C" > checkAO2D.log
  exitcode=$?
  timeEndCheck=$(date +%s)
  timeUsedCheck=$(( timeEndCheck - timeStartCheck ))
  echo "Time spent to check unmerged AODs in dir $aod_dir = $timeUsedCheck s"
  if [[ $exitcode -ne 0 ]]; then
    echo "exit code from AO2D check is " $exitcode > validation_error.message
    echo "exit code from AO2D check is " $exitcode
    echo "This means that the check for unmerged AODs in $aod_dir FAILED, we'll make the whole processing FAIL"
    return $exitcode
  fi

  # merging was explicitly disabled: the un-merged file is kept as it is
  if [[ -n "${ALIEN_JDL_DONOTMERGEAODS:-}" && "${ALIEN_JDL_DONOTMERGEAODS}" != 0 ]]; then
    return 0
  fi

  echo "Merging AOD from the list list_${aod_dir}.list"
  o2-aod-merger --input "list_${aod_dir}.list" --verbosity 1 --output AO2D_merged.root > merging.log
  exitcode=$?
  if [[ $exitcode -ne 0 ]]; then
    echo "Exit code from the process merging DFs inside AO2D for $aod_dir is " $exitcode > validation_error.message
    echo "Exit code from the process merging DFs inside AO2D for $aod_dir is " $exitcode
    echo "This means that the merging of DFs for $aod_dir FAILED, we'll make the whole processing FAIL"
    return $exitcode
  fi

  # second check: AO2D with merged DFs
  echo "Checking AO2Ds with merged DFs in $aod_dir"
  timeStartCheckMergedAOD=$(date +%s)
  # the macro path is expanded by the shell, consistently with the first check
  time root -l -b -q "${O2DPG_ROOT}/DATA/production/common/readAO2Ds.C(\"AO2D_merged.root\")" > checkAO2D_merged.log
  exitcode=$?
  timeEndCheckMergedAOD=$(date +%s)
  timeUsedCheckMergedAOD=$(( timeEndCheckMergedAOD - timeStartCheckMergedAOD ))
  echo "Time spent to check merged AODs in dir $aod_dir = $timeUsedCheckMergedAOD s"
  if [[ $exitcode -ne 0 ]]; then
    echo "exit code from AO2D in $aod_dir with merged DFs check is " $exitcode > validation_error.message
    echo "exit code from AO2D in $aod_dir with merged DFs check is " $exitcode
    echo "This means that the check for merged AODs in $aod_dir FAILED, we'll make the whole processing FAIL"
  else
    echo "All ok, replacing initial AO2D.root file in $aod_dir with the one with merged DFs"
    mv AO2D_merged.root AO2D.root
  fi
  return $exitcode
}

#######################################
# function to run the AOD check, the merging of the DFs and the check of the
# merged AOD for one AOD directory
# Arguments:
#   $1 - directory (relative to the current one) containing AO2D.root
# Returns:
#   0 on success (or when the directory has no AO2D.root), non-zero when the
#   directory cannot be entered or when any of the steps fails
# The working directory of the caller is restored on every exit path, so the
# function behaves the same whether or not it is run in the background.
#######################################
run_AOD_merging() {
  local start_dir=$PWD
  local rc=0

  # never continue in the wrong directory if the AOD directory is not reachable
  if ! cd "$1"; then
    echo "Cannot enter directory $1 to check and merge AODs, we'll make the whole processing FAIL"
    return 1
  fi

  _run_AOD_merging_in_cwd "$1"
  rc=$?

  if ! cd "$start_dir" && [[ $rc -eq 0 ]]; then
    rc=1
  fi
  return $rc
}
1962

@@ -716,12 +759,12 @@ if [[ $ALIEN_JDL_AODOFF != 1 ]]; then
716759
if [[ $AOD_LIST_COUNT -ge 2 ]]; then
717760
AOD_LAST=`find . -name AO2D.root | sort | tail -1`
718761
CURRENT_SIZE=`wc -c $AOD_LAST | awk '{print $1}'`
719-
echo current size = $CURRENT_SIZE
762+
echo "current size of last AOD file = $CURRENT_SIZE"
720763
PERCENT=`echo "scale=2; $CURRENT_SIZE/($AOD_FILE_SIZE*10^6)*100" | bc -l`
721-
echo percent = $PERCENT
764+
echo "percentage compared to AOD_FILE_SIZE (= $AOD_FILE_SIZE) = $PERCENT"
722765
if (( $(echo "$PERCENT < $MIN_ALLOWED_AOD_PERCENT_SIZE" | bc -l) )); then
723766
AOD_LAST_BUT_ONE=`find . -name AO2D.root | sort | tail -2 | head -1`
724-
echo "Too small, merging $AOD_LAST with previous file $AOD_LAST_BUT_ONE"
767+
echo "Last AOD file too small, merging $AOD_LAST with previous file $AOD_LAST_BUT_ONE"
725768
ls $PWD/$AOD_LAST > listAOD.list
726769
ls $PWD/$AOD_LAST_BUT_ONE >> listAOD.list
727770
echo "List of files for merging:"
@@ -730,7 +773,7 @@ if [[ $ALIEN_JDL_AODOFF != 1 ]]; then
730773
cd tmpAOD
731774
ln -s ../listAOD.list .
732775
timeStart=`date +%s`
733-
time o2-aod-merger --input listAOD.list
776+
time o2-aod-merger --input listAOD.list > merging_lastAOD.log
734777
exitcode=$?
735778
timeEnd=`date +%s`
736779
timeUsed=$(( $timeUsed+$timeEnd-$timeStart ))
@@ -776,90 +819,44 @@ if [[ $ALIEN_JDL_AODOFF != 1 ]]; then
776819
for (( i = 1; i <=$AOD_LIST_COUNT; i++)); do
777820
AOD_FILE=`echo $AOD_LIST | cut -d' ' -f$i`
778821
AOD_DIR=`dirname $AOD_FILE | sed -e 's|./||'`
779-
cd $AOD_DIR
780-
if [[ -f "AO2D.root" ]]; then
781-
echo "Checking AO2Ds with un-merged DFs in $AOD_DIR"
782-
timeStartCheck=`date +%s`
783-
time root -l -b -q $O2DPG_ROOT/DATA/production/common/readAO2Ds.C > checkAO2D.log
784-
exitcode=$?
785-
timeEndCheck=`date +%s`
786-
timeUsedCheck=$(( $timeUsedCheck+$timeEndCheck-$timeStartCheck ))
787-
if [[ $exitcode -ne 0 ]]; then
788-
echo "exit code from AO2D check is " $exitcode > validation_error.message
789-
echo "exit code from AO2D check is " $exitcode
790-
fi
791-
fi
792-
cd -
793-
ls $AOD_FILE > list_$AOD_DIR.list
794822
echo "$AOD_DIR" >> $JOB_LIST
795823
done
796-
if [[ -z $ALIEN_JDL_DONOTMERGEAODS ]] || [[ $ALIEN_JDL_DONOTMERGEAODS == 0 ]]; then
797-
# spawning the parallel merging
798-
timeStartMerge=`date +%s`
799-
arr=()
800-
aods=()
801-
mergedok=()
802-
i=0
803-
while IFS= read -r line; do
804-
while [[ $CURRENT_POOL_SIZE -ge $MAX_POOL_SIZE ]]; do
805-
CURRENT_POOL_SIZE=`jobs -r | wc -l`
806-
sleep 1
807-
done
808-
run_AOD_merging $line &
809-
arr[$i]=$!
810-
aods[$i]=$line
811-
i=$((i+1))
824+
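# spawning the parallel processing of the AODs: one background run_AOD_merging job (check + merging + check of merged AODs) per directory listed in $JOB_LIST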
825+
timeStartMerge=`date +%s`
826+
arr=()
827+
aods=()
828+
mergedok=()
829+
i=0
830+
while IFS= read -r line; do
831+
while [[ $CURRENT_POOL_SIZE -ge $MAX_POOL_SIZE ]]; do
812832
CURRENT_POOL_SIZE=`jobs -r | wc -l`
813-
done < $JOB_LIST
814-
# collecting return codes of the merging processes
815-
for i in "${!arr[@]}"; do
816-
wait ${arr[$i]}
817-
exitcode=$?
818-
if [[ $exitcode -ne 0 ]]; then
819-
echo "Exit code from the process merging DFs inside AO2D for ${aods[$i]} is " $exitcode > validation_error.message
820-
echo "Exit code from the process merging DFs inside AO2D for ${aods[$i]} is " $exitcode
821-
echo "This means that the merging of DFs for ${aods[$i]} FAILED, we make the whole processing FAIL"
822-
exit $exitcode
823-
mergedok[$((10#${aods[$i]}))]=0
824-
else
825-
echo "Merging of DFs inside the AO2D in ${aods[$i]} worked correctly"
826-
mergedok[$((10#${aods[$i]}))]=1
827-
fi
833+
sleep 1
828834
done
829-
timeEndMerge=`date +%s`
830-
timeUsedMerge=$(( $timeUsedMerge+$timeEndMerge-$timeStartMerge ))
831-
# Checking the merged AODs, and replacing the original ones with the merged ones if all is ok
832-
# This loop could be merged with the above, but for now we keep them separate
833-
for (( i = 1; i <=$AOD_LIST_COUNT; i++)); do
834-
AOD_FILE=`echo $AOD_LIST | cut -d' ' -f$i`
835-
AOD_DIR=`dirname $AOD_FILE | sed -e 's|./||'`
836-
echo "Inspecting $AOD_DIR:"
837-
if [[ ${mergedok[$((10#$AOD_DIR))]} == 0 ]]; then
838-
echo "Merging for $AOD_DIR DID NOT work, we will do nothing for this file - BUT IT SHOULD HAVE NOT HAPPENED, PLEASE CHECK"
839-
exit 8
840-
continue
841-
else
842-
echo "Merging for $AOD_DIR DID work, let's continue"
843-
fi
844-
cd $AOD_DIR
845-
# now checking them
846-
echo "Checking AO2Ds with merged DFs in $AOD_DIR"
847-
timeStartCheckMergedAOD=`date +%s`
848-
time root -l -b -q '$O2DPG_ROOT/DATA/production/common/readAO2Ds.C("AO2D_merged.root")' > checkAO2D_merged.log
849-
exitcode=$?
850-
timeEndCheckMergedAOD=`date +%s`
851-
timeUsedCheckMergedAOD=$(( $timeUsedCheckMergedAOD+$timeEndCheckMergedAOD-$timeStartCheckMergedAOD ))
852-
if [[ $exitcode -ne 0 ]]; then
853-
echo "exit code from AO2D with merged DFs check is " $exitcode > validation_error.message
854-
echo "exit code from AO2D with merged DFs check is " $exitcode
855-
echo "We will keep the AO2Ds with unmerged DFs"
856-
else
857-
echo "All ok, replacing initial AO2D.root file with the one with merged DFs"
858-
mv AO2D_merged.root AO2D.root
859-
fi
860-
cd ..
861-
done
862-
fi
835+
run_AOD_merging $line &
836+
arr[$i]=$!
837+
aods[$i]=$line
838+
i=$((i+1))
839+
CURRENT_POOL_SIZE=`jobs -r | wc -l`
840+
done < $JOB_LIST
841+
# collecting return codes of the background check+merging+check_mergedAODs processes
842+
for i in "${!arr[@]}"; do
843+
wait ${arr[$i]}
844+
exitcode=$?
845+
if [[ $exitcode -ne 0 ]]; then
846+
echo "Exit code from the process check+merging+check_mergedAODs for ${aods[$i]} is " $exitcode > validation_error.message
847+
echo "Exit code from the process check+merging+check_mergedAODs for ${aods[$i]} is " $exitcode
848+
echo "This means that the process check+merging+check_mergedAODs for ${aods[$i]} FAILED, we make the whole processing FAIL"
849+
exit $exitcode
850+
mergedok[$((10#${aods[$i]}))]=0
851+
else
852+
echo "Merging of DFs inside the AO2D in ${aods[$i]} worked correctly"
853+
mergedok[$((10#${aods[$i]}))]=1
854+
fi
855+
done
856+
timeEndMerge=`date +%s`
857+
timeUsedMerge=$(( $timeUsedMerge+$timeEndMerge-$timeStartMerge ))
858+
echo "--> Total Time spent in checking and merging AODs = $timeUsedMerge s"
859+
863860
# running analysis QC if requested
864861
if [[ $ALIEN_JDL_RUNANALYSISQC == 1 ]]; then
865862
for (( i = 1; i <=$AOD_LIST_COUNT; i++)); do
@@ -890,26 +887,18 @@ if [[ $ALIEN_JDL_AODOFF != 1 ]]; then
890887
fi
891888
cd ..
892889
done
893-
else
894-
echo "Analysis QC will not be run, ALIEN_JDL_RUNANALYSISQC = $ALIEN_JDL_RUNANALYSISQC"
895-
fi
896-
echo "Time spent in checking initial AODs = $timeUsedCheck s"
897-
if [[ -z $ALIEN_JDL_DONOTMERGEAODS ]] || [[ $ALIEN_JDL_DONOTMERGEAODS == 0 ]]; then
898-
echo "Time spent in merging AODs = $timeUsedMerge s"
899-
echo "Time spent in checking final AODs = $timeUsedCheckMergedAOD s"
900-
fi
901-
if [[ $ALIEN_JDL_RUNANALYSISQC == 1 ]]; then
902890
echo "Time spent in AnalysisQC = $timeUsedAnalysisQC s"
903891
else
892+
echo "Analysis QC will not be run, ALIEN_JDL_RUNANALYSISQC = $ALIEN_JDL_RUNANALYSISQC"
904893
echo "No timing reported for Analysis QC, since it was not run"
905894
fi
906895
fi
907896

908897

909898
timeEndFullProcessing=`date +%s`
910-
timeUsedFullProcessing=$(( $timeEndFullProcessing+$timeStartFullProcessing ))
899+
timeUsedFullProcessing=$(( $timeEndFullProcessing-$timeStartFullProcessing ))
911900

912-
echo "Time used for processing = $timeUsedFullProcessing s"
901+
echo "Total time used for processing = $timeUsedFullProcessing s"
913902

914903
if [[ $ALIEN_JDL_QCOFF != 1 ]]; then
915904
# copying the QC json file here

0 commit comments

Comments
 (0)