Skip to content

Commit 776e61c

Browse files
chiarazampollialcaliva
authored and committed
Doing the check on unmerged and then merged AODs in the parallelized sessions (#1698)
(cherry picked from commit a0e472a)
1 parent 1ebf857 commit 776e61c

File tree

1 file changed

+86
-97
lines changed

1 file changed

+86
-97
lines changed

DATA/production/configurations/asyncReco/async_pass.sh

Lines changed: 86 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,51 @@
1212

1313
# function to run AOD merging
1414
run_AOD_merging() {
  # Check, merge and re-check the AO2D.root file of one AOD directory.
  #
  # Arguments:
  #   $1 - directory (relative to the current one) that holds AO2D.root
  # Globals read:
  #   O2DPG_ROOT, ALIEN_JDL_DONOTMERGEAODS
  # Globals written:
  #   exitcode and the time* bookkeeping variables (left global, as before)
  # Files written inside $1:
  #   list_$1.list, checkAO2D.log, and when merging is enabled merging.log
  #   and checkAO2D_merged.log; validation_error.message on any failure.
  #   AO2D.root is replaced by AO2D_merged.root only if every step succeeds.
  # Returns:
  #   0 when everything succeeded (or there is no AO2D.root to process),
  #   1 when the directory cannot be entered or left again,
  #   otherwise the exit code of the first step that failed.
  local start_dir=$PWD

  # Never touch files in the wrong directory if $1 is missing or unreadable.
  if ! cd "$1"; then
    echo "Cannot enter AOD directory $1" >&2
    return 1
  fi

  _run_AOD_merging_in_cwd "$1"
  exitcode=$?

  # Go back to where we started on every path, not only after a merge.
  if ! cd "$start_dir" && [[ $exitcode -eq 0 ]]; then
    exitcode=1
  fi
  return $exitcode
}

# Helper of run_AOD_merging: does the actual work, assuming the current
# directory already is the AOD directory. $1 is only used for file names
# and log messages.
_run_AOD_merging_in_cwd() {
  local aod_dir=$1
  local list_file="list_${aod_dir}.list"

  # Start from a clean state so a stale value from earlier in the script
  # can never be returned.
  exitcode=0
  if [[ ! -f "AO2D.root" ]]; then
    return 0
  fi

  ls "AO2D.root" > "$list_file"

  # --- step 1: check the AO2D with un-merged DFs ---------------------------
  echo "Checking AO2Ds with un-merged DFs in $aod_dir"
  timeStartCheck=$(date +%s)
  time root -l -b -q "$O2DPG_ROOT/DATA/production/common/readAO2Ds.C" > checkAO2D.log
  exitcode=$?
  timeEndCheck=$(date +%s)
  timeUsedCheck=$(( timeEndCheck - timeStartCheck ))
  echo "Time spent to check unmerged AODs in dir $aod_dir = $timeUsedCheck s"
  if [[ $exitcode -ne 0 ]]; then
    echo "exit code from AO2D check is " "$exitcode" > validation_error.message
    echo "exit code from AO2D check is " "$exitcode"
    echo "This means that the check for unmerged AODs in $aod_dir FAILED, we'll make the whole processing FAIL"
    return $exitcode
  fi

  # Merging is skipped when ALIEN_JDL_DONOTMERGEAODS is set to anything but 0.
  if [[ -z $ALIEN_JDL_DONOTMERGEAODS ]] || [[ $ALIEN_JDL_DONOTMERGEAODS == 0 ]]; then
    # --- step 2: merge the DFs ---------------------------------------------
    echo "Merging AOD from the list $list_file"
    o2-aod-merger --input "$list_file" --verbosity 1 --output AO2D_merged.root > merging.log
    exitcode=$?
    if [[ $exitcode -ne 0 ]]; then
      echo "Exit code from the process merging DFs inside AO2D for $aod_dir is " "$exitcode" > validation_error.message
      echo "Exit code from the process merging DFs inside AO2D for $aod_dir is " "$exitcode"
      echo "This means that the merging of DFs for $aod_dir FAILED, we'll make the whole processing FAIL"
      return $exitcode
    fi

    # --- step 3: check the merged AO2D -------------------------------------
    # Use the function argument here: the caller's AOD_DIR global is not
    # guaranteed to refer to the directory this invocation works on.
    echo "Checking AO2Ds with merged DFs in $aod_dir"
    timeStartCheckMergedAOD=$(date +%s)
    # NOTE(review): the single quotes keep $O2DPG_ROOT unexpanded by the
    # shell, exactly as in the original; presumably ROOT expands it itself
    # - TODO confirm.
    time root -l -b -q '$O2DPG_ROOT/DATA/production/common/readAO2Ds.C("AO2D_merged.root")' > checkAO2D_merged.log
    exitcode=$?
    timeEndCheckMergedAOD=$(date +%s)
    timeUsedCheckMergedAOD=$(( timeEndCheckMergedAOD - timeStartCheckMergedAOD ))
    echo "Time spent to check merged AODs in dir $aod_dir = $timeUsedCheckMergedAOD s"
    if [[ $exitcode -ne 0 ]]; then
      echo "exit code from AO2D in $aod_dir with merged DFs check is " "$exitcode" > validation_error.message
      echo "exit code from AO2D in $aod_dir with merged DFs check is " "$exitcode"
      echo "This means that the check for merged AODs in $aod_dir FAILED, we'll make the whole processing FAIL"
    else
      echo "All ok, replacing initial AO2D.root file in $aod_dir with the one with merged DFs"
      mv AO2D_merged.root AO2D.root
    fi
  fi
  return $exitcode
}
1962

@@ -713,12 +756,12 @@ if [[ $ALIEN_JDL_AODOFF != 1 ]]; then
713756
if [[ $AOD_LIST_COUNT -ge 2 ]]; then
714757
AOD_LAST=`find . -name AO2D.root | sort | tail -1`
715758
CURRENT_SIZE=`wc -c $AOD_LAST | awk '{print $1}'`
716-
echo current size = $CURRENT_SIZE
759+
echo "current size of last AOD file = $CURRENT_SIZE"
717760
PERCENT=`echo "scale=2; $CURRENT_SIZE/($AOD_FILE_SIZE*10^6)*100" | bc -l`
718-
echo percent = $PERCENT
761+
echo "percentage compared to AOD_FILE_SIZE (= $AOD_FILE_SIZE) = $PERCENT"
719762
if (( $(echo "$PERCENT < $MIN_ALLOWED_AOD_PERCENT_SIZE" | bc -l) )); then
720763
AOD_LAST_BUT_ONE=`find . -name AO2D.root | sort | tail -2 | head -1`
721-
echo "Too small, merging $AOD_LAST with previous file $AOD_LAST_BUT_ONE"
764+
echo "Last AOD file too small, merging $AOD_LAST with previous file $AOD_LAST_BUT_ONE"
722765
ls $PWD/$AOD_LAST > listAOD.list
723766
ls $PWD/$AOD_LAST_BUT_ONE >> listAOD.list
724767
echo "List of files for merging:"
@@ -727,7 +770,7 @@ if [[ $ALIEN_JDL_AODOFF != 1 ]]; then
727770
cd tmpAOD
728771
ln -s ../listAOD.list .
729772
timeStart=`date +%s`
730-
time o2-aod-merger --input listAOD.list
773+
time o2-aod-merger --input listAOD.list > merging_lastAOD.log
731774
exitcode=$?
732775
timeEnd=`date +%s`
733776
timeUsed=$(( $timeUsed+$timeEnd-$timeStart ))
@@ -773,90 +816,44 @@ if [[ $ALIEN_JDL_AODOFF != 1 ]]; then
773816
for (( i = 1; i <=$AOD_LIST_COUNT; i++)); do
774817
AOD_FILE=`echo $AOD_LIST | cut -d' ' -f$i`
775818
AOD_DIR=`dirname $AOD_FILE | sed -e 's|./||'`
776-
cd $AOD_DIR
777-
if [[ -f "AO2D.root" ]]; then
778-
echo "Checking AO2Ds with un-merged DFs in $AOD_DIR"
779-
timeStartCheck=`date +%s`
780-
time root -l -b -q $O2DPG_ROOT/DATA/production/common/readAO2Ds.C > checkAO2D.log
781-
exitcode=$?
782-
timeEndCheck=`date +%s`
783-
timeUsedCheck=$(( $timeUsedCheck+$timeEndCheck-$timeStartCheck ))
784-
if [[ $exitcode -ne 0 ]]; then
785-
echo "exit code from AO2D check is " $exitcode > validation_error.message
786-
echo "exit code from AO2D check is " $exitcode
787-
fi
788-
fi
789-
cd -
790-
ls $AOD_FILE > list_$AOD_DIR.list
791819
echo "$AOD_DIR" >> $JOB_LIST
792820
done
793-
if [[ -z $ALIEN_JDL_DONOTMERGEAODS ]] || [[ $ALIEN_JDL_DONOTMERGEAODS == 0 ]]; then
794-
# spawning the parallel merging
795-
timeStartMerge=`date +%s`
796-
arr=()
797-
aods=()
798-
mergedok=()
799-
i=0
800-
while IFS= read -r line; do
801-
while [[ $CURRENT_POOL_SIZE -ge $MAX_POOL_SIZE ]]; do
802-
CURRENT_POOL_SIZE=`jobs -r | wc -l`
803-
sleep 1
804-
done
805-
run_AOD_merging $line &
806-
arr[$i]=$!
807-
aods[$i]=$line
808-
i=$((i+1))
821+
# spawning the parallel merging
822+
timeStartMerge=`date +%s`
823+
arr=()
824+
aods=()
825+
mergedok=()
826+
i=0
827+
while IFS= read -r line; do
828+
while [[ $CURRENT_POOL_SIZE -ge $MAX_POOL_SIZE ]]; do
809829
CURRENT_POOL_SIZE=`jobs -r | wc -l`
810-
done < $JOB_LIST
811-
# collecting return codes of the merging processes
812-
for i in "${!arr[@]}"; do
813-
wait ${arr[$i]}
814-
exitcode=$?
815-
if [[ $exitcode -ne 0 ]]; then
816-
echo "Exit code from the process merging DFs inside AO2D for ${aods[$i]} is " $exitcode > validation_error.message
817-
echo "Exit code from the process merging DFs inside AO2D for ${aods[$i]} is " $exitcode
818-
echo "This means that the merging of DFs for ${aods[$i]} FAILED, we make the whole processing FAIL"
819-
exit $exitcode
820-
mergedok[$((10#${aods[$i]}))]=0
821-
else
822-
echo "Merging of DFs inside the AO2D in ${aods[$i]} worked correctly"
823-
mergedok[$((10#${aods[$i]}))]=1
824-
fi
830+
sleep 1
825831
done
826-
timeEndMerge=`date +%s`
827-
timeUsedMerge=$(( $timeUsedMerge+$timeEndMerge-$timeStartMerge ))
828-
# Checking the merged AODs, and replacing the original ones with the merged ones if all is ok
829-
# This loop could be merged with the above, but for now we keep them separate
830-
for (( i = 1; i <=$AOD_LIST_COUNT; i++)); do
831-
AOD_FILE=`echo $AOD_LIST | cut -d' ' -f$i`
832-
AOD_DIR=`dirname $AOD_FILE | sed -e 's|./||'`
833-
echo "Inspecting $AOD_DIR:"
834-
if [[ ${mergedok[$((10#$AOD_DIR))]} == 0 ]]; then
835-
echo "Merging for $AOD_DIR DID NOT work, we will do nothing for this file - BUT IT SHOULD HAVE NOT HAPPENED, PLEASE CHECK"
836-
exit 8
837-
continue
838-
else
839-
echo "Merging for $AOD_DIR DID work, let's continue"
840-
fi
841-
cd $AOD_DIR
842-
# now checking them
843-
echo "Checking AO2Ds with merged DFs in $AOD_DIR"
844-
timeStartCheckMergedAOD=`date +%s`
845-
time root -l -b -q '$O2DPG_ROOT/DATA/production/common/readAO2Ds.C("AO2D_merged.root")' > checkAO2D_merged.log
846-
exitcode=$?
847-
timeEndCheckMergedAOD=`date +%s`
848-
timeUsedCheckMergedAOD=$(( $timeUsedCheckMergedAOD+$timeEndCheckMergedAOD-$timeStartCheckMergedAOD ))
849-
if [[ $exitcode -ne 0 ]]; then
850-
echo "exit code from AO2D with merged DFs check is " $exitcode > validation_error.message
851-
echo "exit code from AO2D with merged DFs check is " $exitcode
852-
echo "We will keep the AO2Ds with unmerged DFs"
853-
else
854-
echo "All ok, replacing initial AO2D.root file with the one with merged DFs"
855-
mv AO2D_merged.root AO2D.root
856-
fi
857-
cd ..
858-
done
859-
fi
832+
run_AOD_merging $line &
833+
arr[$i]=$!
834+
aods[$i]=$line
835+
i=$((i+1))
836+
CURRENT_POOL_SIZE=`jobs -r | wc -l`
837+
done < $JOB_LIST
838+
# collecting return codes of the merging processes
839+
for i in "${!arr[@]}"; do
840+
wait ${arr[$i]}
841+
exitcode=$?
842+
if [[ $exitcode -ne 0 ]]; then
843+
echo "Exit code from the process check+merging+check_mergedAODs for ${aods[$i]} is " $exitcode > validation_error.message
844+
echo "Exit code from the process check+merging+check_mergedAODs for ${aods[$i]} is " $exitcode
845+
echo "This means that the process check+merging+check_mergedAODs for ${aods[$i]} FAILED, we make the whole processing FAIL"
846+
exit $exitcode
847+
mergedok[$((10#${aods[$i]}))]=0
848+
else
849+
echo "Merging of DFs inside the AO2D in ${aods[$i]} worked correctly"
850+
mergedok[$((10#${aods[$i]}))]=1
851+
fi
852+
done
853+
timeEndMerge=`date +%s`
854+
timeUsedMerge=$(( $timeUsedMerge+$timeEndMerge-$timeStartMerge ))
855+
echo "--> Total Time spent in checking and merging AODs = $timeUsedMerge s"
856+
860857
# running analysis QC if requested
861858
if [[ $ALIEN_JDL_RUNANALYSISQC == 1 ]]; then
862859
for (( i = 1; i <=$AOD_LIST_COUNT; i++)); do
@@ -887,26 +884,18 @@ if [[ $ALIEN_JDL_AODOFF != 1 ]]; then
887884
fi
888885
cd ..
889886
done
890-
else
891-
echo "Analysis QC will not be run, ALIEN_JDL_RUNANALYSISQC = $ALIEN_JDL_RUNANALYSISQC"
892-
fi
893-
echo "Time spent in checking initial AODs = $timeUsedCheck s"
894-
if [[ -z $ALIEN_JDL_DONOTMERGEAODS ]] || [[ $ALIEN_JDL_DONOTMERGEAODS == 0 ]]; then
895-
echo "Time spent in merging AODs = $timeUsedMerge s"
896-
echo "Time spent in checking final AODs = $timeUsedCheckMergedAOD s"
897-
fi
898-
if [[ $ALIEN_JDL_RUNANALYSISQC == 1 ]]; then
899887
echo "Time spent in AnalysisQC = $timeUsedAnalysisQC s"
900888
else
889+
echo "Analysis QC will not be run, ALIEN_JDL_RUNANALYSISQC = $ALIEN_JDL_RUNANALYSISQC"
901890
echo "No timing reported for Analysis QC, since it was not run"
902891
fi
903892
fi
904893

905894

906895
timeEndFullProcessing=`date +%s`
907-
timeUsedFullProcessing=$(( $timeEndFullProcessing+$timeStartFullProcessing ))
896+
timeUsedFullProcessing=$(( $timeEndFullProcessing-$timeStartFullProcessing ))
908897

909-
echo "Time used for processing = $timeUsedFullProcessing s"
898+
echo "Total time used for processing = $timeUsedFullProcessing s"
910899

911900
if [[ $ALIEN_JDL_QCOFF != 1 ]]; then
912901
# copying the QC json file here

0 commit comments

Comments
 (0)