-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_format.sh
More file actions
29 lines (25 loc) · 1.95 KB
/
data_format.sh
File metadata and controls
29 lines (25 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
## prepare for statistics
# python data_format_utils.py collect \
# --data_dir "/root/autodl-tmp/projects/_codeprm/process_data_annotation/outputs" \
# --output_file "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data/collected_process_annotation_steps_data.json"
# python data_format_utils.py analyze_steps_info \
# --data_path "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data/collected_process_annotation_steps_data.json" \
# --output_file "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data/collected_process_annotation_steps_data.json"
## run the analyses
# python data_format_utils.py analyze_value_distribution \
# --data_path "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data/collected_process_annotation_steps_data.json" \
# --output_file "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data/value_distribution_view.png"
# python data_format_utils.py analyze_steps_info \
# --data_path "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data/collected_process_annotation_steps_data.json" \
# --preprocess False \
# --output_file "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data/steps_info_view.png"
# Align the data format for PRM training.
#
# Runs the `to_prm_train_format` subcommand of data_format_utils.py, reading
# the collected annotation JSON given by --data_path and writing the result
# to --output_file.
#
# NOTE(review): this invocation previously contained unresolved git merge
# conflict markers (<<<<<<< HEAD / ======= / >>>>>>> 102cd4b), which left the
# command syntactically invalid. The HEAD side is kept active because it only
# uses flags that already appear elsewhere in this file. The incoming side is
# preserved below as a commented-out alternative. Confirm which variant is
# actually intended before relying on the output.
python data_format_utils.py to_prm_train_format \
  --data_path "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data2/collected_process_annotation_steps_data.json" \
  --output_file "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data2/prm_train_raw.json"

# Incoming-branch variant (102cd4b): reads from raw_data instead of raw_data2,
# writes prm_train_raw_soft.json, and passes --use_hard_label "False".
# python data_format_utils.py to_prm_train_format \
# --data_path "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data/collected_process_annotation_steps_data.json" \
# --output_file "/root/autodl-tmp/projects/_codeprm/process_data_annotation/data/raw_data/prm_train_raw_soft.json" \
# --use_hard_label "False"