Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions results/260318-gemma-eval/*eval.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
#
# Evaluation run for the Sculpt-AI/2603171-gemma model using gimbench.
# Downloads the model, runs the gimbench ppl, match, mcqa and cv modules in
# sequence, and finally schedules a machine shutdown.
# API_KEY and API_BASE are set to placeholder values in this script and must
# be filled in before running.
Comment thread
Ki-Seki marked this conversation as resolved.

Comment thread
Ki-Seki marked this conversation as resolved.
# Trace every command to stderr as it runs, so the log shows which step is
# currently executing.
# NOTE(review): the trace prints fully expanded arguments, so any secret that
# is passed on a command line while tracing is on (e.g. an --api_key value)
# ends up in the log.
set -x
Comment thread
Ki-Seki marked this conversation as resolved.

# --- Configuration ----------------------------------------------------------
# Placeholder credentials: replace "xxx" with real values before running.
export API_KEY=xxx
export API_BASE=xxx
# Model under evaluation; fetched with `hf download` below.
export MODEL=Sculpt-AI/2603171-gemma
# Model used for the --no_gimkit runs (presumably the comparison baseline).
BASELINE_MODEL=google/gemma-3-270m-it

# Fetch the model up front, before the benchmark runs start.
hf download "$MODEL"

# --- ppl / match benchmarks (first 100 samples each) ------------------------
python -m gimbench.ppl.gim_sft --ref_model_device cpu --golden_truth_only --first_n 100
python -m gimbench.ppl.gim_sft --model_type vllm-offline --model_name "$MODEL" --output_type cfg --ref_model_device cpu --first_n 100
python -m gimbench.match.gim_regex --model_type vllm-offline --model_name "$MODEL" --output_type cfg --ref_model_device cpu --first_n 100

# --- Multiple-choice QA benchmarks ------------------------------------------
# Each entry is "<gimbench.mcqa module>:<--first_n value>", listed in run order.
mcqa_runs=(
  gpqa_diamond:198
  medmcqa:1000
  mmlu_pro:1000
  qasc:920
)

# Pass 1: the model under evaluation, with --auto_budget.
for run in "${mcqa_runs[@]}"; do
  python -m "gimbench.mcqa.${run%%:*}" --model_type vllm-offline --model_name "$MODEL" --auto_budget --num_proc 40 --first_n "${run##*:}"
done

# Pass 2: the same tasks on BASELINE_MODEL with --no_gimkit.
for run in "${mcqa_runs[@]}"; do
  python -m "gimbench.mcqa.${run%%:*}" --model_type vllm-offline --model_name "$BASELINE_MODEL" --no_gimkit --num_proc 40 --first_n "${run##*:}"
done

# --- CV parsing benchmark ---------------------------------------------------
# The API key is passed as a command-line argument, so xtrace would copy it
# into the log. Pause tracing (only if it is active) around this one command
# and restore it afterwards.
xtrace_was_on=0
if [[ $- == *x* ]]; then
  xtrace_was_on=1
  { set +x; } 2>/dev/null
  # Keep a marker in the log so the step is still visible.
  printf '%s\n' '+ python -m gimbench.cv.cv_parse (xtrace paused to hide --api_key)' >&2
fi
python -m gimbench.cv.cv_parse --model_type vllm-offline --model_name "$MODEL" --output_type cfg --api_key "$API_KEY" --base_url "$API_BASE"
if (( xtrace_was_on )); then
  set -x
fi

# Halt the machine 3 minutes from now (`shutdown -c` cancels). errexit is not
# enabled in this script, so this is reached even if an earlier run failed.
shutdown -h +3
Comment thread
Ki-Seki marked this conversation as resolved.
Loading
Loading