Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions configs/vision_prune/mobilenetv2_cifar10_unified.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ pruning:
- "random" # Random baseline
- "magnitude" # Standard magnitude pruning (prune low)
- "taylor" # Gradient-based importance
- "network_slimming" # Network Slimming (BN gamma) baseline
- "geometric_median" # FPGM-style geometric median baseline
- "hrank" # HRank feature-rank baseline

# =========================================================================
# SINGLE METRICS - Prune LOW (assumes low = unimportant)
Expand Down Expand Up @@ -181,6 +184,9 @@ pruning:
scoring_methods:
- "random"
- "magnitude"
- "network_slimming"
- "geometric_median"
- "hrank"
- "rq_low"
- "rq_high"
- "redundancy_low"
Expand Down
6 changes: 6 additions & 0 deletions configs/vision_prune/resnet18_cifar10_unified.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ pruning:
- "random" # Random baseline
- "magnitude" # Standard magnitude pruning (prune low)
- "taylor" # Gradient-based importance
- "network_slimming" # Network Slimming (BN gamma) baseline
- "geometric_median" # FPGM-style geometric median baseline
- "hrank" # HRank feature-rank baseline

# =========================================================================
# SINGLE METRICS - Prune LOW (assumes low = unimportant)
Expand Down Expand Up @@ -190,6 +193,9 @@ pruning:
scoring_methods:
- "random"
- "magnitude"
- "network_slimming"
- "geometric_median"
- "hrank"
- "rq_low"
- "rq_high"
- "redundancy_low"
Expand Down
6 changes: 6 additions & 0 deletions configs/vision_prune/resnet50_imagenet100_unified.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ pruning:
- "random" # Random baseline
- "magnitude" # Standard magnitude pruning (prune low)
- "taylor" # Gradient-based importance
- "network_slimming" # Network Slimming (BN gamma) baseline
- "geometric_median" # FPGM-style geometric median baseline
- "hrank" # HRank feature-rank baseline

# =========================================================================
# SINGLE METRICS - Prune LOW (assumes low = unimportant)
Expand Down Expand Up @@ -182,6 +185,9 @@ pruning:
scoring_methods:
- "random"
- "magnitude"
- "network_slimming"
- "geometric_median"
- "hrank"
- "rq_low"
- "rq_high"
- "redundancy_low"
Expand Down
6 changes: 6 additions & 0 deletions configs/vision_prune/vgg16_cifar10_unified.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ pruning:
- "random" # Random baseline
- "magnitude" # Standard magnitude pruning (prune low)
- "taylor" # Gradient-based importance
- "network_slimming" # Network Slimming (BN gamma) baseline
- "geometric_median" # FPGM-style geometric median baseline
- "hrank" # HRank feature-rank baseline

# =========================================================================
# SINGLE METRICS - Prune LOW (assumes low = unimportant)
Expand Down Expand Up @@ -178,6 +181,9 @@ pruning:
scoring_methods:
- "random"
- "magnitude"
- "network_slimming"
- "geometric_median"
- "hrank"
- "rq_low"
- "rq_high"
- "redundancy_low"
Expand Down
42 changes: 26 additions & 16 deletions slurm_jobs/run_baseline_test.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#!/bin/bash
#SBATCH --job-name=baseline_test
#SBATCH --partition=kempner_h100
#SBATCH --account=kempner_dev
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=16
Expand All @@ -14,31 +12,43 @@
# Quick test for Wanda/SparseGPT integration
# Expected runtime: ~30-60 minutes

set -e
set -euo pipefail

# NOTE: Cluster-specific SBATCH settings like --partition/--account are intentionally omitted.
# Submit with your local settings, e.g.:
# sbatch --partition=<PARTITION> --account=<ACCOUNT> slurm_jobs/run_baseline_test.sh

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$REPO_ROOT"

echo "=========================================="
echo "Baseline Pruning Test (Wanda + SparseGPT)"
echo "=========================================="
echo "Job ID: $SLURM_JOB_ID"
echo "Job ID: ${SLURM_JOB_ID:-N/A}"
echo "Node: $(hostname)"
echo "Start time: $(date)"
echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)"
echo ""

# Environment setup
module purge
module load cuda/12.2.0-fasrc01
eval "$(conda shell.bash hook)"
conda activate networkAlignmentAnalysis

# Set up paths
cd /n/holylabs/kempner_dev/Users/hsafaai/Code/alignment

export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
export TOKENIZERS_PARALLELISM=false
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export HF_HOME=/n/home13/hsafaai/.cache/huggingface
export HF_TOKEN=$(cat /n/home13/hsafaai/.cache/huggingface/token)
export OMP_NUM_THREADS="${SLURM_CPUS_PER_TASK:-1}"

if command -v conda >/dev/null 2>&1; then
eval "$(conda shell.bash hook)"
conda activate "${CONDA_ENV:-networkAlignmentAnalysis}"
else
echo "WARN: conda not found; assuming environment already activated." >&2
fi

export HF_HOME="${HF_HOME:-${HOME}/.cache/huggingface}"
HF_TOKEN_FILE="${HF_HOME}/token"
if [[ -f "$HF_TOKEN_FILE" ]]; then
export HF_TOKEN="$(cat "$HF_TOKEN_FILE")"
export HUGGINGFACE_HUB_TOKEN="${HF_TOKEN}"
else
echo "WARN: HF token file not found at $HF_TOKEN_FILE (set HF_TOKEN env var if needed)" >&2
fi

# Create logs directory
mkdir -p logs
Expand Down
40 changes: 26 additions & 14 deletions slurm_jobs/run_fast_pruning.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
#SBATCH --cpus-per-task=8
#SBATCH --time=02:00:00
#SBATCH --mem=80GB
#SBATCH --partition=kempner_eng
#SBATCH --account=kempner_dev

# ============================================================================
# FAST LLM PRUNING COMPARISON
# ============================================================================
# NOTE: Cluster-specific SBATCH settings like --partition/--account are intentionally omitted.
# Submit with your local settings, e.g.:
# sbatch --partition=<PARTITION> --account=<ACCOUNT> slurm_jobs/run_fast_pruning.sh
#
# Quick iteration version for development and testing
# Expected runtime: ~30-60 minutes on H100
#
Expand All @@ -25,30 +27,40 @@
# - 50 eval samples instead of 100
# ============================================================================

set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$REPO_ROOT"

echo "============================================================================"
echo "FAST LLM PRUNING COMPARISON"
echo "============================================================================"
echo "Job ID: $SLURM_JOB_ID"
echo "Job ID: ${SLURM_JOB_ID:-N/A}"
echo "Node: $(hostname)"
echo "Start time: $(date)"
echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)"
echo ""

# Environment setup
module purge
module load cuda/12.2.0-fasrc01
eval "$(conda shell.bash hook)"
conda activate networkAlignmentAnalysis

cd /n/holylabs/kempner_dev/Users/hsafaai/Code/alignment

mkdir -p logs

export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
if command -v conda >/dev/null 2>&1; then
eval "$(conda shell.bash hook)"
conda activate "${CONDA_ENV:-networkAlignmentAnalysis}"
else
echo "WARN: conda not found; assuming environment already activated." >&2
fi

export OMP_NUM_THREADS="${SLURM_CPUS_PER_TASK:-1}"
export TOKENIZERS_PARALLELISM=false
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export HF_HOME=/n/home13/hsafaai/.cache/huggingface
export HF_TOKEN=$(cat /n/home13/hsafaai/.cache/huggingface/token)
export HF_HOME="${HF_HOME:-${HOME}/.cache/huggingface}"
HF_TOKEN_FILE="${HF_HOME}/token"
if [[ -f "$HF_TOKEN_FILE" ]]; then
export HF_TOKEN="$(cat "$HF_TOKEN_FILE")"
export HUGGINGFACE_HUB_TOKEN="${HF_TOKEN}"
else
echo "WARN: HF token file not found at $HF_TOKEN_FILE (set HF_TOKEN env var if needed)" >&2
fi

echo "============================================================================"
echo "FAST MODE CONFIGURATION:"
Expand Down
29 changes: 18 additions & 11 deletions slurm_jobs/run_mnist_basic.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,29 @@
#SBATCH --cpus-per-task=8
#SBATCH --time=0:30:00
#SBATCH --mem=32GB
#SBATCH --partition=kempner_eng
#SBATCH --account=kempner_dev

set -euo pipefail

# NOTE: Cluster-specific SBATCH settings like --partition/--account are intentionally omitted.
# Submit with your local settings, e.g.:
# sbatch --partition=<PARTITION> --account=<ACCOUNT> slurm_jobs/run_mnist_basic.sh

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$REPO_ROOT"

echo "Starting MNIST basic alignment experiment at $(date)"
echo "Job ID: $SLURM_JOB_ID"
echo "Job ID: ${SLURM_JOB_ID:-N/A}"
echo "Running on: $(hostname)"

# Environment setup (conda env: networkAlignmentAnalysis)
module purge
module load cuda/12.2.0-fasrc01
eval "$(conda shell.bash hook)"
conda activate networkAlignmentAnalysis

cd /n/holylabs/kempner_dev/Users/hsafaai/Code/alignment
if command -v conda >/dev/null 2>&1; then
eval "$(conda shell.bash hook)"
conda activate "${CONDA_ENV:-networkAlignmentAnalysis}"
else
echo "WARN: conda not found; assuming environment already activated." >&2
fi

export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
mkdir -p logs
export OMP_NUM_THREADS="${SLURM_CPUS_PER_TASK:-1}"

python scripts/run_experiment.py \
--config configs/examples/mnist_basic.yaml \
Expand Down
54 changes: 33 additions & 21 deletions slurm_jobs/run_single_model.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,23 @@
#SBATCH --cpus-per-task=16
#SBATCH --time=24:00:00
#SBATCH --mem=320GB
#SBATCH --partition=kempner_eng
#SBATCH --account=kempner_dev

# ============================================================================
# SINGLE MODEL PRUNING (Specify config via argument)
# ============================================================================
# Usage: sbatch run_single_model.sh <config_name>
# NOTE: Cluster-specific SBATCH settings like --partition/--account are intentionally omitted.
# Submit with your local settings, e.g.:
# sbatch --partition=<PARTITION> --account=<ACCOUNT> slurm_jobs/run_single_model.sh <config_name>
#
# Usage: sbatch slurm_jobs/run_single_model.sh <config_name>
#
# Examples:
# sbatch run_single_model.sh mistral7b_pruning
# sbatch run_single_model.sh llama2_7b_pruning
# sbatch run_single_model.sh gemma2b_pruning
# sbatch run_single_model.sh phi3_mini_pruning
# sbatch run_single_model.sh qwen2_7b_pruning
# sbatch run_single_model.sh gpt2_fast_test
# sbatch slurm_jobs/run_single_model.sh mistral7b_pruning
# sbatch slurm_jobs/run_single_model.sh llama2_7b_pruning
# sbatch slurm_jobs/run_single_model.sh gemma2b_pruning
# sbatch slurm_jobs/run_single_model.sh phi3_mini_pruning
# sbatch slurm_jobs/run_single_model.sh qwen2_7b_pruning
# sbatch slurm_jobs/run_single_model.sh gpt2_fast_test
#
# Available configs:
# - mistral7b_pruning (Mistral-7B)
Expand All @@ -38,10 +40,15 @@
CONFIG_NAME=${1:-"llama3_minitron_comparison"}
CONFIG="configs/examples/${CONFIG_NAME}.yaml"

set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$REPO_ROOT"

echo "============================================================================"
echo "SINGLE MODEL PRUNING: ${CONFIG_NAME}"
echo "============================================================================"
echo "Job ID: $SLURM_JOB_ID"
echo "Job ID: ${SLURM_JOB_ID:-N/A}"
echo "Node: $(hostname)"
echo "Config: $CONFIG"
echo "Start time: $(date)"
Expand All @@ -57,21 +64,26 @@ if [ ! -f "$CONFIG" ]; then
exit 1
fi

# Environment setup
module purge
module load cuda/12.2.0-fasrc01
eval "$(conda shell.bash hook)"
conda activate networkAlignmentAnalysis

cd /n/holylabs/kempner_dev/Users/hsafaai/Code/alignment

mkdir -p logs

export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
if command -v conda >/dev/null 2>&1; then
eval "$(conda shell.bash hook)"
conda activate "${CONDA_ENV:-networkAlignmentAnalysis}"
else
echo "WARN: conda not found; assuming environment already activated." >&2
fi

export OMP_NUM_THREADS="${SLURM_CPUS_PER_TASK:-1}"
export TOKENIZERS_PARALLELISM=false
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export HF_HOME=/n/home13/hsafaai/.cache/huggingface
export HF_TOKEN=$(cat /n/home13/hsafaai/.cache/huggingface/token)
export HF_HOME="${HF_HOME:-${HOME}/.cache/huggingface}"
HF_TOKEN_FILE="${HF_HOME}/token"
if [[ -f "$HF_TOKEN_FILE" ]]; then
export HF_TOKEN="$(cat "$HF_TOKEN_FILE")"
export HUGGINGFACE_HUB_TOKEN="${HF_TOKEN}"
else
echo "WARN: HF token file not found at $HF_TOKEN_FILE (set HF_TOKEN env var if needed)" >&2
fi

echo "============================================================================"
echo "Running experiment..."
Expand Down
Loading
Loading