36 changes: 36 additions & 0 deletions .env.example
@@ -0,0 +1,36 @@
# Paper2Code .env file example
# Copy this file to .env and modify as needed

# IMPORTANT: When using LiteLLM, ensure you have both litellm AND any provider-specific
# dependencies installed. For AWS Bedrock, this includes boto3.

# Uncomment ONE of the following provider configurations:

# 1. AWS Bedrock Configuration
AWS_REGION=us-west-2
BEDROCK_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
DISABLE_PROMPT_CACHING=0 # Set to 1 to disable caching
AWS_SHARED_CREDENTIALS_FILE=~/.aws/credentials
AWS_CONFIG_FILE=~/.aws/config
# NOTE: For AWS Bedrock, you must:
# 1. Have boto3 installed (pip install boto3)
# 2. Have valid AWS credentials configured
# 3. Have appropriate permissions to use the specified model

# 2. OpenAI
# OPENAI_API_KEY=your_api_key_here
# OPENAI_MODEL=o3-mini # Default if not specified

# 3. Direct Anthropic API (not Bedrock)
# ANTHROPIC_API_KEY=your_api_key_here
# ANTHROPIC_MODEL=claude-3-sonnet-20240229 # Default if not specified


# Paper-specific settings (these should be set via command line args or script)
# PAPER_NAME=Transformer
# PDF_PATH=./examples/Transformer.pdf
# PDF_JSON_PATH=./examples/Transformer.json
# PDF_JSON_CLEANED_PATH=./examples/Transformer_cleaned.json
# PDF_LATEX_PATH=./examples/Transformer_cleaned.tex
# OUTPUT_DIR=./outputs/Transformer
# OUTPUT_REPO_DIR=./outputs/Transformer_repo
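
The scripts in `codes/` import a `load_dotenv.load_env_config()` helper to pick these values up; its implementation is not shown in this diff, but a minimal loader along these lines (assuming the `python-dotenv` package) illustrates the idea:

```python
# Illustrative sketch only -- not the repository's actual load_dotenv.load_env_config().
# Assumes the python-dotenv package: pip install python-dotenv
import os
from dotenv import load_dotenv

def load_env_config() -> None:
    """Load the project-root .env so os.getenv() sees the provider settings above."""
    load_dotenv()

load_env_config()
print(os.getenv("BEDROCK_MODEL"))           # e.g. anthropic.claude-3-sonnet-20240229-v1:0
print(os.getenv("DISABLE_PROMPT_CACHING"))  # "0" or "1"
```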
76 changes: 73 additions & 3 deletions README.md
@@ -45,6 +45,55 @@ cd scripts
bash run_llm.sh
```

### Using Other LLM Providers with LiteLLM
- PaperCoder now supports any LLM provider available through [LiteLLM](https://github.com/BerriAI/litellm)
- Configure your model settings in a `.env` file in the project root directory (see `.env.example`)
- Supports standard LiteLLM provider syntax (see the minimal call sketch just below), including:
  - AWS Bedrock (`bedrock/model-name`) - requires boto3
  - OpenAI (`openai/model-name`) - uses o3-mini by default
  - Anthropic (`anthropic/model-name`) - direct API access
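
As a reference point, a minimal call through LiteLLM using this prefix syntax looks roughly like the sketch below; the model ids are illustrative, and the response follows LiteLLM's OpenAI-style format.

```python
# Minimal LiteLLM sketch -- illustrative model ids, not a prescribed configuration.
import litellm

messages = [{"role": "user", "content": "Summarize the Transformer paper in one sentence."}]

# AWS Bedrock (requires boto3 and valid AWS credentials):
response = litellm.completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=messages,
)

# OpenAI:    litellm.completion(model="openai/o3-mini", messages=messages)
# Anthropic: litellm.completion(model="anthropic/claude-3-sonnet-20240229", messages=messages)

print(response.choices[0].message.content)
```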

#### LiteLLM Provider Configurations
Choose ONE of the following provider configurations in your .env file:

##### 1. AWS Bedrock
```
AWS_REGION=<your-region>
BEDROCK_MODEL=<model-name> # e.g., anthropic.claude-3-sonnet-20240229-v1:0
DISABLE_PROMPT_CACHING=0
AWS_SHARED_CREDENTIALS_FILE=~/.aws/credentials
AWS_CONFIG_FILE=~/.aws/config
```
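
Before running the pipeline against Bedrock, a quick credentials-and-region check can save a failed run. This is only a sketch and assumes a reasonably recent `boto3` (one that ships the `bedrock-runtime` client):

```python
# Bedrock sanity check -- a standalone sketch, not part of the repository scripts.
import os
import boto3

region = os.getenv("AWS_REGION", "us-west-2")
session = boto3.Session(region_name=region)

credentials = session.get_credentials()
if credentials is None:
    raise SystemExit("No AWS credentials found; check ~/.aws/credentials or your environment.")

# Creating the client catches an unsupported region or an outdated boto3 early.
bedrock_runtime = session.client("bedrock-runtime")
print(f"bedrock-runtime client created for region {region}")
```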

##### 2. OpenAI
```
OPENAI_API_KEY=<your-openai-api-key>
OPENAI_MODEL=o3-mini # Default if not specified
```

##### 3. Anthropic Direct API
```
ANTHROPIC_API_KEY=<your-anthropic-api-key>
ANTHROPIC_MODEL=claude-3-sonnet-20240229 # Default if not specified
```

```bash
# Install LiteLLM
pip install litellm

# For provider-specific dependencies:
# - AWS Bedrock requires boto3
pip install boto3

# Copy and modify the example .env file
cp .env.example .env
# Edit the .env file with your provider configuration

# Run the scripts - they will use LiteLLM if configured or fall back to vLLM
cd scripts
bash run_llm.sh
```
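
The actual selection between LiteLLM and vLLM happens inside the repository's `litellm_utils.get_llm_client()` helper, whose body is not part of this diff; conceptually, the fallback described in the comment above amounts to something like this sketch:

```python
# Conceptual sketch of the LiteLLM-vs-vLLM fallback -- not the repo's actual get_llm_client().
import os

def litellm_configured() -> bool:
    """Treat LiteLLM as configured if any provider key from .env is present."""
    return any(os.getenv(key) for key in ("BEDROCK_MODEL", "OPENAI_API_KEY", "ANTHROPIC_API_KEY"))

backend = "litellm" if litellm_configured() else "vllm"
print(f"Selected backend: {backend}")  # litellm routes requests to a hosted provider; vllm loads a local model
```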

### Output Folder Structure (Only Important Files)
```bash
outputs
@@ -65,11 +114,14 @@ outputs
- For OpenAI API: `openai`
- For open-source models: `vllm`
  - If you encounter any issues installing vLLM, please refer to the [official vLLM repository](https://github.com/vllm-project/vllm).
- For other LLM providers (like AWS Bedrock): `litellm`
  - Check the [LiteLLM documentation](https://github.com/BerriAI/litellm) for supported models and configurations.


```bash
pip install openai
pip install vllm
pip install litellm
```

- Or, if you prefer, you can install all dependencies using `pip`:
@@ -132,8 +184,9 @@ bash run_latex.sh
```


#### Using Open Source Models with vLLM
- The default model is `deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct`.
#### Using Open Source Models with vLLM or LiteLLM
- The default model is `deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct`, served with vLLM.
- To use a LiteLLM provider instead, create a `.env` file in the project root with your provider configuration (see the example configuration below).

```bash
# Using the PDF-based JSON format of the paper
@@ -147,6 +200,23 @@ cd scripts
bash run_latex_llm.sh
```

```bash
# Example .env configuration (AWS Bedrock with Claude)
AWS_REGION=eu-north-1
BEDROCK_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
DISABLE_PROMPT_CACHING=0
AWS_SHARED_CREDENTIALS_FILE=~/.aws/credentials
AWS_CONFIG_FILE=~/.aws/config

# Or for OpenAI
# OPENAI_API_KEY=your-api-key
# OPENAI_MODEL=o3-mini

# Or for Anthropic Direct API
# ANTHROPIC_API_KEY=your-api-key
# ANTHROPIC_MODEL=claude-3-sonnet-20240229
```
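
Note that `BEDROCK_MODEL` above is written without the `bedrock/` prefix that LiteLLM's provider syntax uses; one plausible way to bridge the two (how the repository's helper actually does it is not shown in this diff) is a small normalization step:

```python
# Sketch: mapping the .env variables above to a LiteLLM model id.
# The variable names come from .env.example; the normalization logic is an assumption.
import os

bedrock_model = os.getenv("BEDROCK_MODEL")
if bedrock_model:
    model_id = bedrock_model if bedrock_model.startswith("bedrock/") else f"bedrock/{bedrock_model}"
elif os.getenv("OPENAI_API_KEY"):
    model_id = f"openai/{os.getenv('OPENAI_MODEL', 'o3-mini')}"  # default noted in the comment above
elif os.getenv("ANTHROPIC_API_KEY"):
    model_id = f"anthropic/{os.getenv('ANTHROPIC_MODEL', 'claude-3-sonnet-20240229')}"
else:
    raise RuntimeError("No LiteLLM provider configured in .env")

print(model_id)
```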

---

## 📦 Paper2Code Benchmark Datasets
45 changes: 14 additions & 31 deletions codes/1_planning_llm.py
@@ -3,8 +3,11 @@
import os
import sys
from utils import print_response
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
from load_dotenv import load_env_config
from litellm_utils import get_llm_client, run_inference

# Load environment variables
load_env_config()

parser = argparse.ArgumentParser()

@@ -221,37 +224,17 @@
}]


model_name = args.model_name
tokenizer = AutoTokenizer.from_pretrained(model_name)


if "Qwen" in model_name:
    llm = LLM(model=model_name,
              tensor_parallel_size=tp_size,
              max_model_len=max_model_len,
              gpu_memory_utilization=0.95,
              trust_remote_code=True, enforce_eager=True,
              rope_scaling={"factor": 4.0, "original_max_position_embeddings": 32768, "type": "yarn"})
    sampling_params = SamplingParams(temperature=temperature, max_tokens=131072)

elif "deepseek" in model_name:
    llm = LLM(model=model_name,
              tensor_parallel_size=tp_size,
              max_model_len=max_model_len,
              gpu_memory_utilization=0.95,
              trust_remote_code=True, enforce_eager=True)
    sampling_params = SamplingParams(temperature=temperature, max_tokens=128000, stop_token_ids=[tokenizer.eos_token_id])

# Initialize LLM client (either LiteLLM from environment or vLLM from args)
client, tokenizer, is_litellm, sampling_params = get_llm_client(
    model_name=model_name,
    tp_size=tp_size,
    max_model_len=max_model_len,
    temperature=temperature
)

def run_llm(msg):
    # vllm
    prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in [msg]]

    outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)

    completion = [output.outputs[0].text for output in outputs]

    return completion[0]
    """Run inference using either LiteLLM or vLLM based on configuration"""
    return run_inference(client, tokenizer, is_litellm, sampling_params, msg)

responses = []
trajectories = []
44 changes: 14 additions & 30 deletions codes/2_analyzing_llm.py
@@ -4,11 +4,14 @@
from utils import extract_planning, content_to_json, print_response
import copy
import sys
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
from load_dotenv import load_env_config
from litellm_utils import get_llm_client, run_inference

import argparse

# Load environment variables
load_env_config()

parser = argparse.ArgumentParser()

parser.add_argument('--paper_name',type=str)
@@ -146,36 +149,17 @@ def get_write_msg(todo_file_name, todo_file_desc)



model_name = args.model_name
tokenizer = AutoTokenizer.from_pretrained(model_name)


if "Qwen" in model_name:
    llm = LLM(model=model_name,
              tensor_parallel_size=tp_size,
              max_model_len=max_model_len,
              gpu_memory_utilization=0.95,
              trust_remote_code=True, enforce_eager=True,
              rope_scaling={"factor": 4.0, "original_max_position_embeddings": 32768, "type": "yarn"})
    sampling_params = SamplingParams(temperature=temperature, max_tokens=131072)

elif "deepseek" in model_name:
    llm = LLM(model=model_name,
              tensor_parallel_size=tp_size,
              max_model_len=max_model_len,
              gpu_memory_utilization=0.95,
              trust_remote_code=True, enforce_eager=True)
    sampling_params = SamplingParams(temperature=temperature, max_tokens=128000, stop_token_ids=[tokenizer.eos_token_id])
# Initialize LLM client (either LiteLLM from environment or vLLM from args)
client, tokenizer, is_litellm, sampling_params = get_llm_client(
    model_name=model_name,
    tp_size=tp_size,
    max_model_len=max_model_len,
    temperature=temperature
)

def run_llm(msg):
    # vllm
    prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in [msg]]

    outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)

    completion = [output.outputs[0].text for output in outputs]

    return completion[0]
    """Run inference using either LiteLLM or vLLM based on configuration"""
    return run_inference(client, tokenizer, is_litellm, sampling_params, msg)

artifact_output_dir=f'{output_dir}/analyzing_artifacts'
os.makedirs(artifact_output_dir, exist_ok=True)
45 changes: 14 additions & 31 deletions codes/3_coding_llm.py
@@ -4,11 +4,14 @@
import sys
import copy
from utils import extract_planning, content_to_json, extract_code_from_content,extract_code_from_content2, print_response, print_log_cost, load_accumulated_cost, save_accumulated_cost
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
from load_dotenv import load_env_config
from litellm_utils import get_llm_client, run_inference

import argparse

# Load environment variables
load_env_config()

parser = argparse.ArgumentParser()

parser.add_argument('--paper_name',type=str)
@@ -154,37 +157,17 @@ def get_write_msg(todo_file_name, detailed_logic_analysis, done_file_lst)
## Code: {todo_file_name}"""}]
    return write_msg

model_name = args.model_name
tokenizer = AutoTokenizer.from_pretrained(model_name)


if "Qwen" in model_name:
    llm = LLM(model=model_name,
              tensor_parallel_size=tp_size,
              max_model_len=max_model_len,
              gpu_memory_utilization=0.95,
              trust_remote_code=True, enforce_eager=True,
              rope_scaling={"factor": 4.0, "original_max_position_embeddings": 32768, "type": "yarn"})
    sampling_params = SamplingParams(temperature=temperature, max_tokens=131072)

elif "deepseek" in model_name:
    llm = LLM(model=model_name,
              tensor_parallel_size=tp_size,
              max_model_len=max_model_len,
              gpu_memory_utilization=0.95,
              trust_remote_code=True, enforce_eager=True)
    sampling_params = SamplingParams(temperature=temperature, max_tokens=128000, stop_token_ids=[tokenizer.eos_token_id])

# Initialize LLM client (either LiteLLM from environment or vLLM from args)
client, tokenizer, is_litellm, sampling_params = get_llm_client(
    model_name=model_name,
    tp_size=tp_size,
    max_model_len=max_model_len,
    temperature=temperature
)

def run_llm(msg):
    # vllm
    prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in [msg]]

    outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)

    completion = [output.outputs[0].text for output in outputs]

    return completion[0]
    """Run inference using either LiteLLM or vLLM based on configuration"""
    return run_inference(client, tokenizer, is_litellm, sampling_params, msg)


# testing for checking