Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Container image for the PageIndex FastAPI server (server.py).
FROM python:3.11-slim

WORKDIR /app

# Copy only the requirement files before the rest of the source so the
# dependency install layer can be reused when only application code changes.
COPY requirements.txt requirements-server.txt ./
RUN pip install --no-cache-dir -r requirements.txt -r requirements-server.txt

COPY . .

# server.py reads WORKSPACE_DIR at import time; docker-compose.yml mounts a
# named volume at this same path.
ENV WORKSPACE_DIR=/data/workspace
RUN mkdir -p /data/workspace

EXPOSE 8000

# Serve the `app` object from server.py on all interfaces, port 8000.
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
16 changes: 16 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Compose stack running the PageIndex API as a single service.
services:
  pageindex:
    build: .
    restart: unless-stopped
    ports:
      - "8000:8000"
    environment:
      # Passed through from the host environment / .env file.
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
      - WORKSPACE_DIR=/data/workspace
      # Comma-separated CORS origins; falls back to "*" when unset or empty.
      - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-*}
      # Falls back to an empty string, which server.py treats as
      # "no secret required".
      - API_SECRET=${API_SECRET:-}
    volumes:
      # Named volume mounted at the path given in WORKSPACE_DIR.
      - pageindex_data:/data/workspace

volumes:
  pageindex_data:
4 changes: 4 additions & 0 deletions requirements-server.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Extra dependencies for the HTTP server (server.py); the Dockerfile installs
# these together with requirements.txt.
fastapi==0.115.0
uvicorn[standard]==0.30.6
# NOTE(review): presumably required by FastAPI for the multipart file upload
# in POST /index - confirm.
python-multipart==0.0.9
# NOTE(review): only lower-bounded, unlike the exact pins above - confirm
# whether this should be pinned as well.
anthropic>=0.40.0
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@ litellm==1.83.7
# openai-agents # optional: required for examples/agentic_vectorless_rag_demo.py
pymupdf==1.26.4
PyPDF2==3.0.1
python-dotenv==1.2.2
python-dotenv==1.0.1
pyyaml==6.0.2

128 changes: 128 additions & 0 deletions server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import os
import tempfile
import anthropic
from fastapi import FastAPI, File, HTTPException, Header, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from pageindex.client import PageIndexClient

# FastAPI application exposing PageIndex indexing and question answering.
app = FastAPI(title="PageIndex API")

# ALLOWED_ORIGINS is a comma-separated list. Trim whitespace around each entry
# and drop empty ones so values such as "https://a.example, https://b.example"
# or a trailing comma do not produce origins that can never match.
_allowed_origins = [
    origin.strip()
    for origin in os.environ.get("ALLOWED_ORIGINS", "*").split(",")
    if origin.strip()
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_allowed_origins,
    allow_methods=["POST", "GET"],
    allow_headers=["*"],
)

# Directory handed to PageIndexClient as its workspace; created on startup.
WORKSPACE_DIR = os.environ.get("WORKSPACE_DIR", "./workspace")
# Shared secret expected in the X-Api-Secret header. An empty value disables
# the check (see verify_secret).
API_SECRET = os.environ.get("API_SECRET", "")
os.makedirs(WORKSPACE_DIR, exist_ok=True)

pi_client = PageIndexClient(
    model=os.environ.get("PAGEINDEX_MODEL", "claude-sonnet-4-6"),
    workspace=WORKSPACE_DIR,
)

# Constructed without arguments, so credentials come from the SDK defaults
# (docker-compose.yml supplies ANTHROPIC_API_KEY in the environment).
anthropic_client = anthropic.Anthropic()


def verify_secret(x_api_secret: str = Header(default="")):
    """Reject the request unless the supplied secret matches API_SECRET.

    When API_SECRET is empty the check is disabled and every request passes.

    Args:
        x_api_secret: Value of the ``X-Api-Secret`` request header.

    Raises:
        HTTPException: 401 when a secret is configured and the supplied value
            does not match it.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    if not API_SECRET:
        return

    # Constant-time comparison so response timing does not reveal how many
    # leading characters of a guess were correct. Compare bytes because
    # compare_digest rejects non-ASCII str arguments.
    supplied = x_api_secret.encode("utf-8")
    expected = API_SECRET.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Invalid API secret.")


@app.get("/health")
def health():
    # Liveness probe: always reports a fixed payload and touches no
    # dependencies.
    payload = {"status": "ok"}
    return payload


@app.post("/index")
async def index_document(
    file: UploadFile = File(...),
    x_api_secret: str = Header(default=""),
):
    """Index an uploaded PDF and return its document id.

    Args:
        file: The uploaded file; its name must end in ``.pdf``
            (case-insensitive).
        x_api_secret: Value of the ``X-Api-Secret`` header, checked by
            ``verify_secret``.

    Returns:
        A dict with the new ``doc_id`` and the original ``filename``.

    Raises:
        HTTPException: 401 for a bad secret, 400 when the upload is not
            named like a PDF.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    verify_secret(x_api_secret)

    # An upload part may carry no filename; treat that as "not a PDF"
    # instead of failing with AttributeError on None.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    tmp_path = None
    try:
        # delete=False because the indexer reopens the file by path after
        # this handle is closed; the finally clause removes it.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        # pi_client.index is a blocking call; run it in a worker thread so
        # the event loop keeps serving other requests (e.g. /health).
        # NOTE(review): assumes PageIndexClient tolerates concurrent calls,
        # as the threadpool-run /query handler already implies - confirm.
        doc_id = await asyncio.to_thread(pi_client.index, tmp_path)
        return {"doc_id": doc_id, "filename": file.filename}
    finally:
        # Also covers failures while reading or writing the upload, which
        # previously left the temp file behind.
        if tmp_path is not None:
            os.unlink(tmp_path)


# Request body for POST /query.
class QueryRequest(BaseModel):
    # Identifier of the document to query; passed to pi_client lookups.
    doc_id: str
    # Question to answer from that document.
    question: str


@app.post("/query")
def query_document(
    req: QueryRequest,
    x_api_secret: str = Header(default=""),
):
    """Answer a question about an indexed document with a tool-use loop.

    The model receives the document structure and may call the
    ``get_page_content`` tool to read pages before answering. At most ten
    model turns are made.

    Args:
        req: The document id and the question.
        x_api_secret: Value of the ``X-Api-Secret`` header, checked by
            ``verify_secret``.

    Returns:
        A dict ``{"answer": <text>}``.

    Raises:
        HTTPException: 401 for a bad secret; 500 when no answer is produced
            within the turn limit or the model stops for an unexpected
            reason.
    """
    verify_secret(x_api_secret)
    structure = pi_client.get_document_structure(req.doc_id)

    tools = [
        {
            "name": "get_page_content",
            "description": "Retrieve the text of specific pages from the document.",
            "input_schema": {
                "type": "object",
                "properties": {
                    "pages": {
                        "type": "string",
                        "description": "Pages to fetch. Examples: '5-7', '3,8', '12'.",
                    }
                },
                "required": ["pages"],
            },
        }
    ]

    messages = [
        {
            "role": "user",
            "content": f"Document structure:\n{structure}\n\nQuestion: {req.question}",
        }
    ]

    for _ in range(10):
        response = anthropic_client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=2048,
            system=(
                "You are a document analysis assistant. "
                "Use the document structure to navigate the document and answer the question. "
                "Always cite the page numbers where you found the information."
            ),
            tools=tools,
            messages=messages,
        )

        if response.stop_reason == "end_turn":
            # Join every text block so an answer split across several
            # blocks is not truncated to its first part.
            answer = "".join(
                block.text for block in response.content if hasattr(block, "text")
            )
            return {"answer": answer}

        if response.stop_reason != "tool_use":
            # Any other stop reason leaves `messages` unchanged, so another
            # iteration would resend the identical request. Fail now.
            break

        tool_results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            # Every tool_use block must be answered with a tool_result, so
            # malformed calls get an error result instead of being skipped
            # or raising KeyError.
            tool_input = block.input if isinstance(block.input, dict) else {}
            if block.name == "get_page_content" and "pages" in tool_input:
                content = pi_client.get_page_content(req.doc_id, tool_input["pages"])
                tool_results.append(
                    {"type": "tool_result", "tool_use_id": block.id, "content": content}
                )
            else:
                tool_results.append(
                    {
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": "Invalid tool call: use get_page_content with a 'pages' argument.",
                        "is_error": True,
                    }
                )

        if not tool_results:
            # No tool calls to answer; an empty user message must not be sent.
            break

        messages.append({"role": "assistant", "content": response.content})
        messages.append({"role": "user", "content": tool_results})

    raise HTTPException(status_code=500, detail="Could not answer the question.")
37 changes: 37 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
# PageIndex Azure VM setup script
# Run as root on a fresh Ubuntu 22.04 VM
#
# Installs Docker, clones the PageIndex repository into /opt/pageindex,
# writes a template .env file and starts the service with Docker Compose.
# Safe to re-run: an existing checkout is updated and an existing .env is
# left untouched.
set -euo pipefail

echo "=== Installing Docker ==="
apt-get update -qq
# NOTE(review): confirm docker-compose-plugin is available from the apt
# sources configured on the target VM.
apt-get install -y docker.io docker-compose-plugin git curl

systemctl enable docker
systemctl start docker

echo "=== Cloning PageIndex ==="
if [ -d /opt/pageindex/.git ]; then
    # Re-run: git clone would fail on the existing directory, so update the
    # checkout in place instead.
    cd /opt/pageindex
    git fetch origin
    git checkout feat/fastapi-server
    git pull --ff-only
else
    git clone https://github.com/clapointe-carbonleo/PageIndex.git /opt/pageindex
    cd /opt/pageindex
    git checkout feat/fastapi-server
fi

echo "=== Creating .env file ==="
if [ -f /opt/pageindex/.env ]; then
    # Do not clobber keys that were already filled in.
    echo "Existing /opt/pageindex/.env found; leaving it unchanged."
else
    cat > /opt/pageindex/.env << 'ENVEOF'
# Required — Anthropic API key for Claude
ANTHROPIC_API_KEY=sk-ant-REPLACE_ME

# Allowed origins (comma-separated) — set to your mike-legal Vercel URL
ALLOWED_ORIGINS=https://mike-legal-three.vercel.app

# Optional — secret token to protect the API (add to PAGEINDEX_SECRET in Vercel)
API_SECRET=REPLACE_WITH_RANDOM_SECRET
ENVEOF
fi
# The file holds API credentials; restrict it to the owner.
chmod 600 /opt/pageindex/.env

echo "=== Building and starting service ==="
cd /opt/pageindex
docker compose up -d --build

echo ""
echo "=== Done! ==="
echo "PageIndex service is running on port 8000."
# "docker compose restart" reuses the existing container and does not pick
# up changed environment values; "up -d" recreates it with the new .env.
echo "Edit /opt/pageindex/.env to set your real API keys, then run: cd /opt/pageindex && docker compose up -d"