-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathllm.py
More file actions
33 lines (27 loc) · 1.24 KB
/
llm.py
File metadata and controls
33 lines (27 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import os
import time
import streamlit as st
from groq import Groq
# System message used by stream_answer: restricts the model to the supplied
# context chunks and tells it not to reference chunk labels in its answer.
SYSTEM_PROMPT = (
    "You are a helpful assistant. "
    "Answer the user's question using ONLY the provided context chunks. "
    "Be direct and concise. "
    "Do not mention chunk numbers, chunk labels, or say things like "
    "'according to Chunk 1'. "
    "Just answer naturally using the information. "
    "If the context does not contain enough information, say so clearly. "
    "Do not hallucinate or use outside knowledge."
)
def get_client(api_key: str) -> Groq:
    """Build a Groq API client authenticated with ``api_key``.

    A new client object is constructed on every call; nothing is cached.
    """
    groq_client = Groq(api_key=api_key)
    return groq_client
def _iter_delta_text(stream):
    """Yield the text fragment carried by each streamed completion chunk."""
    for chunk in stream:
        # Guard against chunks that carry no choices (OpenAI-compatible
        # streams can emit these, e.g. metadata-only chunks); indexing
        # choices[0] on them would raise IndexError mid-render.
        if not chunk.choices:
            continue
        # delta.content may be None (e.g. role-only or final chunk); emit ""
        # so the consumer always receives a string.
        yield chunk.choices[0].delta.content or ""


def stream_answer(query: str, context: str, api_key: str, model: str, temperature: float):
    """Stream a context-grounded answer into the Streamlit chat UI.

    Sends ``SYSTEM_PROMPT`` plus a user message built from ``context`` and
    ``query`` to the Groq chat-completions endpoint with streaming enabled,
    and renders the incoming text inside an "assistant" chat message.

    Args:
        query: The user's question.
        context: Context text placed ahead of the question in the prompt.
        api_key: Groq API key used to create the client.
        model: Model identifier passed to the completions call.
        temperature: Sampling temperature passed to the completions call.

    Returns:
        A ``(response, latency, model)`` tuple: the value returned by
        ``st.write_stream``, the elapsed seconds from just before the
        request until streaming finished (rounded to 2 decimals), and the
        ``model`` argument unchanged.
    """
    client = get_client(api_key)
    user_prompt = f"Context:\n{context}\n\nQuestion: {query}"

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    stream = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        stream=True,
        temperature=temperature,
        max_tokens=1024,
    )

    with st.chat_message("assistant"):
        response = st.write_stream(_iter_delta_text(stream))

    latency = round(time.perf_counter() - start, 2)
    return response, latency, model