Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# direnv configuration: loads the devenv environment for this directory.

# Evaluate the shell code printed by `devenv direnvrc`
# (presumably this is what defines the `use devenv` function called below).
eval "$(devenv direnvrc)"

# You can pass flags to the devenv command
# For example: use devenv --impure --option services.postgres.enable:bool true
use devenv
16 changes: 16 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,19 @@ charts/*/charts
**/flagged/
web-apps/**/overrides.yml
**/.env

# devenv
# NOTE(review): `.devenv` and `.devenv.flake.nix` are also matched by the
# `.devenv*` pattern in the "Devenv" section below, so the two sections overlap.
.devenv
.devenv.flake.nix
# NOTE(review): with devenv.lock ignored, the lock file is never committed, so
# input revisions are presumably not pinned across checkouts - confirm intended.
devenv.lock

# Devenv
.devenv*
devenv.local.nix
devenv.local.yaml

# direnv
.direnv

# pre-commit
# Presumably generated by the git-hooks integration rather than hand-written - confirm.
.pre-commit-config.yaml
133 changes: 133 additions & 0 deletions devenv.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
{ pkgs, ... }:
let
# Space-separated names of the components the build/scan scripts can target.
allComponents = "chat image-analysis flux-image-gen";
# Image repository prefix; image_name expands it to "<prefix>-<component>-ui".
imagePrefix = "ghcr.io/stackhpc/azimuth-llm";
# Docker build context and parent directory of each component's Dockerfile.
# Relative path: it is resolved against the directory the script is run from.
webAppsDir = "./web-apps";

# Shell prelude interpolated at the top of each script below. It defines:
#   resolve_components "<names>"
#       Prints the full component list when the argument is empty or "all".
#       Otherwise checks every space-separated name against ALL_COMPONENTS and
#       prints the input unchanged, or returns 1 (with a message on stderr)
#       at the first unknown name.
#   image_name <component>
#       Prints "<imagePrefix>-<component>-ui".
resolveComponents = ''
  ALL_COMPONENTS="${allComponents}"

  resolve_components() {
    local input="$1"
    local c
    if [ -z "$input" ] || [ "$input" = "all" ]; then
      echo "$ALL_COMPONENTS"
      return 0
    fi
    for c in $input; do
      # Literal whole-word match. A quoted expansion inside a case pattern is
      # taken literally, so names such as "ch.t" or ".*" are rejected rather
      # than being interpreted as a regular expression.
      case " $ALL_COMPONENTS " in
        *" $c "*) ;;
        *)
          echo "Unknown component: $c" >&2
          echo "Available: $ALL_COMPONENTS" >&2
          return 1
          ;;
      esac
    done
    echo "$input"
  }

  image_name() {
    echo "${imagePrefix}-''${1}-ui"
  }
'';
in
{
# Banner text; enterShell echoes it as "$GREET".
env.GREET = "azimuth-llm dev environment";

# Tools made available in the development shell.
# NOTE(review): the build script invokes `docker`, which is not listed here,
# so it presumably has to be provided by the host - confirm.
packages = with pkgs; [
git
# Container
grype
# Helm / K8s
kubernetes-helm
chart-testing
kind
kubectl
# CI tooling
jq
yq-go
# Python
python311
ruff
black
];

# Formatting: nixfmt and black are the treefmt programs enabled here.
treefmt = {
enable = true;
config.programs = {
nixfmt.enable = true;
black.enable = true;
};
};

# Run treefmt as a git hook.
git-hooks.hooks = {
treefmt = {
enable = true;
# Presumably lets the hook pass even when treefmt rewrote files - confirm
# against the git-hooks.nix treefmt hook options.
settings.fail-on-change = false;
};
};

# NOTE(review): presumably switches git diffs to difftastic - confirm against the devenv options.
difftastic.enable = true;

scripts = {
# build [component ...] [--tag TAG]
# Builds the container image of every requested component (all components when
# none are named) and tags it "<imagePrefix>-<component>-ui:TAG"; TAG defaults
# to "latest". Exits 1 on an unknown component, on --tag without a value, and
# at the first failed docker build.
build.exec = ''
  ${resolveComponents}
  TAG="latest"
  COMPONENT=""
  while [ $# -gt 0 ]; do
    case "$1" in
      --tag)
        # "shift 2" with a single remaining argument fails without shifting,
        # which would spin this loop forever, so require the value up front.
        if [ $# -lt 2 ]; then
          echo "Option --tag requires a value" >&2
          exit 1
        fi
        TAG="$2"
        shift 2
        ;;
      *) COMPONENT="$COMPONENT $1"; shift ;;
    esac
  done
  # Drop the separator space left in front of the first component name.
  COMPONENT="''${COMPONENT## }"

  TARGETS=$(resolve_components "$COMPONENT") || exit 1
  for c in $TARGETS; do
    echo "==> Building $c (tag: $TAG)"
    # Stop at the first failure so that a later successful build cannot mask
    # it in the exit status of the script.
    docker build \
      -t "$(image_name "$c"):$TAG" \
      -f ${webAppsDir}/"$c"/Dockerfile \
      ${webAppsDir}/ || exit 1
  done
'';

# scan [component ...] [--tag TAG] [--fail-on SEVERITY]
# For every requested component (all components when none are named): builds
# the image through the build script, then runs grype against it with
# --only-fixed. TAG defaults to "latest" and SEVERITY to "critical".
# Exits 1 on invalid arguments, or when any component fails to build or fails
# the scan; the remaining components are still processed after such a failure.
scan.exec = ''
  ${resolveComponents}
  TAG="latest"
  FAIL_ON="critical"
  COMPONENT=""
  while [ $# -gt 0 ]; do
    case "$1" in
      --tag)
        # "shift 2" with a single remaining argument fails without shifting,
        # which would spin this loop forever, so require the value up front.
        if [ $# -lt 2 ]; then
          echo "Option --tag requires a value" >&2
          exit 1
        fi
        TAG="$2"
        shift 2
        ;;
      --fail-on)
        if [ $# -lt 2 ]; then
          echo "Option --fail-on requires a value" >&2
          exit 1
        fi
        FAIL_ON="$2"
        shift 2
        ;;
      *) COMPONENT="$COMPONENT $1"; shift ;;
    esac
  done
  # Drop the separator space left in front of the first component name.
  COMPONENT="''${COMPONENT## }"

  TARGETS=$(resolve_components "$COMPONENT") || exit 1
  EXIT=0
  for c in $TARGETS; do
    # Never scan after a failed build: an image left over from an earlier
    # build could otherwise be scanned and reported as the current result.
    if ! build "$c" --tag "$TAG"; then
      echo "==> Build failed for $c; skipping scan" >&2
      EXIT=1
      continue
    fi

    IMAGE="$(image_name "$c"):$TAG"
    echo ""
    echo "==> Scanning $IMAGE (fail-on: $FAIL_ON)"
    if ! grype "$IMAGE" --fail-on "$FAIL_ON" --only-fixed; then
      EXIT=1
    fi
  done
  exit $EXIT
'';
};

# Printed when the shell is entered: the greeting followed by a short command
# reference. The component list is derived from allComponents so the help text
# cannot drift from the names the scripts actually accept.
enterShell = ''
  echo "$GREET"
  echo ""
  echo "Commands (component = ${builtins.replaceStrings [ " " ] [ " | " ] allComponents} | omit for all):"
  echo ""
  echo " prek -a Format/lint all files"
  echo " build [component] [--tag TAG] Build container image(s)"
  echo " scan [component] [--tag TAG] [--fail-on SEV] Build if needed + Grype scan"
  echo ""
'';
}
13 changes: 13 additions & 0 deletions devenv.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Inputs for the devenv environment. nixpkgs tracks the nixpkgs-unstable
# branch; the nixpkgs input of both git-hooks and treefmt-nix is set to follow
# the nixpkgs input declared here.
inputs:
git-hooks:
url: github:cachix/git-hooks.nix
inputs:
nixpkgs:
follows: nixpkgs
nixpkgs:
url: github:NixOS/nixpkgs/nixpkgs-unstable
treefmt-nix:
url: github:numtide/treefmt-nix
inputs:
nixpkgs:
follows: nixpkgs
12 changes: 9 additions & 3 deletions scripts/perf-test/stress.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
prompts = [
"Hi, how are you?",
"What's the weather like with you?",
"Who's the best footballer of all time?"
"Who's the best footballer of all time?",
]

client_count = 3
request_count = 5 # Requests per client
request_count = 5 # Requests per client


def make_requests(client_id: int):
client = Client(url)
Expand All @@ -32,7 +33,12 @@ def make_requests(client_id: int):
timings.append(time.time() - start_time)
return timings

results = list(Parallel(n_jobs=client_count)(delayed(make_requests)(i) for i in range(1, client_count+1)))

results = list(
Parallel(n_jobs=client_count)(
delayed(make_requests)(i) for i in range(1, client_count + 1)
)
)
all_timings = []
for client_timings in results:
all_timings += client_timings
Expand Down
25 changes: 14 additions & 11 deletions web-apps/chat/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,23 +62,26 @@ class PossibleSystemPromptException(Exception):
streaming=True,
)


def inference(latest_message, history):
# Allow mutating global variable
global BACKEND_INITIALISED
log.debug("Inference request received with history: %s", history)

try:
context = []
model_instruction = settings.model_instruction.replace("{date}", f"{date.today()}")
model_instruction = settings.model_instruction.replace(
"{date}", f"{date.today()}"
)
if INCLUDE_SYSTEM_PROMPT:
context.append(SystemMessage(content=model_instruction))
elif history and len(history) > 0:
# Mimic system prompt by prepending it to first human message
history[0]['content'] = f"{model_instruction}\n\n{history[0]['content']}"
history[0]["content"] = f"{model_instruction}\n\n{history[0]['content']}"

for message in history:
role = message['role']
content = message['content']
role = message["role"]
content = message["content"]
if role == "user":
context.append(HumanMessage(content=content))
else:
Expand All @@ -102,10 +105,10 @@ def inference(latest_message, history):
# The "think" tags mark the chatbot's reasoning. Remove the content
# and replace with "Thinking..." until the closing tag is found.
content = chunk.content
if '<think>' in content or thinking:
if "<think>" in content or thinking:
thinking = True
response = "Thinking..."
if '</think>' in content:
if "</think>" in content:
thinking = False
response = ""
else:
Expand Down Expand Up @@ -175,7 +178,7 @@ def inference_wrapper(*args):
js=settings.custom_javascript,
title=settings.page_title,
) as demo:
gr.Markdown('# ' + settings.page_title)
gr.Markdown("# " + settings.page_title)
gr.ChatInterface(
inference_wrapper,
type="messages",
Expand All @@ -187,10 +190,10 @@ def inference_wrapper(*args):
sanitize_html=True,
autoscroll=False,
latex_delimiters=[
{"left": "$$", "right": "$$", "display": True },
{"left": "$", "right": "$", "display": False }
],
),
{"left": "$$", "right": "$$", "display": True},
{"left": "$", "right": "$", "display": False},
],
),
)


Expand Down
2 changes: 1 addition & 1 deletion web-apps/chat/gradio-client-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
gradio_host = sys.argv[1]

retries = 60
for n in range(1, retries+1):
for n in range(1, retries + 1):
try:
client = Client(gradio_host)
result = client.predict("Hi", api_name="/chat")
Expand Down
2 changes: 2 additions & 0 deletions web-apps/chat/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
url = os.environ.get("GRADIO_URL", "http://localhost:7860")
client = Client(url)


class TestSuite(unittest.TestCase):

def test_gradio_api(self):
Expand All @@ -19,5 +20,6 @@ def test_gradio_api(self):
# # mock_response.assert_called_once_with("Hi", [])
# self.assertEqual(result, "Mocked")


if __name__ == "__main__":
unittest.main()
6 changes: 5 additions & 1 deletion web-apps/flux-image-gen/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ class ImageGenInput(BaseModel):
prompt: str
add_sampling_metadata: bool


@app.get("/")
def health_check():
return "Server is running"


@app.get("/model")
async def get_model():
return {"model": model}
Expand All @@ -61,7 +63,9 @@ async def generate_image(input: ImageGenInput):
add_sampling_metadata=input.add_sampling_metadata,
)
if not image:
return JSONResponse({"error": {"message": msg, "seed": seed}}, status_code=400)
return JSONResponse(
{"error": {"message": msg, "seed": seed}}, status_code=400
)
# Convert image to bytes response
buffer = io.BytesIO()
image.save(buffer, format="jpeg")
Expand Down
Loading
Loading