Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions core/helm-charts/vllm/xeon-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,39 @@ modelConfigs:
tensor_parallel_size: "{{ .Values.tensor_parallel_size }}"
pipeline_parallel_size: "{{ .Values.pipeline_parallel_size }}"

# Qwen/Qwen3-Coder-30B-A3B-Instruct — MoE tool-calling model (~80 GB RAM)
"Qwen/Qwen3-Coder-30B-A3B-Instruct":
  configMapValues:
    # GiB of host RAM reserved for the CPU KV cache.
    VLLM_CPU_KVCACHE_SPACE: "10"
    # RPC timeout (ms) between engine processes; generous for slow CPU startup.
    VLLM_RPC_TIMEOUT: "100000"
    VLLM_ALLOW_LONG_MAX_MODEL_LEN: "1"
    VLLM_ENGINE_ITERATION_TIMEOUT_S: "120"
    VLLM_CPU_NUM_OF_RESERVED_CPU: "0"
    VLLM_CPU_SGL_KERNEL: "1"
    # Disable the Xet-backed Hugging Face Hub download path.
    HF_HUB_DISABLE_XET: "1"
    LOGNAME: "vllm"
  # Block-style sequence (one arg per line) instead of a multi-line flow
  # sequence, and dashed flag spelling throughout — vLLM's argument parser
  # treats "--enable_chunked_prefill" and "--enable-chunked-prefill" as the
  # same flag, so normalizing to dashes matches the other args here.
  extraCmdArgs:
    - "--block-size"
    - "128"
    - "--dtype"
    - "bfloat16"
    - "--max-model-len"
    - "32768"
    - "--distributed-executor-backend"
    - "mp"
    - "--enable-chunked-prefill"
    - "--enforce-eager"
    - "--max-num-batched-tokens"
    - "2048"
    - "--max-num-seqs"
    - "8"
    - "--enable-auto-tool-choice"
    - "--tool-call-parser"
    - "qwen3_coder"
  tensor_parallel_size: "{{ .Values.tensor_parallel_size }}"
  pipeline_parallel_size: "{{ .Values.pipeline_parallel_size }}"

defaultModelConfigs:
configMapValues:
VLLM_CPU_KVCACHE_SPACE: "40"
Expand Down
12 changes: 10 additions & 2 deletions core/lib/models/model-selection.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,10 @@ model_selection(){
echo "24. deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
echo "25. Qwen/Qwen3-1.7B"
echo "26. Qwen/Qwen3-4B-Instruct-2507"
echo "27. Qwen/Qwen3-Coder-30B-A3B-Instruct"
# -r keeps read from interpreting backslashes in user input (ShellCheck SC2162)
read -rp "Enter the number of the CPU model you want to deploy/remove: " cpu_model
# Validate input: must be exactly one of the CPU menu numbers listed above
if ! [[ "$cpu_model" =~ ^(21|22|23|24|25|26|27)$ ]]; then
  echo "Error: Invalid model selected ($cpu_model). Exiting." >&2
  exit 1
fi
Expand Down Expand Up @@ -242,14 +243,21 @@ get_model_names() {
fi
model_names+=("cpu-qwen3-4b")
;;
27)
  # Menu choice 27 = Qwen3-Coder-30B, a CPU-only entry; reject if the caller
  # asked for a GPU deployment/removal.
  if [ "$cpu_or_gpu" = "g" ]; then
    echo "Error: CPU model identifier provided for GPU deployment/removal." >&2
    exit 1
  fi
  # Short identifier consumed by the deploy playbook's install/uninstall tags.
  model_names+=("cpu-qwen3-coder-30b")
  ;;
"llama-8b"|"llama-70b"|"codellama-34b"|"mixtral-8x-7b"|"mistral-7b"|"tei"|"tei-rerank"|"falcon3-7b"|"deepseek-r1-distill-qwen-32b"|"deepseek-r1-distill-llama8b"|"llama3-405b"|"llama-3-3-70b"|"llama-4-scout-17b"|"qwen-2-5-32b")
if [ "$cpu_or_gpu" = "c" ]; then
echo "Error: GPU model identifier provided for CPU deployment/removal." >&2
exit 1
fi
model_names+=("$model")
;;
"cpu-llama-8b"|"cpu-deepseek-r1-distill-qwen-32b"|"cpu-deepseek-r1-distill-llama8b"|"cpu-qwen3-1-7b"|"cpu-llama-3-2-3b"|"cpu-qwen3-4b")
"cpu-llama-8b"|"cpu-deepseek-r1-distill-qwen-32b"|"cpu-deepseek-r1-distill-llama8b"|"cpu-qwen3-1-7b"|"cpu-llama-3-2-3b"|"cpu-qwen3-4b"|"cpu-qwen3-coder-30b")
if [ "$cpu_or_gpu" = "g" ]; then
echo "Error: CPU model identifier provided for GPU deployment/removal." >&2
exit 1
Expand Down
104 changes: 104 additions & 0 deletions core/playbooks/deploy-inference-models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2582,6 +2582,110 @@
- "'cpu-qwen3-4b' in (model_name_list | regex_replace(',', ' ') | split())"


# Look up an existing Helm release for the CPU Qwen3-Coder-30B deployment;
# "helm list --short" prints the release name, or nothing when absent.
- name: Check if CPU Qwen3-Coder-30B is installed
  ansible.builtin.command:
    cmd: "helm list --filter vllm-qwen3-coder-30b-cpu --short"
  register: helm_release_installed
  # Pure query: it never mutates cluster state, so don't report "changed".
  changed_when: false
  ignore_errors: true
  run_once: true
  tags: uninstall-cpu-qwen3-coder-30b
# Remove the Helm release, but only when uninstall mode was requested, this
# model is in the requested list, and the preceding check actually found a
# matching release.
- name: Uninstall Qwen3-Coder-30B
  ansible.builtin.command:
    cmd: helm uninstall vllm-qwen3-coder-30b-cpu
  run_once: true
  tags: uninstall-cpu-qwen3-coder-30b
  when:
    - model_name_list is defined
    - "'cpu-qwen3-coder-30b' in (model_name_list | regex_replace(',', ' ') | split())"
    - uninstall_true == 'true'
    - helm_release_installed.stdout | length > 0

# Deploy the Qwen3-Coder-30B vLLM chart on CPU nodes. Gated at the bottom so
# it only runs for CPU playbooks, in install mode, when 'cpu-qwen3-coder-30b'
# is in the requested model list.
- name: Deploy CPU Qwen3-Coder-30B LLM model
  block:
    # Clear any stale Ingress objects from a previous install so the --force
    # upgrade below can recreate them cleanly.
    - name: Delete Ingress resource Qwen3-Coder-30B from default namespace
      tags: install-cpu-qwen3-coder-30b
      ignore_errors: yes
      kubernetes.core.k8s:
        kind: Ingress
        namespace: default
        name: vllm-qwen3-coder-30b-cpu-ingress
        state: absent
    - name: Delete Ingress resource Qwen3-Coder-30B from auth-apisix namespace
      tags: install-cpu-qwen3-coder-30b
      ignore_errors: yes
      kubernetes.core.k8s:
        kind: Ingress
        namespace: auth-apisix
        name: vllm-qwen3-coder-30b-cpu-ingress
        state: absent
    # The command string is Jinja-rendered before execution; each {% if %}
    # section appends platform-specific --set flags (balloon/CPU sizing,
    # APISIX, OpenShift route vs. ingress, EKS cert, Keycloak OIDC).
    # NOTE(review): the rendered command embeds hugging_face_token and the
    # result is registered — consider no_log: true so the token is not
    # written to task output; confirm with the team before changing.
    - name: Deploy CPU based LLM model Qwen3-Coder-30B Installation
      ansible.builtin.command: >-
        helm upgrade --install vllm-qwen3-coder-30b-cpu "{{ remote_helm_charts_base }}/vllm"
        --values "{{ xeon_values_file }}"
        --set LLM_MODEL_ID="Qwen/Qwen3-Coder-30B-A3B-Instruct"
        --set global.monitoring="{{ vllm_metrics_enabled }}"
        --set svcmonitor.enabled="{{ vllm_metrics_enabled }}"
        --set global.HUGGINGFACEHUB_API_TOKEN={{ hugging_face_token }}
        {% if cpu_playbook == 'true' %}
        --set cpu_balloon_annotation="vllm-balloon"
        --set podLabels.name="vllm"
        --set cpu="{{ optimal_balloon_config.workload_cpus | default(8) }}"
        --set memory="{{ optimal_memory_gb | default(8) }}Gi"
        --set tensor_parallel_size={{ tensor_parallel_size | default(1) }}
        --set pipeline_parallel_size={{ pipeline_parallel_size | default(1) }}
        {% endif %}
        {% if apisix_enabled %}
        --set apisix.enabled={{ apisix_enabled }}
        --set platform={{ kubernetes_platform }}
        {% endif %}
        {% if kubernetes_platform == 'openshift' %}
        --set route.enabled=true
        --set route.host={{ secret_name }}
        --set route.tls.termination=edge
        --set route.tls.insecureEdgeTerminationPolicy=Redirect
        --set ingress.enabled=false
        {% elif ingress_enabled %}
        --set ingress.enabled={{ ingress_enabled }}
        --set ingress.host={{ secret_name }}
        --set ingress.secretname={{ secret_name }}
        --set route.enabled=false
        {% if kubernetes_platform == "eks" %}
        --set aws_certificate_arn={{ aws_certificate_arn | default('') }}
        {% endif %}
        {% endif %}
        {% if deploy_keycloak == 'yes' and apisix_enabled %}
        --set oidc.client_id={{ keycloak_client_id | default('') }}
        --set oidc.client_secret={{ client_secret | default('') }}
        {% endif %}
        {{ helm_proxy_args | default('') }}
        --force
      register: helm_upgrade_install_model_deployment_cpu_qwen3_coder_30b
      failed_when: helm_upgrade_install_model_deployment_cpu_qwen3_coder_30b.rc != 0
    # Register the model with the GenAI gateway (LiteLLM-style entry), only
    # when both the model-install tag and the gateway tag were requested.
    - name: Register Xeon/CPU Qwen3-Coder-30B model
      import_tasks: register-model-genai-gateway.yml
      vars:
        reg_model_name: "Qwen/Qwen3-Coder-30B-A3B-Instruct"
        reg_litellm_model: "openai/Qwen/Qwen3-Coder-30B-A3B-Instruct"
        reg_custom_llm_provider: "openai"
        # In-cluster service URL exposing the OpenAI-compatible API.
        reg_api_base: "http://vllm-qwen3-coder-30b-cpu-service.default/v1"
        reg_input_cost_per_token: 0.001
        reg_output_cost_per_token: 0.002
      tags:
        - install-cpu-qwen3-coder-30b
        - install-genai-gateway
      run_once: true
      when:
        - "'install-cpu-qwen3-coder-30b' in ansible_run_tags"
        - "'install-genai-gateway' in ansible_run_tags"
  run_once: true
  tags: install-cpu-qwen3-coder-30b
  when:
    - model_name_list is defined
    - cpu_playbook == 'true'
    - install_true == 'true'
    - "'cpu-qwen3-coder-30b' in (model_name_list | regex_replace(',', ' ') | split())"



- name: List of Models to be Installed
tags: always
Expand Down
2 changes: 2 additions & 0 deletions sample_solutions/AgenticCodeExecution/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# === Domain selection (retail, airline, stocks, banking, triage) ===
# MCP_DOMAIN=retail
24 changes: 24 additions & 0 deletions sample_solutions/AgenticCodeExecution/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Python bytecode/cache
__pycache__/
*.py[cod]

# Environment files: ignore variant env files (.env.local, .env.dev, ...)
# but keep the example template and the base .env; the negations also
# re-include these if a broader pattern elsewhere ignores them.
.env.*
!.env.example
!.env

# Prompt-builder generated artifacts (recreated by the optimization runs)
prompt-builder/data/retail/runs/
prompt-builder/data/retail/optimization_report.json
prompt-builder/data/retail/retail_optimized_system_prompt.txt
prompt-builder/data/retail/retail_first10_validation.json
prompt-builder/data/retail/sections/section1_generic.txt
prompt-builder/data/retail/sections/section2_seed.txt
prompt-builder/data/retail/sections/section3_policy.txt

# tau2-bench database files (downloaded separately, see README)
examples/airline/data/db.json
examples/retail/data/db.json

# Runtime session data
examples/session_dbs/
Loading