
Initial release: full-stack local AI platform automation

Provisions and manages a 3-host local AI inference platform via Ansible:

Infrastructure:
- HashiCorp Vault (systemd, KV v2) for centralized secret management
  with idempotent secret generation — credentials never overwritten on re-run
- Docker CE + Ollama with NUMA/CPU affinity tuning for Dell M630 hardware
- Keycloak 24.x SSO/OIDC with KC_PROXY_HEADERS and full HTTPS hostname config
- Open WebUI with Keycloak OIDC, Qdrant RAG, and role-based access control
- Qdrant vector database for RAG pipelines
- NGINX reverse proxy with Let's Encrypt TLS termination
- CoreDNS zone management with automatic container reload
- OpenClaw Telegram bot (Python, python-telegram-bot) proxying to Ollama
- Vault OIDC login via Keycloak — ai-admin role required

Automation:
- Full deploy in dependency order via deploy_ai.yml (idempotent, safe to re-run)
- Model benchmarking with composite scoring; auto-selects 4 warm-up slots
- Slot 4 rotatable at runtime: -e slot4_model=<name>
- Credential rotation: delete Vault path, re-run deploy_ai.yml

Configuration:
- All environment-specific values are variables with generic defaults
- Two gitignored local files: inventory/local.yml (SSH), local.yml (vars)
- Zero hardcoded IPs, domains, usernames, or platform names in tracked files
Shaun Arman committed 6 days ago
commit c9457bb38b
54 files changed: 5213 additions, 0 deletions
  .gitignore (+27, -0)
  README.md (+356, -0)
  ansible.cfg (+19, -0)
  benchmarks/README.md (+135, -0)
  benchmarks/results/.gitkeep (+0, -0)
  benchmarks/results/benchmark_20260307T125148.md (+135, -0)
  benchmarks/results/model_selection.json (+401, -0)
  deploy_ai.yml (+98, -0)
  inventory/group_vars/all.yml (+112, -0)
  inventory/host_vars/ai_server.yml (+26, -0)
  inventory/host_vars/coredns_host.yml (+16, -0)
  inventory/host_vars/nginx_proxy.yml (+14, -0)
  inventory/hosts.yml (+19, -0)
  playbooks/00_preflight.yml (+125, -0)
  playbooks/01_vault.yml (+518, -0)
  playbooks/02_infrastructure.yml (+168, -0)
  playbooks/03_benchmark.yml (+314, -0)
  playbooks/04_models.yml (+204, -0)
  playbooks/05_keycloak.yml (+366, -0)
  playbooks/06_qdrant.yml (+57, -0)
  playbooks/07_openwebui.yml (+117, -0)
  playbooks/08_openclaw.yml (+171, -0)
  playbooks/09_nginx.yml (+53, -0)
  playbooks/10_coredns.yml (+64, -0)
  playbooks/11_vault_oidc.yml (+319, -0)
  playbooks/site.yml (+122, -0)
  requirements.yml (+10, -0)
  roles/benchmark/README.md (+113, -0)
  roles/coredns/README.md (+62, -0)
  roles/docker/README.md (+35, -0)
  roles/hashi_vault/README.md (+119, -0)
  roles/keycloak/README.md (+89, -0)
  roles/models/README.md (+80, -0)
  roles/nginx/README.md (+72, -0)
  roles/ollama/README.md (+70, -0)
  roles/openclaw/README.md (+58, -0)
  roles/openwebui/README.md (+73, -0)
  roles/preflight/README.md (+47, -0)
  roles/qdrant/README.md (+50, -0)
  templates/modelfiles/coding-128k.Modelfile.j2 (+8, -0)
  templates/modelfiles/coding-32k.Modelfile.j2 (+8, -0)
  templates/modelfiles/family-fast.Modelfile.j2 (+8, -0)
  templates/nginx/keycloak-proxy.conf.j2 (+36, -0)
  templates/nginx/ollama-api.conf.j2 (+35, -0)
  templates/nginx/vault.conf.j2 (+33, -0)
  templates/ollama/override.conf.j2 (+38, -0)
  templates/ollama/warmup.sh.j2 (+26, -0)
  templates/openclaw/bot.py (+83, -0)
  templates/openclaw/config.yml.j2 (+17, -0)
  templates/openclaw/openclaw.service.j2 (+16, -0)
  templates/systemd/ollama-warmup.service.j2 (+14, -0)
  templates/vault/vault-unseal.sh.j2 (+42, -0)
  templates/vault/vault.hcl.j2 (+11, -0)
  vault/.gitignore (+4, -0)

+ 27 - 0
.gitignore

@@ -0,0 +1,27 @@
+# ── Vault secrets — NEVER commit anything from vault/ except .gitignore ──────
+vault/*
+!vault/.gitignore
+
+# ── Local environment overrides — never commit ───────────────────────────────
+local.yml
+inventory/local.yml
+
+# ── Ansible generated files ───────────────────────────────────────────────────
+*.retry
+
+# ── Python ────────────────────────────────────────────────────────────────────
+__pycache__/
+*.pyc
+*.pyo
+.venv/
+venv/
+
+# ── Editor ────────────────────────────────────────────────────────────────────
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# ── OS ────────────────────────────────────────────────────────────────────────
+.DS_Store
+Thumbs.db

+ 356 - 0
README.md

@@ -0,0 +1,356 @@
+# ai-platform -- Local AI Server Automation
+
+Ansible automation for the full lifecycle of a three-host local AI inference
+platform. This project provisions, configures, benchmarks, and maintains every
+service required to run Ollama-based LLM inference behind an NGINX reverse
+proxy with SSO, vector search (RAG), DNS, secret management, and Telegram bot
+access -- all driven by a single `ansible-playbook deploy_ai.yml` command.
+
+## Architecture
+
+```
+                         ┌──────────────┐
+                         │   Internet   │
+                         └──────┬───────┘
+                                │
+                       ┌────────▼────────┐
+                       │  nginx_proxy    │
+                       │  192.168.1.30   │
+                       │  NGINX reverse  │
+                       │  proxy + TLS    │
+                       └──┬──────────┬───┘
+                          │          │
+          ┌───────────────▼┐    ┌────▼──────────────────────┐
+          │ coredns_host   │    │ ai_server                 │
+          │ 192.168.1.29   │    │ 192.168.1.100             │
+          │                │    │                           │
+          │ - CoreDNS      │    │ - Ollama (LLM inference)  │
+          └────────────────┘    │ - Open WebUI              │
+                                │ - Keycloak (SSO/OIDC)     │
+                                │ - HashiCorp Vault         │
+                                │ - Qdrant (vector DB)      │
+                                │ - OpenClaw (Telegram bot) │
+                                └───────────────────────────┘
+```
+
+## Infrastructure Map
+
+| Host           | IP Address     | Purpose                          |
+|----------------|----------------|----------------------------------|
+| `nginx_proxy`  | 192.168.1.30   | NGINX reverse proxy, TLS termination |
+| `coredns_host` | 192.168.1.29   | CoreDNS                          |
+| `ai_server`    | 192.168.1.100  | Ollama, Open WebUI, Keycloak, Vault, Qdrant, OpenClaw |
+
+> These are the **default** values in `inventory/group_vars/all.yml`. Override for your environment — see [Configuration](#configuration) below.
+
+## Service URLs
+
+| Service    | URL (default `domain: example.com`)       |
+|------------|-------------------------------------------|
+| Open WebUI | https://ollama-ui.example.com             |
+| Ollama API | https://ollama-api.example.com            |
+| Keycloak   | https://idm.example.com                   |
+| Vault      | https://vault.example.com                 |
+
+## Configuration
+
+All environment-specific values are variables with generic defaults in
+`inventory/group_vars/all.yml`. Override them in `local.yml` (gitignored).
+
+| Variable            | Default                              | Description                                         |
+|---------------------|--------------------------------------|-----------------------------------------------------|
+| `domain`            | `example.com`                        | Base domain for all service URLs                    |
+| `ai_server_ip`      | `192.168.1.100`                      | IP of the AI inference server                       |
+| `nginx_proxy_ip`    | `192.168.1.30`                       | IP of the NGINX reverse proxy                       |
+| `coredns_host_ip`   | `192.168.1.29`                       | IP of the CoreDNS host                              |
+| `ansible_user`      | `admin`                              | SSH user on all managed hosts                       |
+| `platform_name`     | `"AI Platform"`                      | Display name used in WebUI, Keycloak, and summaries |
+| `vault_project_slug`| `"ai-platform"`                      | Slug for Keycloak realm name and Vault secret paths |
+| `nginx_ssl_cert`    | `/etc/nginx/ssl/{{ domain }}.crt`    | Path to TLS certificate on nginx_proxy              |
+| `nginx_ssl_key`     | `/etc/nginx/ssl/{{ domain }}.key`    | Path to TLS private key on nginx_proxy              |
+
+> If you use Let's Encrypt, override `nginx_ssl_cert` and `nginx_ssl_key` in
+> `local.yml` to point to your certbot paths (e.g.
+> `/etc/letsencrypt/live/your-domain/fullchain.pem`).
+
+### Setup: two gitignored local files
+
+Configuration is split across two gitignored files — create both before first run.
+
+**`inventory/local.yml`** — SSH connection details (host IPs and user):
+
+```yaml
+# inventory/local.yml
+all:
+  hosts:
+    ai_server:
+      ansible_host: 10.0.1.50
+      ansible_user: myuser
+    nginx_proxy:
+      ansible_host: 10.0.1.10
+      ansible_user: myuser
+    coredns_host:
+      ansible_host: 10.0.1.9
+      ansible_user: myuser
+```
+
+Ansible reads the `inventory/` directory automatically (`ansible.cfg` sets
+`inventory = inventory/`), so `inventory/local.yml` is merged with
+`inventory/hosts.yml` on every run — no extra flags needed.
+
+The `inventory/` directory also contains `group_vars/` and `host_vars/`, which
+ensures Ansible finds them regardless of which playbook is run directly.
+
+**`local.yml`** — play variables (domain, platform identity, SSL certs, etc.):
+
+```yaml
+# local.yml
+domain: mylab.internal
+ai_server_ip: 10.0.1.50
+nginx_proxy_ip: 10.0.1.10
+coredns_host_ip: 10.0.1.9
+platform_name: "My AI Platform"
+vault_project_slug: my-ai
+nginx_ssl_cert: /etc/letsencrypt/live/mylab.internal/fullchain.pem
+nginx_ssl_key: /etc/letsencrypt/live/mylab.internal/privkey.pem
+```
+
+> `ai_server_ip`, `nginx_proxy_ip`, and `coredns_host_ip` appear in both files.
+> `inventory/local.yml` controls where Ansible SSHs to; `local.yml` controls what
+> gets rendered into config files and DNS records.
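
Because the host IPs must stay in sync across the two files, it is worth sanity-checking them before the first run. A stdlib-only Python sketch (file contents embedded inline for illustration, mirroring the examples above; in practice you would read `inventory/local.yml` and `local.yml` from disk):

```python
import re

# contents of the two gitignored files, inlined for illustration
inventory_local = """
all:
  hosts:
    ai_server:
      ansible_host: 10.0.1.50
      ansible_user: myuser
"""
local_vars = """
domain: mylab.internal
ai_server_ip: 10.0.1.50
"""

# ansible_host under ai_server in inventory/local.yml (where Ansible SSHs to)
ssh_ip = re.search(r"ai_server:\s*\n\s*ansible_host:\s*(\S+)", inventory_local).group(1)
# ai_server_ip in local.yml (what gets rendered into configs and DNS)
var_ip = re.search(r"^ai_server_ip:\s*(\S+)", local_vars, re.M).group(1)

assert ssh_ip == var_ip, f"IP mismatch: {ssh_ip} vs {var_ip}"
print(f"ai_server IP consistent: {ssh_ip}")
```

The same check applies to `nginx_proxy_ip` and `coredns_host_ip`.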
+
+### Alternative: inline `-e` flags (no local.yml)
+
+```bash
+ansible-playbook deploy_ai.yml -K \
+  -e "domain=mylab.internal" \
+  -e "ai_server_ip=10.0.1.50" \
+  -e "nginx_proxy_ip=10.0.1.10" \
+  -e "coredns_host_ip=10.0.1.9" \
+  -e "platform_name='My AI Platform'" \
+  -e "vault_project_slug=my-ai" \
+  -e "nginx_ssl_cert=/etc/letsencrypt/live/mylab.internal/fullchain.pem" \
+  -e "nginx_ssl_key=/etc/letsencrypt/live/mylab.internal/privkey.pem"
+```
+
+> `inventory/local.yml` must still exist for SSH to work — inline `-e` flags
+> cannot set per-host connection variables.
+
+## Prerequisites
+
+- Ansible 2.14+
+- Python 3.9+
+- SSH access to all 3 hosts
+- sudo privileges on all 3 hosts
+- Ansible Galaxy collections:
+
+```bash
+ansible-galaxy collection install -r requirements.yml
+```
+
+## First-Run Quickstart
+
+```bash
+git clone <repo>
+cd ai-platform
+ansible-galaxy collection install -r requirements.yml
+
+# 1. Create inventory/local.yml with your host IPs and SSH user (gitignored)
+# 2. Create local.yml with your domain, platform name, SSL cert paths, etc. (gitignored)
+# See the Configuration section above for the contents of each file.
+
+# 3. Deploy
+ansible-playbook deploy_ai.yml -K -e @local.yml
+```
+
+> `-K` prompts for the sudo (become) password on the remote hosts.
+
+## Credential Management
+
+All secrets (API keys, passwords, OIDC client secrets) are stored in HashiCorp Vault
+and **only written once** — re-running any playbook will never overwrite an existing
+secret. This means `deploy_ai.yml` is safe to re-run at any time without breaking
+running services.
+
+### Credential rotation
+
+To rotate a specific credential, delete it from Vault and re-run the full deploy:
+
+```bash
+# Example: rotate Keycloak credentials
+vault kv delete secret/<vault_project_slug>/keycloak
+ansible-playbook deploy_ai.yml -K -e @local.yml
+```
+
+New credentials will be generated, stored in Vault, and all dependent services
+(Keycloak, Open WebUI, Vault OIDC) will be redeployed in the correct order automatically.
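
The write-once rule behind this workflow can be illustrated with a minimal Python sketch. This is an illustration of the pattern, not the playbooks' actual code: `ensure_secret` is a hypothetical helper, and a plain dict stands in for the Vault KV v2 mount.

```python
import secrets

def ensure_secret(store: dict, path: str) -> str:
    """Write-once: generate a credential only if the path is absent."""
    if path not in store:                  # re-runs never overwrite
        store[path] = secrets.token_urlsafe(32)
    return store[path]

kv = {}                                    # stands in for secret/<slug>/...
first = ensure_secret(kv, "ai-platform/keycloak")
again = ensure_secret(kv, "ai-platform/keycloak")
assert first == again                      # idempotent re-run: unchanged

del kv["ai-platform/keycloak"]             # the "vault kv delete" step
rotated = ensure_secret(kv, "ai-platform/keycloak")
assert rotated != first                    # re-deploy generates a fresh one
print("write-once semantics hold")
```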
+
+### Vault login
+
+Vault UI supports two login methods:
+
+- **Token** — use the root token from `vault/.vault-init.json` (emergency/admin use only)
+- **OIDC** — select method `OIDC`, role `default`, click *Sign in with OIDC Provider*,
+  authenticate via Keycloak. Only users with the `ai-admin` Keycloak role can log in.
+
+## User Roles
+
+Users are created in Keycloak at `https://idm.<domain>/admin/`. Assign roles
+from the platform realm (not the `master` realm):
+
+| Role       | Open WebUI             | Vault OIDC  |
+|------------|------------------------|-------------|
+| `ai-user`  | ✅ Standard access     | ❌ Blocked  |
+| `ai-admin` | ✅ Admin access        | ✅ Full access |
+| *(none)*   | ❌ Blocked             | ❌ Blocked  |
+
+## Day-2 Operations
+
+**Full deploy / idempotent re-run:**
+
+```bash
+ansible-playbook deploy_ai.yml -K -e @local.yml
+```
+
+**Pre-flight checks only:**
+
+```bash
+ansible-playbook deploy_ai.yml -K -e @local.yml --tags preflight
+```
+
+**Skip benchmarking on re-runs (faster):**
+
+```bash
+ansible-playbook deploy_ai.yml -K -e @local.yml --skip-tags benchmark
+```
+
+**Vault only:**
+
+```bash
+ansible-playbook playbooks/01_vault.yml -K -e @local.yml
+```
+
+**Docker + Ollama only:**
+
+```bash
+ansible-playbook playbooks/02_infrastructure.yml -K -e @local.yml
+```
+
+**Re-benchmark all installed models:**
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml
+```
+
+**Benchmark specific models only:**
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_models=qwen2.5-coder:14b-instruct-q4_K_M,codestral:22b-v0.1-q4_K_M"
+```
+
+**Pull recommended models if scores are below threshold:**
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml -e "pull_if_better=true"
+```
+
+**Update warm-up slots after a benchmark:**
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+**Rotate slot 4 to a specific model:**
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot4_model=deepseek-r1:14b"
+```
+
+**Redeploy Keycloak only:**
+
+```bash
+ansible-playbook playbooks/05_keycloak.yml -K -e @local.yml
+```
+
+**Redeploy Open WebUI only:**
+
+```bash
+ansible-playbook playbooks/07_openwebui.yml -K -e @local.yml
+```
+
+**Update NGINX configs only:**
+
+```bash
+ansible-playbook playbooks/09_nginx.yml -K -e @local.yml
+```
+
+**Update CoreDNS records only:**
+
+```bash
+ansible-playbook playbooks/10_coredns.yml -K -e @local.yml
+```
+
+**Configure Keycloak SSO login for Vault UI:**
+
+```bash
+ansible-playbook playbooks/11_vault_oidc.yml -K -e @local.yml
+```
+
+## Model Slot System
+
+Four models are kept warm in RAM at all times (`OLLAMA_MAX_LOADED_MODELS=4`, `OLLAMA_KEEP_ALIVE=-1`). Slots are filled by the benchmark playbook — no model names are hardcoded.
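
The warm-slot behavior is driven by Ollama's service environment. A sketch of the relevant systemd drop-in settings (the actual file is rendered from `templates/ollama/override.conf.j2`, which may set more than shown here):

```ini
[Service]
# keep up to 4 models resident in RAM, and never unload them on idle
Environment="OLLAMA_MAX_LOADED_MODELS=4"
Environment="OLLAMA_KEEP_ALIVE=-1"
```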
+
+| Slot | Role                      | Selection                     | Rotation                              |
+|------|---------------------------|-------------------------------|---------------------------------------|
+| 1    | General-purpose primary   | Top general composite score   | Replaced if score < threshold         |
+| 2    | General-purpose secondary | 2nd general composite score   | Replaced if score < threshold         |
+| 3    | Coding primary            | Top coding composite score    | Locked; replaced only by re-benchmark |
+| 4    | Coding secondary          | 2nd coding composite score    | Rotatable: `-e slot4_model=<name>`    |
+
+**Classification rule:** a model is classified `coding` if its coding composite score exceeds its general composite score by ≥ 0.15; otherwise `general`.
+
+## Verification Steps
+
+After a full `deploy_ai.yml` run, verify the deployment (substitute your actual `domain` and IPs):
+
+1. **Vault health** -- `curl -s https://vault.example.com/v1/sys/health` returns `initialized: true, sealed: false`
+2. **Vault OIDC login** -- select OIDC method, role `default`, authenticate with an `ai-admin` Keycloak user
+3. **Ollama API** -- `curl -s https://ollama-api.example.com/api/tags` returns model list
+4. **Open WebUI** -- browse to https://ollama-ui.example.com, SSO login works with `ai-user` or `ai-admin`
+5. **Keycloak admin** -- browse to https://idm.example.com/admin/, login with `admin` credentials from Vault
+6. **Qdrant health** -- `curl -s http://<ai_server_ip>:6333/healthz` returns OK
+7. **CoreDNS resolution** -- `dig @<coredns_host_ip> vault.example.com` returns `<nginx_proxy_ip>`
+8. **NGINX configs** -- `ssh <nginx_proxy_ip> 'sudo nginx -t'` passes
+9. **OpenClaw** -- send a message to the Telegram bot, confirm response
+10. **Benchmark report** -- check `benchmarks/results/benchmark_<timestamp>.md` for latest results
+
+## Role Reference
+
+| Role         | README                                  | Purpose                        |
+|--------------|-----------------------------------------|--------------------------------|
+| preflight    | [roles/preflight/README.md](roles/preflight/README.md)   | Pre-flight validation          |
+| hashi_vault  | [roles/hashi_vault/README.md](roles/hashi_vault/README.md) | HashiCorp Vault deployment     |
+| docker       | [roles/docker/README.md](roles/docker/README.md)         | Docker CE installation         |
+| ollama       | [roles/ollama/README.md](roles/ollama/README.md)         | Ollama inference server        |
+| benchmark    | [roles/benchmark/README.md](roles/benchmark/README.md)   | Model benchmarking             |
+| models       | [roles/models/README.md](roles/models/README.md)         | Model lifecycle management     |
+| keycloak     | [roles/keycloak/README.md](roles/keycloak/README.md)     | Keycloak SSO/OIDC              |
+| qdrant       | [roles/qdrant/README.md](roles/qdrant/README.md)         | Qdrant vector database         |
+| openwebui    | [roles/openwebui/README.md](roles/openwebui/README.md)   | Open WebUI deployment          |
+| openclaw     | [roles/openclaw/README.md](roles/openclaw/README.md)     | OpenClaw Telegram bot          |
+| nginx        | [roles/nginx/README.md](roles/nginx/README.md)           | NGINX reverse proxy            |
+| coredns      | [roles/coredns/README.md](roles/coredns/README.md)       | CoreDNS zone management        |
+
+## Security Notes
+
+- `vault/.vault-init.json` and `vault/.vault-token` are gitignored -- they contain
+  Vault unseal keys and root tokens. **Never commit these files.**
+- `local.yml` and `inventory/local.yml` are gitignored -- they contain your
+  environment-specific IPs, usernames, and cert paths. **Never commit these files.**
+- All service secrets (database passwords, API keys, OIDC client secrets) are stored
+  in HashiCorp Vault and injected at deploy time. Secrets are never regenerated unless
+  explicitly deleted from Vault.
+- Ollama API is protected by `OLLAMA_API_KEY` to prevent unauthenticated access.
+- TLS termination happens at the NGINX reverse proxy layer.
+- Open WebUI and Vault UI both require a valid Keycloak role to access via SSO.

+ 19 - 0
ansible.cfg

@@ -0,0 +1,19 @@
+[defaults]
+inventory          = inventory/
+roles_path         = roles
+collections_path   = ~/.ansible/collections
+remote_user        = admin
+host_key_checking  = False
+retry_files_enabled = False
+stdout_callback    = default
+result_format      = yaml
+callbacks_enabled  = profile_tasks
+
+[privilege_escalation]
+become       = True
+become_method = sudo
+become_user  = root
+
+[ssh_connection]
+pipelining   = True
+ssh_args     = -o ControlMaster=auto -o ControlPersist=60s -o StrictHostKeyChecking=no

+ 135 - 0
benchmarks/README.md

@@ -0,0 +1,135 @@
+# Benchmarks
+
+## Overview
+
+Dynamic benchmark system for all installed Ollama models. Runs a suite of coding and
+general-purpose tests against every model currently available on the Ollama server,
+scores each model on a composite metric, and assigns models to the 4-slot system
+based on results.
+
+## How to Run
+
+**Benchmark all installed models:**
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml
+```
+
+**Benchmark specific models only:**
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -e '{"benchmark_specific_models":["qwen2.5-coder:14b","deepseek-coder-v2:16b"]}'
+```
+
+**Benchmark with automatic model pulling if a better model is found:**
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -e pull_if_better=true
+```
+
+## Test Suites
+
+### Coding Tests
+
+| Test       | Prompt                                                         | What Is Measured              |
+|------------|----------------------------------------------------------------|-------------------------------|
+| `code_gen` | "Write a Python function that implements binary search on a sorted list. Include type hints and docstring." | Correctness (def + return present), code structure, tokens/sec |
+| `debug`    | "Find and fix the bug in this Python code: `def factorial(n): return n * factorial(n)`. Explain the issue." | Identifies base case bug, explanation quality, tokens/sec |
+| `refactor` | "Refactor this code to use list comprehension: `result = []; for i in range(10): if i % 2 == 0: result.append(i*i)`" | Produces list comprehension, conciseness, tokens/sec |
+
+### General Tests
+
+| Test        | Prompt                                                        | What Is Measured              |
+|-------------|---------------------------------------------------------------|-------------------------------|
+| `explain`   | "Explain the concept of recursion to a beginner programmer. Use a simple analogy." | Clarity, analogy presence, length adequacy, tokens/sec |
+| `creative`  | "Write a short poem about artificial intelligence."           | Creativity (line count, poetic structure), tokens/sec |
+| `reasoning` | "A farmer has 17 sheep. All but 9 die. How many are left? Explain your reasoning step by step." | Correct answer (9), step-by-step reasoning, tokens/sec |
+
+### Latency Test
+
+| Test      | Prompt | What Is Measured           |
+|-----------|--------|----------------------------|
+| `latency` | "Hi"   | Time to first token (TTFT) |
+
+## Scoring
+
+### Metrics Collected from Ollama API
+
+- **tokens/sec** -- generation throughput from `/api/generate` response
+- **TTFT** (time to first token) -- measured from request start to first streamed token
+- **Quality heuristics** -- regex and length checks specific to each test type
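
For reference, Ollama's `/api/generate` final response reports `eval_count` (tokens generated) and `eval_duration` (in nanoseconds), from which throughput follows directly. A minimal sketch (the function name is ours, the field names are Ollama's):

```python
def tokens_per_sec(resp: dict) -> float:
    """Generation throughput from an Ollama /api/generate final response."""
    # eval_count = tokens generated; eval_duration = generation time in ns
    return resp["eval_count"] / (resp["eval_duration"] / 1e9)

# example: 426 tokens generated in 20 s
sample = {"eval_count": 426, "eval_duration": 20_000_000_000}
print(round(tokens_per_sec(sample), 1))  # → 21.3
```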
+
+### Composite Score Formula
+
+For each category (coding, general), a composite score is calculated:
+
+```
+composite = (quality * 0.45) + (tokens_per_sec_normalized * 0.30) + (latency_score * 0.25)
+```
+
+Where:
+- `quality` is 0.0-1.0 based on heuristic checks for the test type
+- `tokens_per_sec_normalized` is the model's tokens/sec divided by 100 (a fixed
+  100 tok/s ceiling -- e.g. 21.3 tok/s normalizes to 0.213)
+- `latency_score` is `max(0, 1.0 - model_ttft / benchmark_max_ttft_ms)` -- any
+  model at or above the 5000 ms TTFT ceiling scores 0
+
+### Classification Rule
+
+A model is classified as a **coding** model if:
+
+```
+coding_composite - general_composite >= 0.15
+```
+
+Otherwise it is classified as **general**.
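
Both the composite formula and the classification rule are simple enough to verify by hand. The sketch below (function names ours) reproduces the stored figures for `deepseek-coder-v2:16b-lite-instruct-q4_K_M` from `benchmarks/results/model_selection.json`:

```python
def composite(quality: float, toks_norm: float, latency_score: float) -> float:
    # weights: quality 0.45, speed 0.30, latency 0.25 (group_vars defaults)
    return round(quality * 0.45 + toks_norm * 0.30 + latency_score * 0.25, 3)

def classify(coding_composite: float, general_composite: float,
             threshold: float = 0.15) -> str:
    return "coding" if coding_composite - general_composite >= threshold else "general"

# figures taken from benchmarks/results/model_selection.json
general = composite(quality=1.0,   toks_norm=0.213, latency_score=0.677)
coding  = composite(quality=0.855, toks_norm=0.213, latency_score=0.677)
print(general, coding, classify(coding, general))  # → 0.683 0.618 general
```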
+
+## Thresholds and Configuration
+
+All thresholds are configurable via `group_vars/all.yml`:
+
+| Key                            | Default | Description                                    |
+|--------------------------------|---------|------------------------------------------------|
+| `benchmark_min_tokens_per_sec` | 10      | Minimum tokens/sec to pass a model             |
+| `benchmark_max_ttft_ms`        | 5000    | Maximum time to first token in milliseconds    |
+| `benchmark_quality_weight`     | 0.45    | Weight of quality score in composite            |
+| `benchmark_speed_weight`       | 0.30    | Weight of tokens/sec in composite               |
+| `benchmark_latency_weight`     | 0.25    | Weight of latency score in composite            |
+| `benchmark_coding_threshold`   | 0.15    | Minimum coding-general delta for coding classification |
+
+## Output Format
+
+### Benchmark Report
+
+Each run produces `benchmarks/results/benchmark_<timestamp>.md` with a results table:
+
+```
+| Model                  | Coding Composite | General Composite | Classification | Tokens/sec | TTFT (ms) |
+|------------------------|------------------|-------------------|----------------|------------|-----------|
+| qwen2.5-coder:14b      | 0.82             | 0.65              | coding         | 38.2       | 420       |
+| deepseek-coder-v2:16b  | 0.78             | 0.63              | coding         | 35.1       | 510       |
+| llama3.1:8b            | 0.61             | 0.74              | general        | 52.3       | 280       |
+| mistral:7b             | 0.58             | 0.71              | general        | 55.8       | 250       |
+```
+
+### Model Selection File
+
+Results are also written to `benchmarks/results/model_selection.json`:
+
+```json
+{
+  "timestamp": "2025-01-15T10:30:00Z",
+  "slot1_coding": "qwen2.5-coder:14b",
+  "slot2_general": "llama3.1:8b",
+  "slot3_backup": "deepseek-coder-v2:16b",
+  "slot4_experimental": null,
+  "results": { ... }
+}
+```
+
+## Slot Selection
+
+Slots are assigned from benchmark results as follows:
+
+1. **Slot 1 (General, primary)** -- model with the highest `general_composite` score
+2. **Slot 2 (General, secondary)** -- model with the second-highest `general_composite` score
+3. **Slot 3 (Coding, primary)** -- model with the highest `coding_composite` score
+4. **Slot 4 (Coding, secondary)** -- not auto-assigned by benchmarks; set via `-e slot4_model=<name>`

+ 0 - 0
benchmarks/results/.gitkeep


+ 135 - 0
benchmarks/results/benchmark_20260307T125148.md

@@ -0,0 +1,135 @@
+# Benchmark Results - 20260307T125148
+
+## Model Selection
+| Slot | Role | Model | Composite Score |
+|------|------|-------|----------------|
+| 1 | General (Primary) | deepseek-coder-v2:16b-lite-instruct-q4_K_M | 0.683 |
+| 2 | General (Secondary) | qwen2.5-coder:7b-instruct-q4_K_M | 0.619 |
+| 3 | Coding (Primary) | deepseek-coder-v2:16b-lite-instruct-q4_K_M | 0.618 |
+| 4 | Coding (Secondary) | none | N/A |
+
+## Detailed Metrics
+### gpt-oss:20b
+- **Category**: general
+- **Coding Quality**: 0.978
+- **General Quality**: 0.925
+- **Avg Tokens/sec**: 10.3
+- **Latency (ms)**: 8158.0
+- **Coding Composite**: 0.471
+- **General Composite**: 0.447
+### deepseek-r1:14b
+- **Category**: general
+- **Coding Quality**: 0.853
+- **General Quality**: 0.948
+- **Avg Tokens/sec**: 6.4
+- **Latency (ms)**: 2677.7
+- **Coding Composite**: 0.519
+- **General Composite**: 0.562
+### phi4:14b
+- **Category**: general
+- **Coding Quality**: 0.904
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 4394.9
+- **Coding Composite**: 0.457
+- **General Composite**: 0.469
+### qwen3-coder-next:latest
+- **Category**: general
+- **Coding Quality**: 0.785
+- **General Quality**: 0.892
+- **Avg Tokens/sec**: 4.6
+- **Latency (ms)**: 3462.7
+- **Coding Composite**: 0.444
+- **General Composite**: 0.492
+### qwen3.5:35b
+- **Category**: general
+- **Coding Quality**: 0.879
+- **General Quality**: 1.0
+- **Avg Tokens/sec**: 5.3
+- **Latency (ms)**: 133176.0
+- **Coding Composite**: 0.411
+- **General Composite**: 0.466
+### qwen3-coder:30b
+- **Category**: general
+- **Coding Quality**: 0.885
+- **General Quality**: 0.872
+- **Avg Tokens/sec**: 7.9
+- **Latency (ms)**: 1769.0
+- **Coding Composite**: 0.584
+- **General Composite**: 0.578
+### qwen2.5-coder:7b-instruct-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.83
+- **General Quality**: 0.887
+- **Avg Tokens/sec**: 11.5
+- **Latency (ms)**: 1301.7
+- **Coding Composite**: 0.593
+- **General Composite**: 0.619
+### qwen2.5-coder:7b-instruct-q5_K_M
+- **Category**: general
+- **Coding Quality**: 0.81
+- **General Quality**: 0.925
+- **Avg Tokens/sec**: 9.0
+- **Latency (ms)**: 2900.9
+- **Coding Composite**: 0.496
+- **General Composite**: 0.548
+### qwen2.5-coder:7b-instruct-q6_K
+- **Category**: general
+- **Coding Quality**: 0.832
+- **General Quality**: 0.919
+- **Avg Tokens/sec**: 5.9
+- **Latency (ms)**: 2112.8
+- **Coding Composite**: 0.536
+- **General Composite**: 0.576
+### deepseek-coder-v2:16b-lite-instruct-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.855
+- **General Quality**: 1.0
+- **Avg Tokens/sec**: 21.3
+- **Latency (ms)**: 1617.0
+- **Coding Composite**: 0.618
+- **General Composite**: 0.683
+### qwen2.5-coder:14b-instruct-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.84
+- **General Quality**: 0.848
+- **Avg Tokens/sec**: 4.9
+- **Latency (ms)**: 6865.3
+- **Coding Composite**: 0.393
+- **General Composite**: 0.396
+### codellama:13b-instruct-q5_K_M
+- **Category**: general
+- **Coding Quality**: 0.804
+- **General Quality**: 0.671
+- **Avg Tokens/sec**: 4.1
+- **Latency (ms)**: 1126.4
+- **Coding Composite**: 0.568
+- **General Composite**: 0.508
+### codestral:22b-v0.1-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.696
+- **General Quality**: 0.887
+- **Avg Tokens/sec**: 2.3
+- **Latency (ms)**: 58429.3
+- **Coding Composite**: 0.32
+- **General Composite**: 0.406
+### dolphin-mixtral:8x7b
+- **Category**: general
+- **Coding Quality**: 0.755
+- **General Quality**: 0.725
+- **Avg Tokens/sec**: 4.8
+- **Latency (ms)**: 3065.7
+- **Coding Composite**: 0.451
+- **General Composite**: 0.437
+### mistral:7b-instruct
+- **Category**: general
+- **Coding Quality**: 0.846
+- **General Quality**: 0.717
+- **Avg Tokens/sec**: 12.1
+- **Latency (ms)**: 6696.2
+- **Coding Composite**: 0.417
+- **General Composite**: 0.359
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Category: coding if (coding_composite - general_composite) >= 0.15, else general

+ 401 - 0
benchmarks/results/model_selection.json

@@ -0,0 +1,401 @@
+{
+    "all_metrics": {
+        "codellama:13b-instruct-q5_K_M": {
+            "avg_tok_per_sec": 4.1,
+            "category": "general",
+            "coding_composite": 0.568,
+            "coding_quality": 0.804,
+            "general_composite": 0.508,
+            "general_quality": 0.671,
+            "latency_ms": 1126.4,
+            "latency_score": 0.775,
+            "toks_norm": 0.041
+        },
+        "codestral:22b-v0.1-q4_K_M": {
+            "avg_tok_per_sec": 2.3,
+            "category": "general",
+            "coding_composite": 0.32,
+            "coding_quality": 0.696,
+            "general_composite": 0.406,
+            "general_quality": 0.887,
+            "latency_ms": 58429.3,
+            "latency_score": 0,
+            "toks_norm": 0.023
+        },
+        "deepseek-coder-v2:16b-lite-instruct-q4_K_M": {
+            "avg_tok_per_sec": 21.3,
+            "category": "general",
+            "coding_composite": 0.618,
+            "coding_quality": 0.855,
+            "general_composite": 0.683,
+            "general_quality": 1.0,
+            "latency_ms": 1617.0,
+            "latency_score": 0.677,
+            "toks_norm": 0.213
+        },
+        "deepseek-r1:14b": {
+            "avg_tok_per_sec": 6.4,
+            "category": "general",
+            "coding_composite": 0.519,
+            "coding_quality": 0.853,
+            "general_composite": 0.562,
+            "general_quality": 0.948,
+            "latency_ms": 2677.7,
+            "latency_score": 0.464,
+            "toks_norm": 0.064
+        },
+        "dolphin-mixtral:8x7b": {
+            "avg_tok_per_sec": 4.8,
+            "category": "general",
+            "coding_composite": 0.451,
+            "coding_quality": 0.755,
+            "general_composite": 0.437,
+            "general_quality": 0.725,
+            "latency_ms": 3065.7,
+            "latency_score": 0.387,
+            "toks_norm": 0.048
+        },
+        "gpt-oss:20b": {
+            "avg_tok_per_sec": 10.3,
+            "category": "general",
+            "coding_composite": 0.471,
+            "coding_quality": 0.978,
+            "general_composite": 0.447,
+            "general_quality": 0.925,
+            "latency_ms": 8158.0,
+            "latency_score": 0,
+            "toks_norm": 0.103
+        },
+        "mistral:7b-instruct": {
+            "avg_tok_per_sec": 12.1,
+            "category": "general",
+            "coding_composite": 0.417,
+            "coding_quality": 0.846,
+            "general_composite": 0.359,
+            "general_quality": 0.717,
+            "latency_ms": 6696.2,
+            "latency_score": 0,
+            "toks_norm": 0.121
+        },
+        "phi4:14b": {
+            "avg_tok_per_sec": 6.6,
+            "category": "general",
+            "coding_composite": 0.457,
+            "coding_quality": 0.904,
+            "general_composite": 0.469,
+            "general_quality": 0.931,
+            "latency_ms": 4394.9,
+            "latency_score": 0.121,
+            "toks_norm": 0.066
+        },
+        "qwen2.5-coder:14b-instruct-q4_K_M": {
+            "avg_tok_per_sec": 4.9,
+            "category": "general",
+            "coding_composite": 0.393,
+            "coding_quality": 0.84,
+            "general_composite": 0.396,
+            "general_quality": 0.848,
+            "latency_ms": 6865.3,
+            "latency_score": 0,
+            "toks_norm": 0.049
+        },
+        "qwen2.5-coder:7b-instruct-q4_K_M": {
+            "avg_tok_per_sec": 11.5,
+            "category": "general",
+            "coding_composite": 0.593,
+            "coding_quality": 0.83,
+            "general_composite": 0.619,
+            "general_quality": 0.887,
+            "latency_ms": 1301.7,
+            "latency_score": 0.74,
+            "toks_norm": 0.115
+        },
+        "qwen2.5-coder:7b-instruct-q5_K_M": {
+            "avg_tok_per_sec": 9.0,
+            "category": "general",
+            "coding_composite": 0.496,
+            "coding_quality": 0.81,
+            "general_composite": 0.548,
+            "general_quality": 0.925,
+            "latency_ms": 2900.9,
+            "latency_score": 0.42,
+            "toks_norm": 0.09
+        },
+        "qwen2.5-coder:7b-instruct-q6_K": {
+            "avg_tok_per_sec": 5.9,
+            "category": "general",
+            "coding_composite": 0.536,
+            "coding_quality": 0.832,
+            "general_composite": 0.576,
+            "general_quality": 0.919,
+            "latency_ms": 2112.8,
+            "latency_score": 0.577,
+            "toks_norm": 0.059
+        },
+        "qwen3-coder-next:latest": {
+            "avg_tok_per_sec": 4.6,
+            "category": "general",
+            "coding_composite": 0.444,
+            "coding_quality": 0.785,
+            "general_composite": 0.492,
+            "general_quality": 0.892,
+            "latency_ms": 3462.7,
+            "latency_score": 0.307,
+            "toks_norm": 0.046
+        },
+        "qwen3-coder:30b": {
+            "avg_tok_per_sec": 7.9,
+            "category": "general",
+            "coding_composite": 0.584,
+            "coding_quality": 0.885,
+            "general_composite": 0.578,
+            "general_quality": 0.872,
+            "latency_ms": 1769.0,
+            "latency_score": 0.646,
+            "toks_norm": 0.079
+        },
+        "qwen3.5:35b": {
+            "avg_tok_per_sec": 5.3,
+            "category": "general",
+            "coding_composite": 0.411,
+            "coding_quality": 0.879,
+            "general_composite": 0.466,
+            "general_quality": 1.0,
+            "latency_ms": 133176.0,
+            "latency_score": 0,
+            "toks_norm": 0.053
+        }
+    },
+    "coding_ranking": [],
+    "general_ranking": [
+        {
+            "composite": 0.683,
+            "metrics": {
+                "avg_tok_per_sec": 21.3,
+                "category": "general",
+                "coding_composite": 0.618,
+                "coding_quality": 0.855,
+                "general_composite": 0.683,
+                "general_quality": 1.0,
+                "latency_ms": 1617.0,
+                "latency_score": 0.677,
+                "toks_norm": 0.213
+            },
+            "name": "deepseek-coder-v2:16b-lite-instruct-q4_K_M"
+        },
+        {
+            "composite": 0.619,
+            "metrics": {
+                "avg_tok_per_sec": 11.5,
+                "category": "general",
+                "coding_composite": 0.593,
+                "coding_quality": 0.83,
+                "general_composite": 0.619,
+                "general_quality": 0.887,
+                "latency_ms": 1301.7,
+                "latency_score": 0.74,
+                "toks_norm": 0.115
+            },
+            "name": "qwen2.5-coder:7b-instruct-q4_K_M"
+        },
+        {
+            "composite": 0.578,
+            "metrics": {
+                "avg_tok_per_sec": 7.9,
+                "category": "general",
+                "coding_composite": 0.584,
+                "coding_quality": 0.885,
+                "general_composite": 0.578,
+                "general_quality": 0.872,
+                "latency_ms": 1769.0,
+                "latency_score": 0.646,
+                "toks_norm": 0.079
+            },
+            "name": "qwen3-coder:30b"
+        },
+        {
+            "composite": 0.576,
+            "metrics": {
+                "avg_tok_per_sec": 5.9,
+                "category": "general",
+                "coding_composite": 0.536,
+                "coding_quality": 0.832,
+                "general_composite": 0.576,
+                "general_quality": 0.919,
+                "latency_ms": 2112.8,
+                "latency_score": 0.577,
+                "toks_norm": 0.059
+            },
+            "name": "qwen2.5-coder:7b-instruct-q6_K"
+        },
+        {
+            "composite": 0.562,
+            "metrics": {
+                "avg_tok_per_sec": 6.4,
+                "category": "general",
+                "coding_composite": 0.519,
+                "coding_quality": 0.853,
+                "general_composite": 0.562,
+                "general_quality": 0.948,
+                "latency_ms": 2677.7,
+                "latency_score": 0.464,
+                "toks_norm": 0.064
+            },
+            "name": "deepseek-r1:14b"
+        },
+        {
+            "composite": 0.548,
+            "metrics": {
+                "avg_tok_per_sec": 9.0,
+                "category": "general",
+                "coding_composite": 0.496,
+                "coding_quality": 0.81,
+                "general_composite": 0.548,
+                "general_quality": 0.925,
+                "latency_ms": 2900.9,
+                "latency_score": 0.42,
+                "toks_norm": 0.09
+            },
+            "name": "qwen2.5-coder:7b-instruct-q5_K_M"
+        },
+        {
+            "composite": 0.508,
+            "metrics": {
+                "avg_tok_per_sec": 4.1,
+                "category": "general",
+                "coding_composite": 0.568,
+                "coding_quality": 0.804,
+                "general_composite": 0.508,
+                "general_quality": 0.671,
+                "latency_ms": 1126.4,
+                "latency_score": 0.775,
+                "toks_norm": 0.041
+            },
+            "name": "codellama:13b-instruct-q5_K_M"
+        },
+        {
+            "composite": 0.492,
+            "metrics": {
+                "avg_tok_per_sec": 4.6,
+                "category": "general",
+                "coding_composite": 0.444,
+                "coding_quality": 0.785,
+                "general_composite": 0.492,
+                "general_quality": 0.892,
+                "latency_ms": 3462.7,
+                "latency_score": 0.307,
+                "toks_norm": 0.046
+            },
+            "name": "qwen3-coder-next:latest"
+        },
+        {
+            "composite": 0.469,
+            "metrics": {
+                "avg_tok_per_sec": 6.6,
+                "category": "general",
+                "coding_composite": 0.457,
+                "coding_quality": 0.904,
+                "general_composite": 0.469,
+                "general_quality": 0.931,
+                "latency_ms": 4394.9,
+                "latency_score": 0.121,
+                "toks_norm": 0.066
+            },
+            "name": "phi4:14b"
+        },
+        {
+            "composite": 0.466,
+            "metrics": {
+                "avg_tok_per_sec": 5.3,
+                "category": "general",
+                "coding_composite": 0.411,
+                "coding_quality": 0.879,
+                "general_composite": 0.466,
+                "general_quality": 1.0,
+                "latency_ms": 133176.0,
+                "latency_score": 0,
+                "toks_norm": 0.053
+            },
+            "name": "qwen3.5:35b"
+        },
+        {
+            "composite": 0.447,
+            "metrics": {
+                "avg_tok_per_sec": 10.3,
+                "category": "general",
+                "coding_composite": 0.471,
+                "coding_quality": 0.978,
+                "general_composite": 0.447,
+                "general_quality": 0.925,
+                "latency_ms": 8158.0,
+                "latency_score": 0,
+                "toks_norm": 0.103
+            },
+            "name": "gpt-oss:20b"
+        },
+        {
+            "composite": 0.437,
+            "metrics": {
+                "avg_tok_per_sec": 4.8,
+                "category": "general",
+                "coding_composite": 0.451,
+                "coding_quality": 0.755,
+                "general_composite": 0.437,
+                "general_quality": 0.725,
+                "latency_ms": 3065.7,
+                "latency_score": 0.387,
+                "toks_norm": 0.048
+            },
+            "name": "dolphin-mixtral:8x7b"
+        },
+        {
+            "composite": 0.406,
+            "metrics": {
+                "avg_tok_per_sec": 2.3,
+                "category": "general",
+                "coding_composite": 0.32,
+                "coding_quality": 0.696,
+                "general_composite": 0.406,
+                "general_quality": 0.887,
+                "latency_ms": 58429.3,
+                "latency_score": 0,
+                "toks_norm": 0.023
+            },
+            "name": "codestral:22b-v0.1-q4_K_M"
+        },
+        {
+            "composite": 0.396,
+            "metrics": {
+                "avg_tok_per_sec": 4.9,
+                "category": "general",
+                "coding_composite": 0.393,
+                "coding_quality": 0.84,
+                "general_composite": 0.396,
+                "general_quality": 0.848,
+                "latency_ms": 6865.3,
+                "latency_score": 0,
+                "toks_norm": 0.049
+            },
+            "name": "qwen2.5-coder:14b-instruct-q4_K_M"
+        },
+        {
+            "composite": 0.359,
+            "metrics": {
+                "avg_tok_per_sec": 12.1,
+                "category": "general",
+                "coding_composite": 0.417,
+                "coding_quality": 0.846,
+                "general_composite": 0.359,
+                "general_quality": 0.717,
+                "latency_ms": 6696.2,
+                "latency_score": 0,
+                "toks_norm": 0.121
+            },
+            "name": "mistral:7b-instruct"
+        }
+    ],
+    "slot1_general": "deepseek-coder-v2:16b-lite-instruct-q4_K_M",
+    "slot2_general": "qwen2.5-coder:7b-instruct-q4_K_M",
+    "slot3_coding": "deepseek-coder-v2:16b-lite-instruct-q4_K_M",
+    "slot4_coding": "none"
+}
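The slot-assignment logic itself is not part of this diff, but the four `slot*` fields are consistent with a simple reconstruction: take the top two general models, fill the coding slots from the coding ranking, and fall back to the best general model (and `"none"`) when that ranking is empty. The function below is a hypothetical sketch of that rule, not the repository's actual selector.

```python
def pick_slots(general_ranking: list, coding_ranking: list) -> dict:
    """Hypothetical slot assignment consistent with model_selection.json:
    two general slots from the general ranking, coding slots from the
    coding ranking, falling back to the best general model when empty."""
    gen = [m["name"] for m in general_ranking]
    cod = [m["name"] for m in coding_ranking]
    return {
        "slot1_general": gen[0],
        "slot2_general": gen[1],
        "slot3_coding": cod[0] if cod else gen[0],
        "slot4_coding": cod[1] if len(cod) > 1 else "none",
    }

ranking = [{"name": "deepseek-coder-v2:16b-lite-instruct-q4_K_M"},
           {"name": "qwen2.5-coder:7b-instruct-q4_K_M"}]
print(pick_slots(ranking, []))
```

With the empty `coding_ranking` from this run, the fallback explains why slot 3 duplicates slot 1 and slot 4 is `"none"` (and hence rotatable at deploy time via `-e slot4_model=<name>`).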

+ 98 - 0
deploy_ai.yml

@@ -0,0 +1,98 @@
+---
+# =============================================================================
+# AI Platform — Full Deployment
+# =============================================================================
+# Runs every role in dependency order, from pre-flight through DNS.
+#
+# Usage:
+#   ansible-playbook deploy_ai.yml -K                             # full deploy (-K prompts for sudo password)
+#   ansible-playbook deploy_ai.yml -K --tags vault                # Vault only
+#   ansible-playbook deploy_ai.yml -K --skip-tags benchmark       # skip benchmarking
+#   ansible-playbook deploy_ai.yml -K -e "slot4_model=deepseek-r1:14b"
+# =============================================================================
+
+# ── 1. Pre-flight — verify all hosts are reachable and healthy ────────────────
+- name: "Pre-flight checks"
+  ansible.builtin.import_playbook: playbooks/00_preflight.yml
+
+# ── 2. Vault — deploy HashiCorp Vault, init, unseal, populate secrets ─────────
+- name: "HashiCorp Vault"
+  ansible.builtin.import_playbook: playbooks/01_vault.yml
+
+# ── 3. Infrastructure — Docker CE + Ollama on ai_server ──────────────────────
+- name: "Infrastructure (Docker + Ollama)"
+  ansible.builtin.import_playbook: playbooks/02_infrastructure.yml
+
+# ── 4. Benchmark — score all installed models, select 4 warm-up slots ─────────
+- name: "Model benchmarking"
+  ansible.builtin.import_playbook: playbooks/03_benchmark.yml
+
+# ── 5. Models — pull slot models, create Modelfiles, start warm-up service ────
+- name: "Model slots and warm-up"
+  ansible.builtin.import_playbook: playbooks/04_models.yml
+
+# ── 6. Keycloak — deploy Keycloak, create realm and client ────────────────────
+- name: "Keycloak SSO"
+  ansible.builtin.import_playbook: playbooks/05_keycloak.yml
+
+# ── 7. Qdrant — deploy vector database for RAG ───────────────────────────────
+- name: "Qdrant vector database"
+  ansible.builtin.import_playbook: playbooks/06_qdrant.yml
+
+# ── 8. Open WebUI — deploy with Ollama + Qdrant + Keycloak OIDC ──────────────
+- name: "Open WebUI"
+  ansible.builtin.import_playbook: playbooks/07_openwebui.yml
+
+# ── 9. OpenClaw — deploy Telegram bot (skipped if no token provided) ──────────
+- name: "OpenClaw Telegram bot"
+  ansible.builtin.import_playbook: playbooks/08_openclaw.yml
+
+# ── 10. NGINX — deploy reverse-proxy configs ─────────────────────────────────
+- name: "NGINX reverse proxy"
+  ansible.builtin.import_playbook: playbooks/09_nginx.yml
+
+# ── 11. CoreDNS — add vault + ollama-api DNS records ─────────────────────────
+- name: "CoreDNS records"
+  ansible.builtin.import_playbook: playbooks/10_coredns.yml
+
+# ── 12. Vault OIDC — configure Keycloak as Vault login provider ───────────────
+- name: "Vault OIDC"
+  ansible.builtin.import_playbook: playbooks/11_vault_oidc.yml
+
+# ── 13. Summary — print all service URLs and credentials ─────────────────────
+- name: "Deployment summary"
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tags:
+    - summary
+  vars:
+    _token_file: "{{ playbook_dir }}/vault/.vault-token"
+    _vault_url: "http://{{ ai_server_ip }}:{{ vault_port }}"
+  tasks:
+    - name: "Summary | Load credentials from Vault"
+      ansible.builtin.set_fact:
+        _kc_admin_pass:       "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:admin_password token=' ~ lookup('ansible.builtin.file', _token_file) ~ ' url=' ~ _vault_url) }}"
+        _kc_realm_admin_pass: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:realm_admin_password token=' ~ lookup('ansible.builtin.file', _token_file) ~ ' url=' ~ _vault_url) }}"
+        _ollama_api_key:      "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/ollama:api_key token=' ~ lookup('ansible.builtin.file', _token_file) ~ ' url=' ~ _vault_url) }}"
+
+    - name: "Summary | Display deployment results"
+      ansible.builtin.debug:
+        msg: |
+          ╔══════════════════════════════════════════════════════╗
+          ║       {{ platform_name }} — Deployment Complete
+          ╠══════════════════════════════════════════════════════╣
+          ║  Open WebUI:     {{ openwebui_url }}
+          ║  Keycloak Admin: {{ keycloak_url }}
+          ║  Vault UI:       {{ vault_api_addr }}
+          ║  Ollama API:     {{ ollama_api_url }}
+          ╠══════════════════════════════════════════════════════╣
+          ║  Keycloak admin user:  admin                         ║
+          ║  Keycloak admin pass:  {{ _kc_admin_pass }}
+          ║  Realm admin user:     {{ keycloak_realm_admin_user }}
+          ║  Realm admin pass:     {{ _kc_realm_admin_pass }}
+          ║  Ollama API Key:       {{ _ollama_api_key }}
+          ╠══════════════════════════════════════════════════════╣
+          ║  Vault init file:  vault/.vault-init.json            ║
+          ║  KEEP THIS FILE SAFE — NOT IN GIT                    ║
+          ╚══════════════════════════════════════════════════════╝

+ 112 - 0
inventory/group_vars/all.yml

@@ -0,0 +1,112 @@
+---
+# ============================================================
+# Global Variables — AI Platform Ansible Automation
+# ============================================================
+
+# Domain and networking
+domain: example.com
+ai_server_ip: 192.168.1.100
+nginx_proxy_ip: 192.168.1.30
+coredns_host_ip: 192.168.1.29
+
+# SSH user for all managed hosts (override per-host in host_vars if needed)
+ansible_user: admin
+
+# Platform identity — used for Keycloak realm, Vault paths, UI display names
+platform_name: "AI Platform"
+vault_project_slug: "ai-platform"
+
+# Service URLs
+vault_url: "https://vault.{{ domain }}"
+keycloak_url: "https://idm.{{ domain }}"
+openwebui_url: "https://ollama-ui.{{ domain }}"
+ollama_api_url: "https://ollama-api.{{ domain }}"
+
+# Storage paths on ai_server
+ai_data_root: /mnt/ai_data
+ollama_models_path: "{{ ai_data_root }}/ollama_models"
+keycloak_data_path: "{{ ai_data_root }}/keycloak"
+qdrant_data_path: "{{ ai_data_root }}/qdrant"
+openwebui_data_path: "{{ ai_data_root }}/open-webui"
+openclaw_data_path: "{{ ai_data_root }}/openclaw"
+benchmark_results_path: "{{ ai_data_root }}/benchmarks"
+
+# Storage paths on coredns_host
+vault_config_path: /docker_mounts/vault/config
+vault_data_path: /docker_mounts/vault/data
+vault_scripts_path: /docker_mounts/vault
+coredns_zone_file: "/docker_mounts/coredns/{{ domain }}.db"
+
+# Local control-node paths (gitignored)
+vault_token_file: "{{ playbook_dir }}/../vault/.vault-token"
+vault_init_file: "{{ playbook_dir }}/../vault/.vault-init.json"
+
+# Vault configuration
+vault_port: 8202
+vault_api_addr: "https://vault.{{ domain }}"
+vault_secret_prefix: "secret/data/{{ vault_project_slug }}"
+vault_secret_meta_prefix: "secret/metadata/{{ vault_project_slug }}"
+vault_approle_name: "ai-services"
+
+# Service ports
+keycloak_port: 8180
+ollama_port: 11434
+qdrant_http_port: 6333
+qdrant_grpc_port: 6334
+
+# Ollama configuration
+ollama_host: "0.0.0.0:11434"
+ollama_num_threads: 28
+ollama_num_parallel: 4
+ollama_max_loaded_models: 4
+ollama_keep_alive: "-1"
+ollama_flash_attention: "1"
+
+# NUMA/CPU affinity - Dell M630, 2x E5-2690v4
+# NUMA node 1 (odd CPUs) has ~120 GB free RAM vs node 0's ~75 GB
+ollama_numa_node: "1"
+ollama_cpu_affinity: "1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49 51 53 55"
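The 28-entry affinity string above does not need to be hand-typed: on a 2x E5-2690v4 blade (14 cores x 2 threads per socket = 56 logical CPUs) with the common odd/even core-to-node interleave, NUMA node 1 owns exactly the odd CPU numbers. A small sketch, assuming that topology:

```python
# Odd-numbered logical CPUs 1..55 = NUMA node 1 on this topology.
odd_cpus = list(range(1, 56, 2))
assert len(odd_cpus) == 28  # matches ollama_num_threads above

affinity = " ".join(map(str, odd_cpus))
print(affinity)  # 1 3 5 ... 53 55
```

Verify the actual mapping with `numactl --hardware` before reusing this on different hardware; the interleave pattern varies by BIOS settings.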
+
+# Keycloak configuration
+keycloak_realm: "{{ vault_project_slug }}"
+keycloak_realm_display: "{{ platform_name }}"
+keycloak_client_id: open-webui
+keycloak_redirect_uri: "https://ollama-ui.{{ domain }}/*"
+keycloak_oidc_url: "https://idm.{{ domain }}/realms/{{ keycloak_realm }}"
+keycloak_realm_admin_user: "{{ vault_project_slug }}-admin"
+
+# Benchmark thresholds
+benchmark_thresholds:
+  min_tokens_per_sec: 5.0
+  min_quality_score: 0.6
+  min_composite_score: 0.55
+
+# Candidate models to recommend/pull if benchmark scores are below threshold
+candidate_models:
+  - name: "qwen2.5-coder:32b-instruct-q4_K_M"
+    size_gb: 20
+    expected_tokens_sec: 4.5
+    reason: "Larger qwen2.5-coder for higher quality"
+    category: coding
+  - name: "deepseek-coder-v2:latest"
+    size_gb: 9
+    expected_tokens_sec: 8.0
+    reason: "DeepSeek Coder V2 full model"
+    category: coding
+  - name: "codegemma:7b-instruct-q5_K_M"
+    size_gb: 5.5
+    expected_tokens_sec: 12.0
+    reason: "Fast Google coding model"
+    category: coding
+  - name: "starcoder2:15b-instruct-q4_K_M"
+    size_gb: 9.5
+    expected_tokens_sec: 7.0
+    reason: "StarCoder2 coding specialist"
+    category: coding
+
+# OpenClaw default model
+openclaw_model: "llama3.2:3b"
+
+# NGINX SSL certificate paths (on nginx_proxy)
+nginx_ssl_cert: "/etc/nginx/ssl/{{ domain }}.crt"
+nginx_ssl_key: "/etc/nginx/ssl/{{ domain }}.key"
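The paired `vault_secret_prefix` / `vault_secret_meta_prefix` variables reflect how Vault's KV v2 engine splits its API: reads and writes go through `<mount>/data/...`, while destroying all versions of a secret (the credential-rotation path mentioned in the release notes) goes through `<mount>/metadata/...`. A small helper sketching that relationship, using the defaults above:

```python
def kv2_paths(slug: str, mount: str = "secret") -> dict:
    """KV v2 path prefixes for a project slug: 'data' for reads/writes,
    'metadata' for deleting every version (credential rotation)."""
    return {
        "data": f"{mount}/data/{slug}",
        "metadata": f"{mount}/metadata/{slug}",
    }

print(kv2_paths("ai-platform"))
# {'data': 'secret/data/ai-platform', 'metadata': 'secret/metadata/ai-platform'}
```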

+ 26 - 0
inventory/host_vars/ai_server.yml

@@ -0,0 +1,26 @@
+---
+# ============================================================
+# Host Variables — ai_server
+# Dell M630 blade — AI inference platform
+# ============================================================
+
+# Services running on this host
+services:
+  ollama:
+    port: 11434
+    data_dir: "{{ ollama_models_path }}"
+  keycloak:
+    port: 8180
+    data_dir: "{{ keycloak_data_path }}"
+  qdrant:
+    http_port: 6333
+    grpc_port: 6334
+    data_dir: "{{ qdrant_data_path }}"
+  openwebui:
+    port: 8080
+    data_dir: "{{ openwebui_data_path }}"
+  openclaw:
+    data_dir: "{{ openclaw_data_path }}"
+
+# Disk checks
+required_ai_data_free_gb: 500

+ 16 - 0
inventory/host_vars/coredns_host.yml

@@ -0,0 +1,16 @@
+---
+# ============================================================
+# Host Variables — coredns_host
+# Docker host — HashiCorp Vault, CoreDNS, misc containers
+# ============================================================
+
+# Vault container settings (legacy): Vault now runs as a systemd
+# service on ai_server and is no longer hosted here
+vault_container_name: vault
+vault_image: hashicorp/vault:1.15.6
+
+# CoreDNS container name
+coredns_container_name: coredns
+
+# Disk checks
+required_root_free_gb: 10

+ 14 - 0
inventory/host_vars/nginx_proxy.yml

@@ -0,0 +1,14 @@
+---
+# ============================================================
+# Host Variables — nginx_proxy
+# NGINX reverse proxy — all public HTTPS termination
+# ============================================================
+
+# NGINX config directory
+nginx_conf_dir: /etc/nginx/conf.d
+
+# Managed configuration files on this host
+nginx_managed_configs:
+  - vault.conf
+  - ollama-api.conf
+  - keycloak-proxy.conf

+ 19 - 0
inventory/hosts.yml

@@ -0,0 +1,19 @@
+---
+# inventory/hosts.yml — host groups and generic default connection details
+# Override ansible_host and ansible_user for your environment in inventory/local.yml (gitignored)
+all:
+  children:
+    ai_servers:
+      hosts:
+        ai_server:
+          ansible_host: 192.168.1.100
+
+    nginx_proxies:
+      hosts:
+        nginx_proxy:
+          ansible_host: 192.168.1.30
+
+    coredns_hosts:
+      hosts:
+        coredns_host:
+          ansible_host: 192.168.1.29

+ 125 - 0
playbooks/00_preflight.yml

@@ -0,0 +1,125 @@
+---
+# playbooks/00_preflight.yml
+# Pre-flight checks: control-node dependencies, SSH/sudo connectivity, disk space, Ollama API health
+
+- name: "Pre-flight | Control node dependencies"
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tags:
+    - preflight
+    - local-deps
+  tasks:
+    - name: "Preflight | Ensure hvac Python library is installed (required for Vault lookups)"
+      ansible.builtin.pip:
+        name: hvac
+        state: present
+      become: false
+
+- name: "Pre-flight | Verify SSH and sudo access on all hosts"
+  hosts: all
+  gather_facts: true
+  become: true
+  tags:
+    - preflight
+    - connectivity
+  tasks:
+    - name: "Preflight | Confirm sudo privileges"
+      ansible.builtin.command: whoami
+      changed_when: false
+      register: whoami_result
+
+    - name: "Preflight | Assert running as root via sudo"
+      ansible.builtin.assert:
+        that:
+          - whoami_result.stdout == "root"
+        fail_msg: "sudo escalation failed on {{ inventory_hostname }}"
+        success_msg: "sudo access confirmed on {{ inventory_hostname }}"
+
+- name: "Pre-flight | Disk space checks on ai_server"
+  hosts: ai_server
+  gather_facts: false
+  become: true
+  tags:
+    - preflight
+    - disk
+  tasks:
+    - name: "Preflight | Get /mnt/ai_data mount info"
+      ansible.builtin.command: df --output=avail -BG /mnt/ai_data
+      changed_when: false
+      register: ai_data_disk
+
+    - name: "Preflight | Parse available disk space on /mnt/ai_data"
+      ansible.builtin.set_fact:
+        ai_data_avail_gb: "{{ ai_data_disk.stdout_lines[-1] | regex_replace('[^0-9]', '') | int }}"
+
+    - name: "Preflight | Assert /mnt/ai_data has >= 500 GB free"
+      ansible.builtin.assert:
+        that:
+          - ai_data_avail_gb | int >= 500
+        fail_msg: >-
+          CRITICAL: /mnt/ai_data on ai_server has only {{ ai_data_avail_gb }} GB free.
+          At least 500 GB is required for AI models and application data.
+        success_msg: "/mnt/ai_data has {{ ai_data_avail_gb }} GB free (>= 500 GB required)"
+
+- name: "Pre-flight | Disk space checks on coredns_host"
+  hosts: coredns_host
+  gather_facts: false
+  become: true
+  tags:
+    - preflight
+    - disk
+  tasks:
+    - name: "Preflight | Get / mount info on coredns_host"
+      ansible.builtin.command: df --output=avail -BG /
+      changed_when: false
+      register: root_disk
+
+    - name: "Preflight | Parse available disk space on /"
+      ansible.builtin.set_fact:
+        root_avail_gb: "{{ root_disk.stdout_lines[-1] | regex_replace('[^0-9]', '') | int }}"
+
+    - name: "Preflight | Assert / has >= 10 GB free on coredns_host"
+      ansible.builtin.assert:
+        that:
+          - root_avail_gb | int >= 10
+        fail_msg: >-
+          CRITICAL: / on coredns_host has only {{ root_avail_gb }} GB free.
+          At least 10 GB is required.
+        success_msg: "/ has {{ root_avail_gb }} GB free (>= 10 GB required)"
+
+- name: "Pre-flight | Ollama API health check on ai_server"
+  hosts: ai_server
+  gather_facts: false
+  become: false
+  tags:
+    - preflight
+    - ollama
+  tasks:
+    - name: "Preflight | Check Ollama API is responding"
+      ansible.builtin.uri:
+        url: "http://localhost:11434/api/tags"
+        method: GET
+        return_content: true
+        status_code: 200
+        timeout: 10
+      register: ollama_health
+      failed_when: false
+      retries: 12
+      delay: 5
+      until: ollama_health.status is defined and ollama_health.status == 200
+
+    - name: "Preflight | Assert Ollama API is healthy"
+      ansible.builtin.assert:
+        that:
+          - ollama_health.status == 200
+        fail_msg: >-
+          CRITICAL: Ollama API is not responding on ai_server at http://localhost:11434/api/tags.
+          HTTP status: {{ ollama_health.status | default('unreachable') }}.
+          Ensure Ollama is installed and running before proceeding.
+        success_msg: "Ollama API is healthy and responding on ai_server"
+
+    - name: "Preflight | Display available Ollama models"
+      ansible.builtin.debug:
+        msg: "Ollama models available: {{ (ollama_health.json.models | default([])) | map(attribute='name') | list }}"
+      when: ollama_health.status == 200
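The final debug task maps over `ollama_health.json.models` to list model names; Ollama's `/api/tags` endpoint returns a JSON object of the form `{"models": [{"name": ...}, ...]}`. The same extraction can be done outside Ansible. A sketch with a hypothetical sample payload:

```python
import json

def model_names(tags_response: str) -> list:
    """Extract model names from an Ollama /api/tags payload, mirroring
    the 'Display available Ollama models' debug task above."""
    data = json.loads(tags_response)
    return [m["name"] for m in data.get("models", [])]

sample = '{"models": [{"name": "deepseek-r1:14b"}, {"name": "phi4:14b"}]}'
print(model_names(sample))  # ['deepseek-r1:14b', 'phi4:14b']
```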

+ 518 - 0
playbooks/01_vault.yml

@@ -0,0 +1,518 @@
+---
+# playbooks/01_vault.yml
+# Deploy HashiCorp Vault as a native systemd service on ai_server.
+# Using the official HashiCorp RPM avoids Docker networking/SELinux issues.
+
+- name: "Vault | Deploy and configure HashiCorp Vault"
+  hosts: ai_server
+  become: true
+  gather_facts: true
+  tags:
+    - vault
+  vars:
+    vault_port: 8202
+    vault_config_dir: /etc/vault.d
+    vault_data_dir: /mnt/ai_data/vault/data
+    vault_addr: "http://127.0.0.1:{{ vault_port }}"
+    vault_init_file: "{{ playbook_dir }}/../vault/.vault-init.json"
+    vault_token_file: "{{ playbook_dir }}/../vault/.vault-token"
+
+  vars_prompt:
+    - name: telegram_token
+      prompt: "Telegram Bot Token (from @BotFather). Press ENTER to skip"
+      private: false
+      default: ""
+
+  tasks:
+    # ── Install Vault via official HashiCorp RPM ──────────────────────
+    - name: "Vault | Remove any stale HashiCorp repo file"
+      ansible.builtin.file:
+        path: /etc/yum.repos.d/hashicorp.repo
+        state: absent
+      tags:
+        - vault-install
+
+    - name: "Vault | Install dnf-plugins-core"
+      ansible.builtin.dnf:
+        name: dnf-plugins-core
+        state: present
+      tags:
+        - vault-install
+
+    - name: "Vault | Download HashiCorp RPM repo file"
+      ansible.builtin.get_url:
+        url: https://rpm.releases.hashicorp.com/RHEL/hashicorp.repo
+        dest: /etc/yum.repos.d/hashicorp.repo
+        mode: "0644"
+      tags:
+        - vault-install
+
+    - name: "Vault | Install vault package"
+      ansible.builtin.dnf:
+        name: vault
+        state: present
+      tags:
+        - vault-install
+
+    # ── Data directory ────────────────────────────────────────────────
+    - name: "Vault | Create data directory"
+      ansible.builtin.file:
+        path: "{{ vault_data_dir }}"
+        state: directory
+        mode: "0750"
+        owner: vault
+        group: vault
+      tags:
+        - vault-dirs
+
+    # ── Configuration ─────────────────────────────────────────────────
+    - name: "Vault | Template vault.hcl configuration"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/vault/vault.hcl.j2"
+        dest: "{{ vault_config_dir }}/vault.hcl"
+        mode: "0640"
+        owner: vault
+        group: vault
+      notify: Restart vault
+      tags:
+        - vault-config
+
+    - name: "Vault | Show rendered vault.hcl"
+      ansible.builtin.command: cat {{ vault_config_dir }}/vault.hcl
+      changed_when: false
+      register: vault_hcl_content
+      tags:
+        - vault-config
+
+    - name: "Vault | Display vault.hcl"
+      ansible.builtin.debug:
+        var: vault_hcl_content.stdout_lines
+      tags:
+        - vault-config
+
+    # ── Firewall ───────────────────────────────────────────────────────
+    - name: "Vault | Open Vault port in firewalld"
+      ansible.posix.firewalld:
+        port: "{{ vault_port }}/tcp"
+        permanent: true
+        immediate: true
+        state: enabled
+      tags:
+        - vault-service
+
+    # ── Start service ─────────────────────────────────────────────────
+    - name: "Vault | Enable and start vault.service"
+      ansible.builtin.systemd:
+        name: vault
+        state: started
+        enabled: true
+        daemon_reload: true
+      tags:
+        - vault-service
+
+    - name: "Vault | Wait for Vault to become ready"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/health"
+        method: GET
+        status_code: [200, 429, 472, 473, 501, 503]
+        timeout: 5
+      register: vault_health
+      retries: 30
+      delay: 5
+      until: vault_health.status in [200, 429, 472, 473, 501, 503]
+      tags:
+        - vault-health
+
+    # ── Initialization ────────────────────────────────────────────────
+    - name: "Vault | Check if Vault is already initialized"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/health"
+        method: GET
+        status_code: [200, 429, 472, 473, 501, 503]
+      register: vault_init_check
+      tags:
+        - vault-init
+
+    - name: "Vault | Set initialization status fact"
+      ansible.builtin.set_fact:
+        vault_is_initialized: "{{ vault_init_check.status != 501 }}"
+      tags:
+        - vault-init
+
+    - name: "Vault | Initialize Vault"
+      ansible.builtin.command:
+        cmd: >-
+          vault operator init
+          -key-shares=1 -key-threshold=1 -format=json
+      environment:
+        VAULT_ADDR: "{{ vault_addr }}"
+      register: vault_init_output
+      when: not vault_is_initialized
+      changed_when: true
+      tags:
+        - vault-init
+
+    - name: "Vault | Ensure local vault directory exists on control node"
+      ansible.builtin.file:
+        path: "{{ playbook_dir }}/../vault"
+        state: directory
+        mode: "0700"
+      delegate_to: localhost
+      become: false
+      when: not vault_is_initialized
+      tags:
+        - vault-init
+
+    - name: "Vault | Save init output to control node"
+      ansible.builtin.copy:
+        content: "{{ vault_init_output.stdout }}"
+        dest: "{{ vault_init_file }}"
+        mode: "0600"
+      delegate_to: localhost
+      become: false
+      when: not vault_is_initialized
+      tags:
+        - vault-init
+
+    - name: "Vault | Parse init output"
+      ansible.builtin.set_fact:
+        vault_init_data: "{{ vault_init_output.stdout | from_json }}"
+      when: not vault_is_initialized
+      tags:
+        - vault-init
+
+    - name: "Vault | Display unseal key and root token"
+      ansible.builtin.debug:
+        msg:
+          - "============================================="
+          - "  VAULT INITIALIZATION COMPLETE"
+          - "============================================="
+          - "  Unseal Key: {{ vault_init_data.unseal_keys_b64[0] }}"
+          - "  Root Token: {{ vault_init_data.root_token }}"
+          - "============================================="
+          - "  SAVE THESE VALUES SECURELY!"
+          - "============================================="
+      when: not vault_is_initialized
+      tags:
+        - vault-init
+
+    # ── Load existing init data if already initialized ────────────────
+    - name: "Vault | Load existing init data from control node"
+      ansible.builtin.slurp:
+        src: "{{ vault_init_file }}"
+      delegate_to: localhost
+      become: false
+      register: vault_init_file_content
+      when: vault_is_initialized
+      tags:
+        - vault-unseal
+
+    - name: "Vault | Parse existing init data"
+      ansible.builtin.set_fact:
+        vault_init_data: "{{ vault_init_file_content.content | b64decode | from_json }}"
+      when: vault_is_initialized
+      tags:
+        - vault-unseal
+
+    # ── Unseal ────────────────────────────────────────────────────────
+    - name: "Vault | Check seal status"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/seal-status"
+        method: GET
+      register: vault_seal_status
+      tags:
+        - vault-unseal
+
+    - name: "Vault | Unseal Vault"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/unseal"
+        method: PUT
+        body_format: json
+        body:
+          key: "{{ vault_init_data.unseal_keys_b64[0] }}"
+        status_code: 200
+      when: vault_seal_status.json.sealed | default(true)
+      tags:
+        - vault-unseal
+
+    - name: "Vault | Set root token fact"
+      ansible.builtin.set_fact:
+        vault_root_token: "{{ vault_init_data.root_token }}"
+      tags:
+        - vault-configure
+
+    # ── Enable KV v2 secrets engine ───────────────────────────────────
+    - name: "Vault | Check existing secrets engines"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/mounts"
+        method: GET
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        status_code: 200
+      register: vault_mounts
+      tags:
+        - vault-configure
+
+    - name: "Vault | Enable KV v2 secrets engine at 'secret'"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/mounts/secret"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          type: kv
+          options:
+            version: "2"
+        status_code: [200, 204]
+      when: "'secret/' not in vault_mounts.json"
+      tags:
+        - vault-configure
+
+    # ── Create ansible policy ─────────────────────────────────────────
+    - name: "Vault | Create ansible-policy"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/policies/acl/ansible-policy"
+        method: PUT
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          policy: |
+            path "{{ vault_secret_prefix }}/*" {
+              capabilities = ["create", "read", "update", "delete", "list"]
+            }
+            path "{{ vault_secret_meta_prefix }}/*" {
+              capabilities = ["list", "read", "delete"]
+            }
+            path "{{ vault_secret_meta_prefix }}" {
+              capabilities = ["list"]
+            }
+            path "secret/metadata/" {
+              capabilities = ["list"]
+            }
+        status_code: [200, 204]
+      tags:
+        - vault-configure
+
+    # ── Create ansible token ──────────────────────────────────────────
+    - name: "Vault | Create ansible token with ansible-policy"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/auth/token/create"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          policies:
+            - ansible-policy
+          display_name: ansible
+          ttl: "8760h"
+          renewable: true
+          no_parent: true
+        status_code: 200
+      register: ansible_token_result
+      tags:
+        - vault-configure
+
+    - name: "Vault | Save ansible token to control node"
+      ansible.builtin.copy:
+        content: "{{ ansible_token_result.json.auth.client_token }}"
+        dest: "{{ vault_token_file }}"
+        mode: "0600"
+      delegate_to: localhost
+      become: false
+      tags:
+        - vault-configure
+
+    # ── Enable AppRole auth ───────────────────────────────────────────
+    - name: "Vault | Check existing auth methods"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/auth"
+        method: GET
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        status_code: 200
+      register: vault_auth_methods
+      tags:
+        - vault-approle
+
+    - name: "Vault | Enable AppRole auth method"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/auth/approle"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          type: approle
+        status_code: [200, 204]
+      when: "'approle/' not in vault_auth_methods.json"
+      tags:
+        - vault-approle
+
+    - name: "Vault | Create {{ vault_approle_name }} AppRole"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/auth/approle/role/{{ vault_approle_name }}"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          token_policies:
+            - ansible-policy
+          token_ttl: "1h"
+          token_max_ttl: "4h"
+          secret_id_ttl: "0"
+        status_code: [200, 204]
+      tags:
+        - vault-approle
+
+    # ── Generate and populate secrets (only write if absent) ──────────
+    # Each secret is checked first — existing secrets are never overwritten.
+    # To rotate a credential, delete its Vault path and re-run this playbook.
+
+    - name: "Vault | Check existing secrets"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/{{ vault_secret_prefix }}/{{ item }}"
+        method: GET
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        status_code: [200, 404]
+      loop:
+        - ollama
+        - openwebui
+        - keycloak
+      register: existing_secrets
+      tags:
+        - vault-secrets
+
+    - name: "Vault | Build existing secrets map"
+      ansible.builtin.set_fact:
+        secret_exists: "{{ secret_exists | default({}) | combine({item.item: item.status == 200}) }}"
+      loop: "{{ existing_secrets.results }}"
+      tags:
+        - vault-secrets
+
+    # ── Ollama ────────────────────────────────────────────────────────
+    - name: "Vault | Generate Ollama API key"
+      ansible.builtin.command: openssl rand -hex 32
+      register: ollama_api_key
+      changed_when: false
+      delegate_to: localhost
+      become: false
+      when: not secret_exists['ollama']
+      tags:
+        - vault-secrets
+
+    - name: "Vault | Store Ollama secrets"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/{{ vault_secret_prefix }}/ollama"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          data:
+            api_key: "{{ ollama_api_key.stdout }}"
+        status_code: [200, 204]
+      when: not secret_exists['ollama']
+      tags:
+        - vault-secrets
+
+    # ── Open WebUI ────────────────────────────────────────────────────
+    - name: "Vault | Generate Open WebUI secret key"
+      ansible.builtin.command: openssl rand -hex 32
+      register: openwebui_secret_key
+      changed_when: false
+      delegate_to: localhost
+      become: false
+      when: not secret_exists['openwebui']
+      tags:
+        - vault-secrets
+
+    - name: "Vault | Store Open WebUI secrets"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/{{ vault_secret_prefix }}/openwebui"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          data:
+            secret_key: "{{ openwebui_secret_key.stdout }}"
+        status_code: [200, 204]
+      when: not secret_exists['openwebui']
+      tags:
+        - vault-secrets
+
+    # ── Keycloak ──────────────────────────────────────────────────────
+    - name: "Vault | Generate Keycloak admin password"
+      ansible.builtin.command: openssl rand -base64 16
+      register: keycloak_admin_password
+      changed_when: false
+      delegate_to: localhost
+      become: false
+      when: not secret_exists['keycloak']
+      tags:
+        - vault-secrets
+
+    - name: "Vault | Generate Keycloak client secret"
+      ansible.builtin.command: openssl rand -hex 32
+      register: keycloak_client_secret
+      changed_when: false
+      delegate_to: localhost
+      become: false
+      when: not secret_exists['keycloak']
+      tags:
+        - vault-secrets
+
+    - name: "Vault | Generate Keycloak realm admin password"
+      ansible.builtin.command: openssl rand -base64 16
+      register: keycloak_realm_admin_password
+      changed_when: false
+      delegate_to: localhost
+      become: false
+      when: not secret_exists['keycloak']
+      tags:
+        - vault-secrets
+
+    - name: "Vault | Store Keycloak secrets"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/{{ vault_secret_prefix }}/keycloak"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          data:
+            admin_password: "{{ keycloak_admin_password.stdout }}"
+            client_secret: "{{ keycloak_client_secret.stdout }}"
+            realm_admin_password: "{{ keycloak_realm_admin_password.stdout }}"
+        status_code: [200, 204]
+      when: not secret_exists['keycloak']
+      tags:
+        - vault-secrets
+
+    # ── OpenClaw (always write when token is provided) ────────────────
+    - name: "Vault | Store Telegram token in Vault"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/{{ vault_secret_prefix }}/openclaw"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          data:
+            telegram_token: "{{ telegram_token }}"
+        status_code: [200, 204]
+      when: telegram_token | length > 0
+      tags:
+        - vault-secrets
+
+  handlers:
+    - name: Restart vault
+      ansible.builtin.systemd:
+        name: vault
+        state: restarted
+        daemon_reload: true

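The initialization and unseal logic above keys entirely off the HTTP status code of Vault's `/v1/sys/health` endpoint (200 = initialized, unsealed, active; 429 = standby; 472/473 = DR/performance standby; 501 = not initialized; 503 = sealed). The same decision table, sketched as a plain Python function for clarity — the mapping follows Vault's documented health-endpoint semantics:

```python
# Interpret Vault /v1/sys/health HTTP status codes the way the play does:
# 501 means "not yet initialized", 503 means "sealed", 200 means "active".
def vault_state(status: int) -> dict:
    """Map a /sys/health status code to (initialized, sealed, active) flags."""
    known = {200, 429, 472, 473, 501, 503}
    if status not in known:
        raise ValueError(f"unexpected health status: {status}")
    return {
        "initialized": status != 501,   # mirrors the vault_is_initialized fact
        "sealed": status == 503,
        "active": status == 200,
    }

# A fresh install reports 501 and needs `vault operator init`;
# after a host reboot it reports 503 and only needs an unseal.
print(vault_state(501))  # -> {'initialized': False, 'sealed': False, 'active': False}
print(vault_state(503))  # -> {'initialized': True, 'sealed': True, 'active': False}
```

This is why the play can safely re-run: a 501 triggers the init path, a 503 triggers only the unseal path, and a 200 skips both.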
+ 168 - 0
playbooks/02_infrastructure.yml

@@ -0,0 +1,168 @@
+---
+# playbooks/02_infrastructure.yml
+# Install Docker and configure Ollama on ai_server
+
+- name: "Infrastructure | Docker and Ollama setup on ai_server"
+  hosts: ai_server
+  become: true
+  gather_facts: true
+  tags:
+    - infrastructure
+  vars:
+    vault_token_file: "{{ playbook_dir }}/../vault/.vault-token"
+    vault_url: "http://{{ ai_server_ip }}:{{ vault_port }}"
+    ollama_num_threads: 28
+    ollama_num_parallel: 4
+    ollama_max_loaded_models: 4
+    ollama_keep_alive: "-1"
+    ollama_numa_node: "1"
+    ollama_cpu_affinity: "1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49 51 53 55"
+
+  pre_tasks:
+    - name: "Infrastructure | Install Python Docker SDK prerequisites"
+      ansible.builtin.dnf:
+        name:
+          - python3-pip
+          - python3-requests
+        state: present
+      tags: always
+
+    - name: "Infrastructure | Install Python docker SDK via pip"
+      ansible.builtin.pip:
+        name: docker
+        state: present
+        executable: pip3
+      tags: always
+
+  tasks:
+    # ── Docker installation ──────────────────────────────────────────
+    - name: "Docker | Check if Docker CE repo is already configured"
+      ansible.builtin.stat:
+        path: /etc/yum.repos.d/docker-ce.repo
+      register: docker_repo_file
+      tags:
+        - docker
+
+    - name: "Docker | Add Docker CE repository"
+      ansible.builtin.command:
+        cmd: dnf config-manager --add-repo https://download.docker.com/linux/fedora/docker-ce.repo
+      when: not docker_repo_file.stat.exists
+      changed_when: true
+      tags:
+        - docker
+
+    - name: "Docker | Install Docker CE packages"
+      ansible.builtin.dnf:
+        name:
+          - docker-ce
+          - docker-ce-cli
+          - containerd.io
+          - docker-compose-plugin
+        state: present
+      tags:
+        - docker
+
+    - name: "Docker | Add {{ ansible_user }} to docker group"
+      ansible.builtin.user:
+        name: "{{ ansible_user }}"
+        groups: docker
+        append: true
+      tags:
+        - docker
+
+    - name: "Docker | Add ollama user to docker group"
+      ansible.builtin.user:
+        name: ollama
+        groups: docker
+        append: true
+      tags:
+        - docker
+
+    - name: "Docker | Start and enable docker.service"
+      ansible.builtin.systemd:
+        name: docker
+        state: started
+        enabled: true
+      tags:
+        - docker
+
+    # ── Ollama installation and configuration ────────────────────────
+    - name: "Ollama | Check if ollama binary exists"
+      ansible.builtin.stat:
+        path: "{{ item }}"
+      loop:
+        - /usr/local/bin/ollama
+        - /usr/bin/ollama
+      register: ollama_binary_check
+      tags:
+        - ollama
+
+    - name: "Ollama | Set ollama installed fact"
+      ansible.builtin.set_fact:
+        ollama_installed: "{{ ollama_binary_check.results | selectattr('stat.exists', 'equalto', true) | list | length > 0 }}"
+      tags:
+        - ollama
+
+    - name: "Ollama | Install Ollama"
+      ansible.builtin.shell:
+        cmd: curl -fsSL https://ollama.com/install.sh | sh
+      when: not ollama_installed
+      changed_when: true
+      tags:
+        - ollama
+
+    - name: "Ollama | Retrieve OLLAMA_API_KEY from Vault"
+      ansible.builtin.set_fact:
+        ollama_api_key: >-
+          {{ lookup('community.hashi_vault.hashi_vault',
+                    vault_secret_prefix ~ '/ollama:api_key'
+                    ~ ' token=' ~ lookup('ansible.builtin.file', vault_token_file)
+                    ~ ' url=' ~ vault_url) }}
+      tags:
+        - ollama
+
+    - name: "Ollama | Create systemd override directory"
+      ansible.builtin.file:
+        path: /etc/systemd/system/ollama.service.d
+        state: directory
+        mode: "0755"
+        owner: root
+        group: root
+      tags:
+        - ollama
+
+    - name: "Ollama | Template systemd override configuration"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/ollama/override.conf.j2"
+        dest: /etc/systemd/system/ollama.service.d/override.conf
+        mode: "0644"
+        owner: root
+        group: root
+      notify:
+        - Reload systemd and restart ollama
+      tags:
+        - ollama
+
+    - name: "Ollama | Ensure Ollama is running"
+      ansible.builtin.systemd:
+        name: ollama
+        state: started
+        enabled: true
+      tags:
+        - ollama
+
+    - name: "Ollama | Wait for Ollama API to be ready"
+      ansible.builtin.uri:
+        url: "http://localhost:11434/api/tags"
+        method: GET
+        status_code: 200
+        timeout: 10
+      register: ollama_ready
+      retries: 24
+      delay: 5
+      until: ollama_ready.status == 200
+      tags:
+        - ollama
+
+  handlers:
+    - name: Reload systemd and restart ollama
+      ansible.builtin.systemd:
+        name: ollama
+        state: restarted
+        daemon_reload: true

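Both the Vault and Ollama plays rely on the `uri` + `retries`/`until` idiom to block until a freshly started service answers. Stripped of Ansible, the control flow is just a bounded poll that treats connection errors as "not yet". A minimal sketch — `fake_fetch` below is a stand-in for the real HTTP call:

```python
import time

def wait_until_ready(fetch, retries=24, delay=5.0):
    """Poll fetch() until it returns HTTP 200, retrying on connection
    errors, mirroring the uri + retries/until idiom in the plays above."""
    for attempt in range(1, retries + 1):
        try:
            if fetch() == 200:
                return attempt          # how many polls it took
        except OSError:
            pass                        # connection refused: still booting
        time.sleep(delay)
    raise TimeoutError("service never became ready")

# Simulate a service that refuses a connection, answers 503, then comes up.
responses = iter([OSError("refused"), 503, 200])

def fake_fetch():
    r = next(responses)
    if isinstance(r, Exception):
        raise r
    return r

print(wait_until_ready(fake_fetch, retries=5, delay=0))  # -> 3
```

The Ansible equivalent gets the connection-error tolerance for free: a failed `uri` attempt counts as one retry, so 24 retries at 5 seconds bounds the wait at roughly two minutes.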
+ 314 - 0
playbooks/03_benchmark.yml

@@ -0,0 +1,314 @@
+---
+# playbooks/03_benchmark.yml
+# Benchmark installed Ollama models and select optimal models for each slot
+
+- name: "Benchmark | Evaluate Ollama models"
+  hosts: ai_server
+  become: false
+  gather_facts: true
+  tags:
+    - benchmark
+  vars:
+    benchmark_models: ""
+    pull_if_better: false
+    min_composite_score: 0.50
+    ollama_api_url: "http://localhost:11434"
+    benchmark_results_dir: "{{ playbook_dir }}/../benchmarks/results"
+    test_prompts:
+      code_gen:
+        prompt: "Write a Python merge sort with type hints, docstring, and 3 unit tests"
+        category: coding
+        weight: 1.0
+      debug:
+        prompt: >-
+          Here is a Python function with 3 bugs. Find and fix all bugs:
+
+          def calculate_average(numbers):
+              total = 0
+              for n in numbers:
+                  total =+ n
+              average = total / len(numbers
+              return averege
+        category: coding
+        weight: 1.0
+      refactor:
+        prompt: >-
+          Refactor this for readability and performance:
+
+          def f(l):
+              r=[]
+              for i in range(len(l)):
+                  if l[i]%2==0:
+                      r.append(l[i]*2)
+              return r
+        category: coding
+        weight: 1.0
+      explain:
+        prompt: "Explain how Python's GIL works and when it matters"
+        category: general
+        weight: 1.0
+      creative:
+        prompt: "Suggest 5 fun family activities for a rainy weekend"
+        category: general
+        weight: 1.0
+      reasoning:
+        prompt: "I have 3 apples. I give away half. Then I get 4 more. How many do I have?"
+        category: general
+        weight: 1.0
+      latency:
+        prompt: "Hi"
+        category: latency
+        weight: 0.5
+
+  tasks:
+    - name: "Benchmark | Ensure results directory exists on control node"
+      ansible.builtin.file:
+        path: "{{ benchmark_results_dir }}"
+        state: directory
+        mode: "0755"
+      delegate_to: localhost
+      tags:
+        - benchmark-setup
+
+    - name: "Benchmark | Wait for Ollama API to be ready"
+      ansible.builtin.uri:
+        url: "http://localhost:11434/api/tags"
+        method: GET
+        status_code: 200
+        timeout: 10
+      register: ollama_ready
+      retries: 24
+      delay: 5
+      until: ollama_ready.status == 200
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Discover installed models"
+      ansible.builtin.command: ollama list
+      changed_when: false
+      register: ollama_list_output
+      retries: 6
+      delay: 10
+      until: ollama_list_output.rc == 0
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Parse model names from ollama list"
+      ansible.builtin.set_fact:
+        installed_models: "{{ ollama_list_output.stdout_lines[1:] | map('split') | map('first') | list }}"
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Set models_to_benchmark to all installed models"
+      ansible.builtin.set_fact:
+        models_to_benchmark: "{{ installed_models }}"
+      when: benchmark_models | default('') | length == 0
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Set models_to_benchmark to specified subset"
+      ansible.builtin.set_fact:
+        models_to_benchmark: "{{ benchmark_models.split(',') | map('trim') | list }}"
+      when: benchmark_models | default('') | length > 0
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Display models to benchmark"
+      ansible.builtin.debug:
+        msg: "Will benchmark the following models: {{ models_to_benchmark }}"
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Run test prompts against each model"
+      ansible.builtin.uri:
+        url: "{{ ollama_api_url }}/api/generate"
+        method: POST
+        body_format: json
+        body:
+          model: "{{ item.0 }}"
+          prompt: "{{ test_prompts[item.1].prompt }}"
+          stream: false
+        timeout: 300
+        status_code: 200
+      loop: "{{ models_to_benchmark | product(test_prompts.keys() | list) | list }}"
+      loop_control:
+        label: "{{ item.0 }} / {{ item.1 }}"
+      register: benchmark_raw_results
+      failed_when: false
+      tags:
+        - benchmark-run
+
+    - name: "Benchmark | Compute per-model metrics"
+      ansible.builtin.set_fact:
+        model_metrics: |
+          {% set ns = namespace(results={}) %}
+          {% for model in models_to_benchmark %}
+          {%   set ns2 = namespace(coding_quality=0, coding_count=0, general_quality=0, general_count=0, total_toks=0, total_eval_time=0, ttft_sum=0, ttft_count=0, latency_ns=0) %}
+          {%   for result in benchmark_raw_results.results %}
+          {%     if result.item[0] == model and result.status == 200 %}
+          {%       set test_name = result.item[1] %}
+          {%       set resp = result.json | default({}) %}
+          {%       set eval_count = resp.eval_count | default(0) | int %}
+          {%       set eval_duration = resp.eval_duration | default(1) | int %}
+          {%       set prompt_eval_duration = resp.prompt_eval_duration | default(0) | int %}
+          {%       set response_text = resp.response | default('') %}
+          {%       set tok_per_sec = (eval_count / (eval_duration / 1000000000.0)) if eval_duration > 0 else 0 %}
+          {%       set ns2.total_toks = ns2.total_toks + tok_per_sec %}
+          {%       set ns2.ttft_sum = ns2.ttft_sum + prompt_eval_duration %}
+          {%       set ns2.ttft_count = ns2.ttft_count + 1 %}
+          {%       if test_name == 'latency' %}
+          {%         set ns2.latency_ns = eval_duration + prompt_eval_duration %}
+          {%       endif %}
+          {%       set resp_len = response_text | length %}
+          {%       if test_name in ['code_gen', 'debug', 'refactor'] %}
+          {%         set has_def = 1 if 'def ' in response_text else 0 %}
+          {%         set has_return = 1 if 'return' in response_text else 0 %}
+          {%         set length_score = [resp_len / 1500.0, 1.0] | min %}
+          {%         set quality = (has_def * 0.3 + has_return * 0.3 + length_score * 0.4) %}
+          {%         set ns2.coding_quality = ns2.coding_quality + quality %}
+          {%         set ns2.coding_count = ns2.coding_count + 1 %}
+          {%       elif test_name in ['explain', 'creative', 'reasoning'] %}
+          {%         set length_score = [resp_len / 800.0, 1.0] | min %}
+          {%         set has_structure = 1 if ('\n' in response_text and resp_len > 100) else 0 %}
+          {%         set quality = (length_score * 0.6 + has_structure * 0.4) %}
+          {%         set ns2.general_quality = ns2.general_quality + quality %}
+          {%         set ns2.general_count = ns2.general_count + 1 %}
+          {%       endif %}
+          {%     endif %}
+          {%   endfor %}
+          {%   set coding_avg = (ns2.coding_quality / ns2.coding_count) if ns2.coding_count > 0 else 0 %}
+          {%   set general_avg = (ns2.general_quality / ns2.general_count) if ns2.general_count > 0 else 0 %}
+          {%   set test_count = (ns2.ttft_count) if ns2.ttft_count > 0 else 1 %}
+          {%   set avg_toks = ns2.total_toks / test_count %}
+          {%   set toks_norm = [avg_toks / 100.0, 1.0] | min %}
+          {%   set latency_ms = ns2.latency_ns / 1000000.0 if ns2.latency_ns > 0 else 9999 %}
+          {%   set latency_score = [1.0 - (latency_ms / 5000.0), 0] | max %}
+          {%   set coding_composite = coding_avg * 0.45 + toks_norm * 0.30 + latency_score * 0.25 %}
+          {%   set general_composite = general_avg * 0.45 + toks_norm * 0.30 + latency_score * 0.25 %}
+          {%   set category = 'coding' if (coding_composite - general_composite) >= 0.15 else 'general' %}
+          {%   set _ = ns.results.update({model: {'coding_quality': coding_avg | round(3), 'general_quality': general_avg | round(3), 'avg_tok_per_sec': avg_toks | round(1), 'toks_norm': toks_norm | round(3), 'latency_ms': latency_ms | round(1), 'latency_score': latency_score | round(3), 'coding_composite': coding_composite | round(3), 'general_composite': general_composite | round(3), 'category': category}}) %}
+          {% endfor %}
+          {{ ns.results | to_json }}
+      tags:
+        - benchmark-compute
+
+    - name: "Benchmark | Parse model metrics"
+      ansible.builtin.set_fact:
+        parsed_metrics: "{{ model_metrics | from_json }}"
+      tags:
+        - benchmark-compute
+
+    - name: "Benchmark | Rank models and select slots"
+      ansible.builtin.set_fact:
+        model_selection: |
+          {% set general_models = [] %}
+          {% set coding_models = [] %}
+          {% for model, metrics in parsed_metrics.items() %}
+          {%   if metrics.category == 'general' %}
+          {%     set _ = general_models.append({'name': model, 'composite': metrics.general_composite, 'metrics': metrics}) %}
+          {%   else %}
+          {%     set _ = coding_models.append({'name': model, 'composite': metrics.coding_composite, 'metrics': metrics}) %}
+          {%   endif %}
+          {% endfor %}
+          {% set general_sorted = general_models | sort(attribute='composite', reverse=true) %}
+          {% set coding_sorted = coding_models | sort(attribute='composite', reverse=true) %}
+          {% set slot1 = general_sorted[0].name if general_sorted | length > 0 else 'none' %}
+          {% set slot2 = general_sorted[1].name if general_sorted | length > 1 else (general_sorted[0].name if general_sorted | length > 0 else 'none') %}
+          {% set slot3 = coding_sorted[0].name if coding_sorted | length > 0 else (general_sorted[0].name if general_sorted | length > 0 else 'none') %}
+          {% set slot4 = coding_sorted[1].name if coding_sorted | length > 1 else (coding_sorted[0].name if coding_sorted | length > 0 else 'none') %}
+          {{ {'slot1_general': slot1, 'slot2_general': slot2, 'slot3_coding': slot3, 'slot4_coding': slot4, 'all_metrics': parsed_metrics, 'general_ranking': general_sorted, 'coding_ranking': coding_sorted} | to_json }}
+      tags:
+        - benchmark-select
+
+    - name: "Benchmark | Parse model selection"
+      ansible.builtin.set_fact:
+        selection: "{{ model_selection | from_json }}"
+      tags:
+        - benchmark-select
+
+    - name: "Benchmark | Display model selection results"
+      ansible.builtin.debug:
+        msg:
+          - "============================================="
+          - "  MODEL SELECTION RESULTS"
+          - "============================================="
+          - "  Slot 1 (General Primary):   {{ selection.slot1_general }}"
+          - "  Slot 2 (General Secondary): {{ selection.slot2_general }}"
+          - "  Slot 3 (Coding Primary):    {{ selection.slot3_coding }}"
+          - "  Slot 4 (Coding Secondary):  {{ selection.slot4_coding }}"
+          - "============================================="
+      tags:
+        - benchmark-select
+
+    - name: "Benchmark | Generate timestamp"
+      ansible.builtin.set_fact:
+        benchmark_timestamp: "{{ ansible_date_time.iso8601_basic_short }}"
+      tags:
+        - benchmark-report
+
+    - name: "Benchmark | Save benchmark results markdown"
+      ansible.builtin.copy:
+        content: |
+          # Benchmark Results - {{ benchmark_timestamp }}
+
+          ## Model Selection
+          | Slot | Role | Model | Composite Score |
+          |------|------|-------|----------------|
+          | 1 | General (Primary) | {{ selection.slot1_general }} | {{ parsed_metrics[selection.slot1_general].general_composite | default('N/A') }} |
+          | 2 | General (Secondary) | {{ selection.slot2_general }} | {{ parsed_metrics[selection.slot2_general].general_composite | default('N/A') }} |
+          | 3 | Coding (Primary) | {{ selection.slot3_coding }} | {{ parsed_metrics[selection.slot3_coding].coding_composite | default('N/A') }} |
+          | 4 | Coding (Secondary) | {{ selection.slot4_coding }} | {{ parsed_metrics[selection.slot4_coding].coding_composite | default('N/A') }} |
+
+          ## Detailed Metrics
+          {% for model, metrics in parsed_metrics.items() %}
+          ### {{ model }}
+          - **Category**: {{ metrics.category }}
+          - **Coding Quality**: {{ metrics.coding_quality }}
+          - **General Quality**: {{ metrics.general_quality }}
+          - **Avg Tokens/sec**: {{ metrics.avg_tok_per_sec }}
+          - **Latency (ms)**: {{ metrics.latency_ms }}
+          - **Coding Composite**: {{ metrics.coding_composite }}
+          - **General Composite**: {{ metrics.general_composite }}
+          {% endfor %}
+
+          ## Scoring Formula
+          - Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+          - Category: coding if (coding_composite - general_composite) >= 0.15, else general
+        dest: "{{ benchmark_results_dir }}/benchmark_{{ benchmark_timestamp }}.md"
+        mode: "0644"
+      delegate_to: localhost
+      tags:
+        - benchmark-report
+
+    - name: "Benchmark | Save model_selection.json"
+      ansible.builtin.copy:
+        content: "{{ selection | to_nice_json }}"
+        dest: "{{ benchmark_results_dir }}/model_selection.json"
+        mode: "0644"
+      delegate_to: localhost
+      tags:
+        - benchmark-report
+
+    - name: "Benchmark | Check minimum composite scores"
+      ansible.builtin.debug:
+        msg: >-
+          WARNING: Best composite score for {{ item.key }} models is below threshold
+          ({{ min_composite_score }}). Consider pulling additional models.
+          Recommended candidates: qwen2.5-coder:14b, deepseek-coder-v2:16b, codellama:34b
+      when: >-
+        (item.value.coding_composite < min_composite_score) and
+        (item.value.general_composite < min_composite_score)
+      loop: "{{ parsed_metrics | dict2items }}"
+      loop_control:
+        label: "{{ item.key }}"
+      tags:
+        - benchmark-report
+
+    - name: "Benchmark | Pull recommended model if pull_if_better is true"
+      ansible.builtin.command: "ollama pull qwen2.5-coder:14b"
+      when:
+        - pull_if_better | bool
+        - parsed_metrics.values() | map(attribute='coding_composite') | max < min_composite_score
+      changed_when: true
+      tags:
+        - benchmark-pull

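The scoring formula buried in the Jinja block above is easier to see in isolation. A standalone sketch with the same weights (quality 0.45, normalized tokens/sec 0.30, latency 0.25) and the same 0.15 category margin:

```python
def composite(quality, avg_tok_per_sec, latency_ms):
    """Composite = quality*0.45 + normalized token speed*0.30 + latency*0.25,
    matching the weights in the playbook's scoring formula."""
    toks_norm = min(avg_tok_per_sec / 100.0, 1.0)          # 100 tok/s caps the speed score
    latency_score = max(1.0 - latency_ms / 5000.0, 0.0)    # 5 s latency scores zero
    return quality * 0.45 + toks_norm * 0.30 + latency_score * 0.25

def categorize(coding_quality, general_quality, avg_tok_per_sec, latency_ms):
    """A model is classed 'coding' only when its coding composite clearly
    wins (margin >= 0.15); otherwise it defaults to 'general'."""
    c = composite(coding_quality, avg_tok_per_sec, latency_ms)
    g = composite(general_quality, avg_tok_per_sec, latency_ms)
    return ("coding" if c - g >= 0.15 else "general", round(c, 3), round(g, 3))

# Example: a strong coder (quality 0.9 vs 0.4 general), 60 tok/s, 800 ms latency.
print(categorize(0.9, 0.4, 60.0, 800.0))  # -> ('coding', 0.795, 0.57)
```

Because speed and latency are shared between both composites, the category decision reduces to the quality gap alone: `c - g = 0.45 * (coding_quality - general_quality)`, so a model needs a quality margin of about 0.33 to land in the coding slots.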
+ 204 - 0
playbooks/04_models.yml

@@ -0,0 +1,204 @@
+---
+# playbooks/04_models.yml
+# Pull and register Ollama models based on benchmark selection
+
+- name: "Models | Pull and register Ollama models"
+  hosts: ai_server
+  become: true
+  gather_facts: true
+  tags:
+    - models
+  vars:
+    model_selection_file: "{{ playbook_dir }}/../benchmarks/results/model_selection.json"
+    modelfiles_dir: /mnt/ai_data/ollama_models/modelfiles
+    slot4_model: ""
+
+  tasks:
+    # ── Load benchmark results ───────────────────────────────────────
+    - name: "Models | Read model_selection.json from control node"
+      ansible.builtin.slurp:
+        src: "{{ model_selection_file }}"
+      delegate_to: localhost
+      become: false
+      register: model_selection_raw
+      tags:
+        - models-load
+
+    - name: "Models | Parse model selection data"
+      ansible.builtin.set_fact:
+        model_selection: "{{ model_selection_raw.content | b64decode | from_json }}"
+      tags:
+        - models-load
+
+    - name: "Models | Apply slot4 override if provided"
+      ansible.builtin.set_fact:
+        model_selection: "{{ model_selection | combine({'slot4_coding': slot4_model}) }}"
+      when: slot4_model | length > 0
+      tags:
+        - models-load
+
+    - name: "Models | Display selected models"
+      ansible.builtin.debug:
+        msg:
+          - "Slot 1 (General Primary):   {{ model_selection.slot1_general }}"
+          - "Slot 2 (General Secondary): {{ model_selection.slot2_general }}"
+          - "Slot 3 (Coding Primary):    {{ model_selection.slot3_coding }}"
+          - "Slot 4 (Coding Secondary):  {{ model_selection.slot4_coding }}"
+      tags:
+        - models-load
+
+    # ── Pull models ──────────────────────────────────────────────────
+    - name: "Models | Get currently installed models"
+      ansible.builtin.command: ollama list
+      changed_when: false
+      register: current_models
+      tags:
+        - models-pull
+
+    - name: "Models | Set installed models list"
+      ansible.builtin.set_fact:
+        installed_model_names: >-
+          {{ (current_models.stdout_lines | default([]))[1:] |
+             map('split') |
+             map('first') |
+             list }}
+      tags:
+        - models-pull
+
+    - name: "Models | Pull slot models if not already present"
+      ansible.builtin.command: "ollama pull {{ item }}"
+      loop:
+        - "{{ model_selection.slot1_general }}"
+        - "{{ model_selection.slot2_general }}"
+        - "{{ model_selection.slot3_coding }}"
+        - "{{ model_selection.slot4_coding }}"
+      when:
+        - item | length > 0
+        - item != 'none'
+        - item not in installed_model_names
+      changed_when: true
+      loop_control:
+        label: "Pulling {{ item }}"
+      tags:
+        - models-pull
+
+    - name: "Models | Pull fixed models if not already present"
+      ansible.builtin.command: "ollama pull {{ item }}"
+      loop:
+        - "llama3.2:3b"
+        - "gemma3:12b-it-q4_K_M"
+      when: item not in installed_model_names
+      changed_when: true
+      loop_control:
+        label: "Pulling {{ item }}"
+      tags:
+        - models-pull
+
+    # ── Create Modelfiles ────────────────────────────────────────────
+    - name: "Models | Create modelfiles directory"
+      ansible.builtin.file:
+        path: "{{ modelfiles_dir }}"
+        state: directory
+        mode: "0755"
+        owner: root
+        group: root
+      tags:
+        - models-modelfile
+
+    - name: "Models | Template coder-128k Modelfile"
+      ansible.builtin.copy:
+        content: |
+          FROM {{ model_selection.slot3_coding }}
+          PARAMETER num_ctx 131072
+          SYSTEM You are an expert coding assistant. You write clean, efficient, well-documented code. Always include type hints and follow best practices.
+        dest: "{{ modelfiles_dir }}/Modelfile.coder-128k"
+        mode: "0644"
+      tags:
+        - models-modelfile
+
+    - name: "Models | Template coder-32k Modelfile"
+      ansible.builtin.copy:
+        content: |
+          FROM {{ model_selection.slot4_coding }}
+          PARAMETER num_ctx 32768
+          SYSTEM You are an expert coding assistant. You write clean, efficient, well-documented code. Always include type hints and follow best practices.
+        dest: "{{ modelfiles_dir }}/Modelfile.coder-32k"
+        mode: "0644"
+      when:
+        - model_selection.slot4_coding | length > 0
+        - model_selection.slot4_coding != 'none'
+      tags:
+        - models-modelfile
+
+    - name: "Models | Template llama-family Modelfile"
+      ansible.builtin.copy:
+        content: |
+          FROM llama3.2:3b
+          PARAMETER num_ctx 8192
+          SYSTEM You are a helpful, friendly family assistant. Provide safe, age-appropriate responses suitable for all family members.
+        dest: "{{ modelfiles_dir }}/Modelfile.llama-family"
+        mode: "0644"
+      tags:
+        - models-modelfile
+
+    - name: "Models | Template gemma-family Modelfile"
+      ansible.builtin.copy:
+        content: |
+          FROM gemma3:12b-it-q4_K_M
+          PARAMETER num_ctx 32768
+          SYSTEM You are a helpful, friendly family assistant. Provide safe, age-appropriate responses suitable for all family members.
+        dest: "{{ modelfiles_dir }}/Modelfile.gemma-family"
+        mode: "0644"
+      tags:
+        - models-modelfile
+
+    # ── Register models ──────────────────────────────────────────────
+    - name: "Models | Register custom models with Ollama"
+      ansible.builtin.command: "ollama create {{ item.name }} -f {{ modelfiles_dir }}/{{ item.file }}"
+      loop:
+        - { name: "coder-128k", file: "Modelfile.coder-128k" }
+        - { name: "coder-32k",  file: "Modelfile.coder-32k",  slot: "{{ model_selection.slot4_coding }}" }
+        - { name: "llama-family", file: "Modelfile.llama-family" }
+        - { name: "gemma-family", file: "Modelfile.gemma-family" }
+      when: item.slot is not defined or (item.slot | length > 0 and item.slot != 'none')
+      changed_when: true
+      loop_control:
+        label: "Creating {{ item.name }}"
+      tags:
+        - models-register
+
+    # ── Warmup service ───────────────────────────────────────────────
+    - name: "Models | Template warmup script"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/ollama/warmup.sh.j2"
+        dest: /usr/local/bin/ollama-warmup.sh
+        mode: "0755"
+        owner: root
+        group: root
+      tags:
+        - models-warmup
+
+    - name: "Models | Template warmup systemd service"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/ollama/ollama-warmup.service.j2"
+        dest: /etc/systemd/system/ollama-warmup.service
+        mode: "0644"
+        owner: root
+        group: root
+      tags:
+        - models-warmup
+
+    - name: "Models | Reload systemd daemon"
+      ansible.builtin.systemd:
+        daemon_reload: true
+      tags:
+        - models-warmup
+
+    - name: "Models | Enable and start warmup service"
+      ansible.builtin.systemd:
+        name: ollama-warmup
+        enabled: true
+        state: started
+      tags:
+        - models-warmup
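
The Jinja2 pipeline above that derives `installed_model_names` amounts to: drop the header row of `ollama list` output, then keep the first whitespace-separated column. A minimal Python sketch of the same transformation (the sample output below is an assumed shape of `ollama list`, not captured from a live host):

```python
def installed_models(ollama_list_output: str) -> list:
    # Skip the header line, take the first column (the model name) of each row.
    lines = ollama_list_output.strip().splitlines()
    return [line.split()[0] for line in lines[1:] if line.split()]

# Hypothetical `ollama list` output for illustration:
sample = """NAME          ID      SIZE    MODIFIED
llama3.2:3b   abc123  2.0 GB  2 days ago
qwen2.5:14b   def456  9.0 GB  5 days ago"""
```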

+ 366 - 0
playbooks/05_keycloak.yml

@@ -0,0 +1,366 @@
+---
+# playbooks/05_keycloak.yml
+# Deploy Keycloak on ai_server, create realm, client, roles, and admin user
+
+- name: "Keycloak | Deploy and configure Keycloak"
+  hosts: ai_server
+  become: true
+  gather_facts: true
+  tags:
+    - keycloak
+  vars:
+    vault_token_file: "{{ playbook_dir }}/../vault/.vault-token"
+    vault_url: "http://{{ ai_server_ip }}:{{ vault_port }}"
+    keycloak_container_name: keycloak
+    keycloak_port: 8180
+    keycloak_data_dir: /mnt/ai_data/keycloak
+    keycloak_realm: "{{ vault_project_slug }}"
+    keycloak_base_url: "http://localhost:8180"
+
+  tasks:
+    # ── Secret resolution: Vault if available, otherwise generate locally ─
+    - name: "Keycloak | Check if Vault token exists"
+      ansible.builtin.stat:
+        path: "{{ vault_token_file }}"
+      register: vault_token_stat
+      delegate_to: localhost
+      become: false
+      tags:
+        - keycloak-secrets
+
+    - name: "Keycloak | Set vault_available fact"
+      ansible.builtin.set_fact:
+        vault_available: "{{ vault_token_stat.stat.exists }}"
+      tags:
+        - keycloak-secrets
+
+    # -- From Vault (when available) --
+    - name: "Keycloak | Retrieve admin password from Vault"
+      ansible.builtin.set_fact:
+        keycloak_admin_password: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:admin_password token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+      when: vault_available
+      tags:
+        - keycloak-secrets
+
+    - name: "Keycloak | Retrieve client secret from Vault"
+      ansible.builtin.set_fact:
+        keycloak_client_secret: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:client_secret token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+      when: vault_available
+      tags:
+        - keycloak-secrets
+
+    - name: "Keycloak | Retrieve realm admin password from Vault"
+      ansible.builtin.set_fact:
+        keycloak_realm_admin_password: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:realm_admin_password token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+      when: vault_available
+      tags:
+        - keycloak-secrets
+
+    # -- Generate locally (when Vault not available) --
+    - name: "Keycloak | Generate admin password locally"
+      ansible.builtin.command: openssl rand -base64 16
+      register: _kc_admin_pass_gen
+      delegate_to: localhost
+      become: false
+      changed_when: false
+      when: not vault_available
+      tags:
+        - keycloak-secrets
+
+    - name: "Keycloak | Generate client secret locally"
+      ansible.builtin.command: openssl rand -hex 32
+      register: _kc_client_secret_gen
+      delegate_to: localhost
+      become: false
+      changed_when: false
+      when: not vault_available
+      tags:
+        - keycloak-secrets
+
+    - name: "Keycloak | Generate realm admin password locally"
+      ansible.builtin.command: openssl rand -base64 16
+      register: _kc_realm_pass_gen
+      delegate_to: localhost
+      become: false
+      changed_when: false
+      when: not vault_available
+      tags:
+        - keycloak-secrets
+
+    - name: "Keycloak | Set locally generated secrets as facts"
+      ansible.builtin.set_fact:
+        keycloak_admin_password: "{{ _kc_admin_pass_gen.stdout }}"
+        keycloak_client_secret: "{{ _kc_client_secret_gen.stdout }}"
+        keycloak_realm_admin_password: "{{ _kc_realm_pass_gen.stdout }}"
+      when: not vault_available
+      tags:
+        - keycloak-secrets
+
+    - name: "Keycloak | Save generated credentials to vault/keycloak-credentials.txt"
+      ansible.builtin.copy:
+        content: |
+          # Keycloak credentials — generated {{ ansible_date_time.iso8601 }}
+          # Vault was not available; store these securely.
+          keycloak_admin_password={{ keycloak_admin_password }}
+          keycloak_client_secret={{ keycloak_client_secret }}
+          keycloak_realm_admin_password={{ keycloak_realm_admin_password }}
+        dest: "{{ playbook_dir }}/../vault/keycloak-credentials.txt"
+        mode: "0600"
+      delegate_to: localhost
+      become: false
+      when: not vault_available
+      tags:
+        - keycloak-secrets
+
+    # ── Container deployment ─────────────────────────────────────────
+    - name: "Keycloak | Create data directory"
+      ansible.builtin.file:
+        path: "{{ keycloak_data_dir }}"
+        state: directory
+        mode: "0755"
+        owner: "1000"
+        group: "1000"
+      tags:
+        - keycloak-deploy
+
+    - name: "Keycloak | Run Keycloak container"
+      community.docker.docker_container:
+        name: "{{ keycloak_container_name }}"
+        image: quay.io/keycloak/keycloak:24.0
+        state: started
+        restart_policy: unless-stopped
+        ports:
+          - "{{ keycloak_port }}:8080"
+        env:
+          KEYCLOAK_ADMIN: admin
+          KEYCLOAK_ADMIN_PASSWORD: "{{ keycloak_admin_password }}"
+          KC_PROXY_HEADERS: xforwarded
+          KC_HOSTNAME: "https://idm.{{ domain }}"
+          KC_HTTP_ENABLED: "true"
+        volumes:
+          - "{{ keycloak_data_dir }}:/opt/keycloak/data"
+        command: start
+      tags:
+        - keycloak-deploy
+
+    - name: "Keycloak | Wait for Keycloak to be ready"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/realms/master"
+        method: GET
+        status_code: 200
+        timeout: 10
+      register: keycloak_ready
+      retries: 30
+      delay: 10
+      until: keycloak_ready.status == 200
+      tags:
+        - keycloak-deploy
+
+    # ── Admin token ──────────────────────────────────────────────────
+    - name: "Keycloak | Get admin access token"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/realms/master/protocol/openid-connect/token"
+        method: POST
+        body_format: form-urlencoded
+        body:
+          grant_type: password
+          client_id: admin-cli
+          username: admin
+          password: "{{ keycloak_admin_password }}"
+        status_code: 200
+      register: keycloak_admin_token
+      tags:
+        - keycloak-configure
+
+    - name: "Keycloak | Set admin token fact"
+      ansible.builtin.set_fact:
+        kc_token: "{{ keycloak_admin_token.json.access_token }}"
+      tags:
+        - keycloak-configure
+
+    # ── Create realm ─────────────────────────────────────────────────
+    - name: "Keycloak | Check if realm exists"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+        status_code: [200, 404]
+      register: realm_check
+      tags:
+        - keycloak-realm
+
+    - name: "Keycloak | Create realm {{ keycloak_realm }}"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms"
+        method: POST
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+          Content-Type: application/json
+        body_format: json
+        body:
+          realm: "{{ keycloak_realm }}"
+          displayName: "{{ keycloak_realm_display }}"
+          enabled: true
+        status_code: [201, 409]
+      when: realm_check.status == 404
+      tags:
+        - keycloak-realm
+
+    # ── Create client ────────────────────────────────────────────────
+    - name: "Keycloak | Check if open-webui client exists"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/clients?clientId=open-webui"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+        status_code: 200
+      register: client_check
+      tags:
+        - keycloak-client
+
+    - name: "Keycloak | Create open-webui client"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/clients"
+        method: POST
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+          Content-Type: application/json
+        body_format: json
+        body:
+          clientId: open-webui
+          enabled: true
+          protocol: openid-connect
+          publicClient: false
+          clientAuthenticatorType: client-secret
+          secret: "{{ keycloak_client_secret }}"
+          redirectUris:
+            - "{{ keycloak_redirect_uri }}"
+          webOrigins:
+            - "{{ openwebui_url }}"
+          standardFlowEnabled: true
+          directAccessGrantsEnabled: false
+        status_code: [201, 409]
+      when: client_check.json | length == 0
+      tags:
+        - keycloak-client
+
+    # ── Create realm roles ───────────────────────────────────────────
+    - name: "Keycloak | Create ai-user role"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/roles"
+        method: POST
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+          Content-Type: application/json
+        body_format: json
+        body:
+          name: ai-user
+          description: "Standard AI platform user"
+        status_code: [201, 409]
+      tags:
+        - keycloak-roles
+
+    - name: "Keycloak | Create ai-admin role"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/roles"
+        method: POST
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+          Content-Type: application/json
+        body_format: json
+        body:
+          name: ai-admin
+          description: "AI platform administrator"
+        status_code: [201, 409]
+      tags:
+        - keycloak-roles
+
+    # ── Create realm admin user ──────────────────────────────────────
+    - name: "Keycloak | Check if realm admin user exists"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/users?username={{ keycloak_realm_admin_user }}"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+        status_code: 200
+      register: admin_user_check
+      tags:
+        - keycloak-users
+
+    - name: "Keycloak | Create realm admin user"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/users"
+        method: POST
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+          Content-Type: application/json
+        body_format: json
+        body:
+          username: "{{ keycloak_realm_admin_user }}"
+          enabled: true
+          emailVerified: true
+          credentials:
+            - type: password
+              value: "{{ keycloak_realm_admin_password }}"
+              temporary: false
+        status_code: [201, 409]
+      when: admin_user_check.json | length == 0
+      tags:
+        - keycloak-users
+
+    - name: "Keycloak | Get realm admin user ID"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/users?username={{ keycloak_realm_admin_user }}"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+        status_code: 200
+      register: admin_user_info
+      tags:
+        - keycloak-users
+
+    - name: "Keycloak | Get ai-admin role representation"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/roles/ai-admin"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+        status_code: 200
+      register: ai_admin_role
+      tags:
+        - keycloak-users
+
+    - name: "Keycloak | Assign ai-admin role to realm admin user"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/users/{{ admin_user_info.json[0].id }}/role-mappings/realm"
+        method: POST
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+          Content-Type: application/json
+        body_format: json
+        body:
+          - id: "{{ ai_admin_role.json.id }}"
+            name: ai-admin
+        status_code: [200, 204]
+      when: admin_user_info.json | length > 0
+      tags:
+        - keycloak-users
+
+    # ── Store Keycloak secrets in Vault ──────────────────────────────
+    - name: "Keycloak | Store Keycloak secrets and OIDC URL in Vault"
+      ansible.builtin.uri:
+        url: "{{ vault_url }}/v1/{{ vault_secret_prefix }}/keycloak"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ lookup('ansible.builtin.file', vault_token_file) }}"
+        body_format: json
+        body:
+          data:
+            admin_password: "{{ keycloak_admin_password }}"
+            client_secret: "{{ keycloak_client_secret }}"
+            realm_admin_password: "{{ keycloak_realm_admin_password }}"
+            oidc_url: "{{ keycloak_oidc_url }}"
+        status_code: [200, 204]
+      when: vault_available
+      tags:
+        - keycloak-vault
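
The realm, client, role, and user tasks above all follow the same idempotent-create pattern against the Keycloak admin REST API: GET the resource accepting 200 or 404, POST only when it was absent, and tolerate 409 on POST so re-runs stay safe. A sketch of the two status checks (function names are illustrative):

```python
def needs_create(check_status: int) -> bool:
    # GET returned 404: the resource does not exist yet, so POST it.
    return check_status == 404

def create_ok(post_status: int) -> bool:
    # 201 = created; 409 = already exists, which a re-run treats as success.
    return post_status in (201, 409)
```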

+ 57 - 0
playbooks/06_qdrant.yml

@@ -0,0 +1,57 @@
+---
+# playbooks/06_qdrant.yml
+# Deploy Qdrant vector database on ai_server
+
+- name: "Qdrant | Deploy Qdrant vector database"
+  hosts: ai_server
+  become: true
+  gather_facts: false
+  tags:
+    - qdrant
+  vars:
+    qdrant_data_dir: /mnt/ai_data/qdrant
+    qdrant_container_name: qdrant
+
+  tasks:
+    - name: "Qdrant | Create data directory"
+      ansible.builtin.file:
+        path: "{{ qdrant_data_dir }}"
+        state: directory
+        mode: "0755"
+        owner: root
+        group: root
+      tags:
+        - qdrant-deploy
+
+    - name: "Qdrant | Run Qdrant container"
+      community.docker.docker_container:
+        name: "{{ qdrant_container_name }}"
+        image: qdrant/qdrant:latest
+        state: started
+        restart_policy: unless-stopped
+        ports:
+          - "127.0.0.1:6333:6333"
+          - "127.0.0.1:6334:6334"
+        volumes:
+          - "{{ qdrant_data_dir }}:/qdrant/storage"
+      tags:
+        - qdrant-deploy
+
+    - name: "Qdrant | Wait for Qdrant to be ready"
+      ansible.builtin.uri:
+        url: "http://127.0.0.1:6333/healthz"
+        method: GET
+        status_code: 200
+        timeout: 5
+      register: qdrant_health
+      retries: 15
+      delay: 5
+      until: qdrant_health.status == 200
+      tags:
+        - qdrant-deploy
+
+    - name: "Qdrant | Display status"
+      ansible.builtin.debug:
+        msg: "Qdrant is running and healthy on ports 6333 (HTTP) and 6334 (gRPC)"
+      tags:
+        - qdrant-deploy

+ 117 - 0
playbooks/07_openwebui.yml

@@ -0,0 +1,117 @@
+---
+# playbooks/07_openwebui.yml
+# Deploy Open WebUI on ai_server
+
+- name: "Open WebUI | Deploy Open WebUI"
+  hosts: ai_server
+  become: true
+  gather_facts: false
+  tags:
+    - openwebui
+  vars:
+    vault_token_file: "{{ playbook_dir }}/../vault/.vault-token"
+    vault_url: "http://{{ ai_server_ip }}:{{ vault_port }}"
+    openwebui_container_name: open-webui
+    openwebui_data_dir: /mnt/ai_data/open-webui
+
+  tasks:
+    # ── Retrieve secrets from Vault ──────────────────────────────────
+    - name: "Open WebUI | Retrieve Ollama API key from Vault"
+      ansible.builtin.set_fact:
+        ollama_api_key: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/ollama:api_key token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+      tags:
+        - openwebui-secrets
+
+    - name: "Open WebUI | Retrieve Keycloak client secret from Vault"
+      ansible.builtin.set_fact:
+        keycloak_client_secret: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:client_secret token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+      tags:
+        - openwebui-secrets
+
+    - name: "Open WebUI | Retrieve Open WebUI secret key from Vault"
+      ansible.builtin.set_fact:
+        openwebui_secret_key: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/openwebui:secret_key token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+      tags:
+        - openwebui-secrets
+
+    - name: "Open WebUI | Retrieve OIDC URL from Vault"
+      ansible.builtin.set_fact:
+        keycloak_oidc_url: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:oidc_url token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+      tags:
+        - openwebui-secrets
+
+    # ── Container deployment ─────────────────────────────────────────
+    - name: "Open WebUI | Stop and remove existing container"
+      community.docker.docker_container:
+        name: "{{ openwebui_container_name }}"
+        state: absent
+      tags:
+        - openwebui-deploy
+
+    - name: "Open WebUI | Create data directory"
+      ansible.builtin.file:
+        path: "{{ openwebui_data_dir }}"
+        state: directory
+        mode: "0755"
+        owner: root
+        group: root
+      tags:
+        - openwebui-deploy
+
+    - name: "Open WebUI | Run Open WebUI container"
+      community.docker.docker_container:
+        name: "{{ openwebui_container_name }}"
+        image: ghcr.io/open-webui/open-webui:main
+        state: started
+        restart_policy: unless-stopped
+        ports:
+          - "8080:8080"
+        etc_hosts:
+          host.docker.internal: host-gateway
+        volumes:
+          - "{{ openwebui_data_dir }}:/app/backend/data"
+        env:
+          OLLAMA_BASE_URL: "http://host.docker.internal:11434"
+          OLLAMA_API_KEY: "{{ ollama_api_key }}"
+          WEBUI_SECRET_KEY: "{{ openwebui_secret_key }}"
+          WEBUI_AUTH: "true"
+          ENABLE_OAUTH_SIGNUP: "true"
+          OAUTH_PROVIDER_NAME: "{{ platform_name }}"
+          OAUTH_CLIENT_ID: "open-webui"
+          OAUTH_CLIENT_SECRET: "{{ keycloak_client_secret }}"
+          OPENID_PROVIDER_URL: "{{ keycloak_oidc_url }}/.well-known/openid-configuration"
+          OAUTH_SCOPES: "openid email profile"
+          ENABLE_OAUTH_ROLE_MANAGEMENT: "true"
+          OAUTH_ROLES_CLAIM: "realm_access.roles"
+          OAUTH_ALLOWED_ROLES: "ai-user,ai-admin"
+          OAUTH_ADMIN_ROLES: "ai-admin"
+          ENABLE_RAG_WEB_SEARCH: "false"
+          RAG_EMBEDDING_ENGINE: "ollama"
+          RAG_EMBEDDING_MODEL: "nomic-embed-text"
+          RAG_OLLAMA_BASE_URL: "http://host.docker.internal:11434"
+          VECTOR_DB: "qdrant"
+          QDRANT_URI: "http://host.docker.internal:6333"
+          ENABLE_ADMIN_EXPORT: "true"
+          DEFAULT_MODELS: "llama-family"
+          WEBUI_NAME: "{{ platform_name }}"
+      tags:
+        - openwebui-deploy
+
+    - name: "Open WebUI | Wait for Open WebUI to be ready"
+      ansible.builtin.uri:
+        url: "http://localhost:8080"
+        method: GET
+        status_code: 200
+        timeout: 10
+      register: openwebui_health
+      retries: 30
+      delay: 10
+      until: openwebui_health.status == 200
+      tags:
+        - openwebui-deploy
+
+    - name: "Open WebUI | Display status"
+      ansible.builtin.debug:
+        msg: "Open WebUI is running at http://localhost:8080 (proxied via {{ openwebui_url }})"
+      tags:
+        - openwebui-deploy
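
`OAUTH_ROLES_CLAIM: "realm_access.roles"` above tells Open WebUI to read roles from a nested claim in the ID token. Open WebUI's actual resolution logic is internal to the app; the sketch below only illustrates what a dotted claim path selects from a decoded token payload:

```python
def claim_path(claims: dict, path: str):
    # Walk a dotted path like "realm_access.roles" through nested dicts,
    # returning an empty list when any segment is missing.
    node = claims
    for key in path.split("."):
        node = node.get(key, {}) if isinstance(node, dict) else {}
    return node or []
```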

+ 171 - 0
playbooks/08_openclaw.yml

@@ -0,0 +1,171 @@
+---
+# playbooks/08_openclaw.yml
+# Deploy OpenClaw Telegram bot on ai_server (optional)
+
+- name: "OpenClaw | Deploy OpenClaw Telegram bot"
+  hosts: ai_server
+  become: true
+  gather_facts: true
+  tags:
+    - openclaw
+  vars:
+    vault_token_file: "{{ playbook_dir }}/../vault/.vault-token"
+    vault_url: "http://{{ ai_server_ip }}:{{ vault_port }}"
+    openclaw_data_dir: /mnt/ai_data/openclaw
+
+  vars_prompt:
+    - name: telegram_token_input
+      prompt: "Telegram Bot Token (from @BotFather). Press ENTER to use token already in Vault"
+      private: false
+      default: ""
+
+  tasks:
+    # ── Store token in Vault if provided ─────────────────────────────
+    - name: "OpenClaw | Store Telegram token in Vault"
+      ansible.builtin.uri:
+        url: "{{ vault_url }}/v1/{{ vault_secret_prefix }}/openclaw"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ lookup('ansible.builtin.file', vault_token_file) }}"
+        body_format: json
+        body:
+          data:
+            telegram_token: "{{ telegram_token_input }}"
+        status_code: [200, 204]
+      when: telegram_token_input | length > 0
+      tags:
+        - openclaw-vault
+
+    # ── Read token from Vault (whether just stored or pre-existing) ───
+    - name: "OpenClaw | Check for Telegram token in Vault"
+      ansible.builtin.uri:
+        url: "{{ vault_url }}/v1/{{ vault_secret_prefix }}/openclaw"
+        method: GET
+        headers:
+          X-Vault-Token: "{{ lookup('ansible.builtin.file', vault_token_file) }}"
+        status_code: [200, 404]
+      register: vault_openclaw_secret
+      tags:
+        - openclaw-vault
+
+    - name: "OpenClaw | Set skip flag"
+      ansible.builtin.set_fact:
+        skip_openclaw: "{{ vault_openclaw_secret.status == 404 or
+                           vault_openclaw_secret.json.data.data.telegram_token | default('') | length == 0 }}"
+      tags:
+        - openclaw-vault
+
+    - name: "OpenClaw | Display skip message"
+      ansible.builtin.debug:
+        msg: "Skipping OpenClaw installation (no Telegram token in Vault or provided)"
+      when: skip_openclaw
+      tags:
+        - openclaw-vault
+
+    - name: "OpenClaw | Set telegram_token fact"
+      ansible.builtin.set_fact:
+        telegram_token: "{{ vault_openclaw_secret.json.data.data.telegram_token }}"
+      when: not skip_openclaw
+      tags:
+        - openclaw-vault
+
+    # ── Retrieve Ollama API key from Vault ────────────────────────────
+    - name: "OpenClaw | Retrieve Ollama API key from Vault"
+      ansible.builtin.set_fact:
+        ollama_api_key: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/ollama:api_key token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+      when: not skip_openclaw
+      tags:
+        - openclaw-config
+
+    # ── Install Python dependencies ───────────────────────────────────
+    - name: "OpenClaw | Install Python dependencies"
+      ansible.builtin.pip:
+        name:
+          - python-telegram-bot
+          - requests
+          - pyyaml
+        state: present
+        executable: pip3
+      when: not skip_openclaw
+      tags:
+        - openclaw-install
+
+    # ── Deploy bot script and config ─────────────────────────────────
+    - name: "OpenClaw | Create data directory"
+      ansible.builtin.file:
+        path: "{{ openclaw_data_dir }}"
+        state: directory
+        mode: "0755"
+        owner: root
+        group: root
+      when: not skip_openclaw
+      tags:
+        - openclaw-config
+
+    - name: "OpenClaw | Create log directory"
+      ansible.builtin.file:
+        path: /var/log/openclaw
+        state: directory
+        mode: "0755"
+        owner: root
+        group: root
+      when: not skip_openclaw
+      tags:
+        - openclaw-config
+
+    - name: "OpenClaw | Deploy bot script"
+      ansible.builtin.copy:
+        src: "{{ playbook_dir }}/../templates/openclaw/bot.py"
+        dest: "{{ openclaw_data_dir }}/bot.py"
+        mode: "0755"
+        owner: root
+        group: root
+      when: not skip_openclaw
+      tags:
+        - openclaw-config
+
+    - name: "OpenClaw | Template config.yml"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/openclaw/config.yml.j2"
+        dest: "{{ openclaw_data_dir }}/config.yml"
+        mode: "0640"
+        owner: root
+        group: root
+      when: not skip_openclaw
+      tags:
+        - openclaw-config
+
+    # ── Systemd service ───────────────────────────────────────────────
+    - name: "OpenClaw | Template systemd service"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/openclaw/openclaw.service.j2"
+        dest: /etc/systemd/system/openclaw.service
+        mode: "0644"
+        owner: root
+        group: root
+      when: not skip_openclaw
+      tags:
+        - openclaw-service
+
+    - name: "OpenClaw | Reload systemd daemon"
+      ansible.builtin.systemd:
+        daemon_reload: true
+      when: not skip_openclaw
+      tags:
+        - openclaw-service
+
+    - name: "OpenClaw | Enable and start OpenClaw service"
+      ansible.builtin.systemd:
+        name: openclaw
+        enabled: true
+        state: started
+      when: not skip_openclaw
+      tags:
+        - openclaw-service
+
+    - name: "OpenClaw | Display status"
+      ansible.builtin.debug:
+        msg: "OpenClaw Telegram bot is installed and running. Message your bot to test it."
+      when: not skip_openclaw
+      tags:
+        - openclaw-service

+ 53 - 0
playbooks/09_nginx.yml

@@ -0,0 +1,53 @@
+---
+# playbooks/09_nginx.yml
+# Configure Nginx reverse proxy on nginx_proxy host
+
+- name: "Nginx | Configure reverse proxy"
+  hosts: nginx_proxy
+  become: true
+  gather_facts: false
+  tags:
+    - nginx
+  tasks:
+    - name: "Nginx | Template Vault proxy configuration"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/nginx/vault.conf.j2"
+        dest: /etc/nginx/conf.d/vault.conf
+        mode: "0644"
+        owner: root
+        group: root
+      tags:
+        - nginx-vault
+
+    - name: "Nginx | Template Ollama API proxy configuration"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/nginx/ollama-api.conf.j2"
+        dest: /etc/nginx/conf.d/ollama-api.conf
+        mode: "0644"
+        owner: root
+        group: root
+      tags:
+        - nginx-ollama
+
+    - name: "Nginx | Template Keycloak proxy configuration"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/nginx/keycloak-proxy.conf.j2"
+        dest: /etc/nginx/conf.d/keycloak-proxy.conf
+        mode: "0644"
+        owner: root
+        group: root
+      tags:
+        - nginx-keycloak
+
+    - name: "Nginx | Validate configuration"
+      ansible.builtin.command: nginx -t
+      changed_when: false
+      tags:
+        - nginx-validate
+
+    - name: "Nginx | Reload nginx"
+      ansible.builtin.systemd:
+        name: nginx
+        state: reloaded
+      tags:
+        - nginx-validate

+ 64 - 0
playbooks/10_coredns.yml

@@ -0,0 +1,64 @@
+---
+# playbooks/10_coredns.yml
+# Update CoreDNS records on coredns_host
+
+- name: "CoreDNS | Update DNS records"
+  hosts: coredns_host
+  become: true
+  gather_facts: false
+  tags:
+    - coredns
+  vars:
+    dns_records:
+      - name: "vault.{{ domain }}."
+        type: A
+        ttl: 3600
+        value: "{{ nginx_proxy_ip }}"
+      - name: "ollama-api.{{ domain }}."
+        type: A
+        ttl: 3600
+        value: "{{ nginx_proxy_ip }}"
+      - name: "idm.{{ domain }}."
+        type: A
+        ttl: 3600
+        value: "{{ nginx_proxy_ip }}"
+
+  tasks:
+    - name: "CoreDNS | Add vault DNS record"
+      ansible.builtin.lineinfile:
+        path: "{{ coredns_zone_file }}"
+        line: "vault.{{ domain }}.    3600    IN    A    {{ nginx_proxy_ip }}"
+        regexp: "^vault\\.{{ domain | replace('.', '\\.') }}\\."
+        insertafter: EOF
+        state: present
+      register: vault_dns_record
+      tags:
+        - coredns-records
+
+    - name: "CoreDNS | Add ollama-api DNS record"
+      ansible.builtin.lineinfile:
+        path: "{{ coredns_zone_file }}"
+        line: "ollama-api.{{ domain }}.    3600    IN    A    {{ nginx_proxy_ip }}"
+        regexp: "^ollama-api\\.{{ domain | replace('.', '\\.') }}\\."
+        insertafter: EOF
+        state: present
+      register: ollama_dns_record
+      tags:
+        - coredns-records
+
+    - name: "CoreDNS | Add idm (Keycloak) DNS record"
+      ansible.builtin.lineinfile:
+        path: "{{ coredns_zone_file }}"
+        line: "idm.{{ domain }}.    3600    IN    A    {{ nginx_proxy_ip }}"
+        regexp: "^idm\\.{{ domain | replace('.', '\\.') }}\\."
+        insertafter: EOF
+        state: present
+      register: idm_dns_record
+      tags:
+        - coredns-records
+
+    - name: "CoreDNS | Restart CoreDNS container to reload zone file"
+      ansible.builtin.command: docker restart {{ coredns_container_name }}
+      when: vault_dns_record.changed | default(false) or ollama_dns_record.changed | default(false) or idm_dns_record.changed | default(false)
+      changed_when: true
+      tags:
+        - coredns-reload

+ 319 - 0
playbooks/11_vault_oidc.yml

@@ -0,0 +1,319 @@
+---
+# playbooks/11_vault_oidc.yml
+# Configure Keycloak as an OIDC provider for Vault UI login.
+# Run this after 05_keycloak.yml — requires the realm and admin user to exist.
+
+- name: "Vault OIDC | Configure Keycloak authentication for Vault"
+  hosts: ai_server
+  become: true
+  gather_facts: false
+  tags:
+    - vault-oidc
+  vars:
+    vault_token_file: "{{ playbook_dir }}/../vault/.vault-token"
+    vault_url: "http://{{ ai_server_ip }}:{{ vault_port }}"
+    vault_addr: "http://127.0.0.1:{{ vault_port }}"
+    keycloak_base_url: "http://localhost:8180"
+    vault_oidc_client_id: "vault"
+
+  tasks:
+    # ── Load root token ───────────────────────────────────────────────
+    - name: "Vault OIDC | Load Vault root token"
+      ansible.builtin.set_fact:
+        vault_root_token: "{{ lookup('ansible.builtin.file', playbook_dir ~ '/../vault/.vault-init.json') | from_json | json_query('root_token') }}"
+      delegate_to: localhost
+      become: false
+      tags: always
+
+    # ── Resolve client secret (reuse existing or generate new) ────────
+    - name: "Vault OIDC | Check for existing vault OIDC client secret"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/{{ vault_secret_prefix }}/vault-oidc"
+        method: GET
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        status_code: [200, 404]
+      register: existing_oidc_secret
+      tags: always
+
+    - name: "Vault OIDC | Use existing client secret"
+      ansible.builtin.set_fact:
+        vault_oidc_client_secret: "{{ existing_oidc_secret.json.data.data.client_secret }}"
+      when: existing_oidc_secret.status == 200
+      tags: always
+
+    - name: "Vault OIDC | Generate new client secret"
+      ansible.builtin.command: openssl rand -hex 32
+      register: _new_secret
+      changed_when: false
+      delegate_to: localhost
+      become: false
+      when: existing_oidc_secret.status == 404
+      tags: always
+
+    - name: "Vault OIDC | Set new client secret fact"
+      ansible.builtin.set_fact:
+        vault_oidc_client_secret: "{{ _new_secret.stdout }}"
+      when: existing_oidc_secret.status == 404
+      tags: always
+
+    # ── Get Keycloak admin token ──────────────────────────────────────
+    - name: "Vault OIDC | Retrieve Keycloak admin password from Vault"
+      ansible.builtin.set_fact:
+        keycloak_admin_password: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:admin_password token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+      tags:
+        - vault-oidc-keycloak
+
+    - name: "Vault OIDC | Get Keycloak admin token"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/realms/master/protocol/openid-connect/token"
+        method: POST
+        body_format: form-urlencoded
+        body:
+          grant_type: password
+          client_id: admin-cli
+          username: admin
+          password: "{{ keycloak_admin_password }}"
+        status_code: 200
+      register: kc_token_result
+      tags:
+        - vault-oidc-keycloak
+
+    - name: "Vault OIDC | Set Keycloak token fact"
+      ansible.builtin.set_fact:
+        kc_token: "{{ kc_token_result.json.access_token }}"
+      tags:
+        - vault-oidc-keycloak
+
+    # ── Create vault OIDC client in Keycloak ─────────────────────────
+    - name: "Vault OIDC | Check if vault client exists in Keycloak"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/clients?clientId={{ vault_oidc_client_id }}"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+        status_code: 200
+      register: vault_client_check
+      tags:
+        - vault-oidc-keycloak
+
+    - name: "Vault OIDC | Create vault client in Keycloak"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/clients"
+        method: POST
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+          Content-Type: application/json
+        body_format: json
+        body:
+          clientId: "{{ vault_oidc_client_id }}"
+          enabled: true
+          protocol: openid-connect
+          publicClient: false
+          clientAuthenticatorType: client-secret
+          secret: "{{ vault_oidc_client_secret }}"
+          redirectUris:
+            - "https://vault.{{ domain }}/ui/vault/auth/oidc/oidc/callback"
+            - "http://localhost:8250/oidc/callback"
+          webOrigins:
+            - "https://vault.{{ domain }}"
+          standardFlowEnabled: true
+          directAccessGrantsEnabled: false
+        status_code: [201, 409]
+      when: vault_client_check.json | length == 0
+      tags:
+        - vault-oidc-keycloak
+
+    # When the client already exists, Keycloak is the source of truth for the secret.
+    # POST .../client-secret regenerates a new random secret — it cannot be used to
+    # set a specific value. Instead, read whatever Keycloak currently has and use that.
+    - name: "Vault OIDC | Read existing vault client secret from Keycloak"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/clients/{{ vault_client_check.json[0].id }}/client-secret"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+        status_code: 200
+      register: keycloak_vault_secret
+      when: vault_client_check.json | length > 0
+      tags:
+        - vault-oidc-keycloak
+
+    - name: "Vault OIDC | Use Keycloak client secret as canonical value"
+      ansible.builtin.set_fact:
+        vault_oidc_client_secret: "{{ keycloak_vault_secret.json.value }}"
+      when: vault_client_check.json | length > 0
+      tags:
+        - vault-oidc-keycloak
+
+    # ── Add realm roles mapper to vault client ID token ───────────────
+    - name: "Vault OIDC | Re-fetch vault client to get UUID"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/clients?clientId={{ vault_oidc_client_id }}"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+        status_code: 200
+      register: vault_client_fetched
+      tags:
+        - vault-oidc-keycloak
+
+    - name: "Vault OIDC | Set vault client UUID"
+      ansible.builtin.set_fact:
+        vault_kc_client_uuid: "{{ vault_client_fetched.json[0].id }}"
+      tags:
+        - vault-oidc-keycloak
+
+    - name: "Vault OIDC | List existing vault client protocol mappers"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/clients/{{ vault_kc_client_uuid }}/protocol-mappers/models"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+        status_code: 200
+      register: vault_client_mappers
+      tags:
+        - vault-oidc-keycloak
+
+    - name: "Vault OIDC | Add realm roles mapper to ID token"
+      ansible.builtin.uri:
+        url: "{{ keycloak_base_url }}/admin/realms/{{ keycloak_realm }}/clients/{{ vault_kc_client_uuid }}/protocol-mappers/models"
+        method: POST
+        headers:
+          Authorization: "Bearer {{ kc_token }}"
+          Content-Type: application/json
+        body_format: json
+        body:
+          name: "realm-roles-id-token"
+          protocol: openid-connect
+          protocolMapper: "oidc-usermodel-realm-role-mapper"
+          config:
+            "claim.name": "realm_access.roles"
+            "jsonType.label": "String"
+            "multivalued": "true"
+            "id.token.claim": "true"
+            "access.token.claim": "true"
+            "userinfo.token.claim": "true"
+        status_code: [201, 409]
+      when: vault_client_mappers.json | selectattr('name', 'equalto', 'realm-roles-id-token') | list | length == 0
+      tags:
+        - vault-oidc-keycloak
+
+    # ── Persist vault OIDC client secret in Vault ────────────────────
+    - name: "Vault OIDC | Store vault OIDC client secret in Vault"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/{{ vault_secret_prefix }}/vault-oidc"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          data:
+            client_secret: "{{ vault_oidc_client_secret }}"
+        status_code: [200, 204]
+      tags:
+        - vault-oidc-vault
+
+    # ── Enable and configure Vault OIDC auth method ───────────────────
+    - name: "Vault OIDC | Check existing Vault auth methods"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/auth"
+        method: GET
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        status_code: 200
+      register: vault_auth_methods
+      tags:
+        - vault-oidc-vault
+
+    - name: "Vault OIDC | Enable OIDC auth method in Vault"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/auth/oidc"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          type: oidc
+        status_code: [200, 204]
+      when: "'oidc/' not in vault_auth_methods.json"
+      tags:
+        - vault-oidc-vault
+
+    - name: "Vault OIDC | Configure Vault OIDC auth method"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/auth/oidc/config"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          oidc_discovery_url: "https://idm.{{ domain }}/realms/{{ keycloak_realm }}"
+          oidc_client_id: "{{ vault_oidc_client_id }}"
+          oidc_client_secret: "{{ vault_oidc_client_secret }}"
+          default_role: default
+        status_code: [200, 204]
+      tags:
+        - vault-oidc-vault
+
+    # ── Create Vault policy for Keycloak-authenticated users ──────────
+    - name: "Vault OIDC | Create vault-admin policy"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/sys/policies/acl/vault-admin"
+        method: PUT
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          policy: |
+            path "secret/*" {
+              capabilities = ["create", "read", "update", "delete", "list"]
+            }
+            path "sys/health" {
+              capabilities = ["read"]
+            }
+            path "sys/policies/acl" {
+              capabilities = ["list"]
+            }
+            path "sys/policies/acl/*" {
+              capabilities = ["read", "list"]
+            }
+        status_code: [200, 204]
+      tags:
+        - vault-oidc-vault
+
+    # ── Create Vault OIDC role ────────────────────────────────────────
+    - name: "Vault OIDC | Create default OIDC role"
+      ansible.builtin.uri:
+        url: "{{ vault_addr }}/v1/auth/oidc/role/default"
+        method: POST
+        headers:
+          X-Vault-Token: "{{ vault_root_token }}"
+        body_format: json
+        body:
+          user_claim: sub
+          allowed_redirect_uris:
+            - "https://vault.{{ domain }}/ui/vault/auth/oidc/oidc/callback"
+            - "http://localhost:8250/oidc/callback"
+          bound_claims:
+            "/realm_access/roles": ["ai-admin"]
+          token_policies:
+            - vault-admin
+          token_ttl: "4h"
+          token_max_ttl: "8h"
+          oidc_scopes:
+            - openid
+            - profile
+            - email
+        status_code: [200, 204]
+      tags:
+        - vault-oidc-vault
+
+    - name: "Vault OIDC | Display login instructions"
+      ansible.builtin.debug:
+        msg: |
+          Vault OIDC login configured.
+          In the Vault UI, select method: OIDC, role: default, then click Sign in with OIDC Provider.
+          You will be redirected to Keycloak to authenticate.
+      tags:
+        - vault-oidc-vault

+ 122 - 0
playbooks/site.yml

@@ -0,0 +1,122 @@
+---
+# playbooks/site.yml
+# Master playbook - imports all playbooks in sequence
+
+- name: "Site | Import pre-flight checks"
+  ansible.builtin.import_playbook: 00_preflight.yml
+  tags:
+    - preflight
+
+- name: "Site | Import Vault deployment"
+  ansible.builtin.import_playbook: 01_vault.yml
+  tags:
+    - vault
+
+- name: "Site | Import infrastructure setup"
+  ansible.builtin.import_playbook: 02_infrastructure.yml
+  tags:
+    - infrastructure
+
+- name: "Site | Import model benchmarking"
+  ansible.builtin.import_playbook: 03_benchmark.yml
+  tags:
+    - benchmark
+
+- name: "Site | Import model configuration"
+  ansible.builtin.import_playbook: 04_models.yml
+  tags:
+    - models
+
+- name: "Site | Import Keycloak deployment"
+  ansible.builtin.import_playbook: 05_keycloak.yml
+  tags:
+    - keycloak
+
+- name: "Site | Import Qdrant deployment"
+  ansible.builtin.import_playbook: 06_qdrant.yml
+  tags:
+    - qdrant
+
+- name: "Site | Import Open WebUI deployment"
+  ansible.builtin.import_playbook: 07_openwebui.yml
+  tags:
+    - openwebui
+
+- name: "Site | Import OpenClaw deployment"
+  ansible.builtin.import_playbook: 08_openclaw.yml
+  tags:
+    - openclaw
+
+- name: "Site | Import Nginx configuration"
+  ansible.builtin.import_playbook: 09_nginx.yml
+  tags:
+    - nginx
+
+- name: "Site | Import CoreDNS configuration"
+  ansible.builtin.import_playbook: 10_coredns.yml
+  tags:
+    - coredns
+
+- name: "Site | Import Vault OIDC configuration"
+  ansible.builtin.import_playbook: 11_vault_oidc.yml
+  tags:
+    - vault-oidc
+
+# ── Final credentials summary ───────────────────────────────────────
+- name: "Site | Display deployment summary"
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tags:
+    - summary
+  vars:
+    vault_token_file: "{{ playbook_dir }}/../vault/.vault-token"
+    vault_url: "http://{{ ai_server_ip }}:{{ vault_port }}"
+  tasks:
+    - name: "Summary | Retrieve Keycloak admin password"
+      ansible.builtin.set_fact:
+        kc_admin_pass: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:admin_password token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+
+    - name: "Summary | Retrieve Keycloak realm admin password"
+      ansible.builtin.set_fact:
+        kc_realm_admin_pass: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/keycloak:realm_admin_password token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
+
+    - name: "Summary | Retrieve Vault root token"
+      ansible.builtin.set_fact:
+        vault_root_token: "{{ lookup('ansible.builtin.file', playbook_dir ~ '/../vault/.vault-init.json') | from_json | json_query('root_token') }}"
+
+    - name: "Summary | Display credentials and access information"
+      ansible.builtin.debug:
+        msg: |
+          ╔══════════════════════════════════════════════════════════════════╗
+          ║             {{ platform_name }} PLATFORM - DEPLOYMENT COMPLETE
+          ╠══════════════════════════════════════════════════════════════════╣
+          ║                                                                ║
+          ║  SERVICES:                                                     ║
+          ║  ─────────                                                     ║
+          ║  Open WebUI:   {{ openwebui_url }}
+          ║  Keycloak:     {{ keycloak_url }}
+          ║  Vault:        {{ vault_api_addr }}
+          ║  Ollama API:   {{ ollama_api_url }}
+          ║  Qdrant:       http://{{ ai_server_ip }}:{{ qdrant_http_port }} (internal only)
+          ║                                                                ║
+          ║  CREDENTIALS:                                                  ║
+          ║  ────────────                                                  ║
+          ║  Vault Root Token:        {{ vault_root_token }}
+          ║  Vault Token File:        vault/.vault-token                   ║
+          ║  Keycloak Admin:          admin / {{ kc_admin_pass }}
+          ║  Realm Admin:             {{ keycloak_realm_admin_user }} / {{ kc_realm_admin_pass }}
+          ║                                                                ║
+          ║  FILES:                                                        ║
+          ║  ──────                                                        ║
+          ║  Vault Init:     vault/.vault-init.json                        ║
+          ║  Ansible Token:  vault/.vault-token                            ║
+          ║  Benchmarks:     benchmarks/results/model_selection.json       ║
+          ║                                                                ║
+          ║  NOTES:                                                        ║
+          ║  ──────                                                        ║
+          ║  - All secrets are stored in Vault at {{ vault_secret_prefix }}/*
+          ║  - Run individual playbooks with --tags for partial deploys    ║
+          ║  - Vault must be unsealed after each restart                   ║
+          ║                                                                ║
+          ╚══════════════════════════════════════════════════════════════════╝

+ 10 - 0
requirements.yml

@@ -0,0 +1,10 @@
+---
+collections:
+  - name: community.hashi_vault
+    version: ">=6.0.0"
+  - name: community.docker
+    version: ">=3.0.0"
+  - name: community.general
+    version: ">=8.0.0"
+  - name: ansible.posix
+    version: ">=1.5.0"

+ 113 - 0
roles/benchmark/README.md

@@ -0,0 +1,113 @@
+# Role: benchmark
+
+## Purpose
+
+Benchmark all installed Ollama models to determine optimal slot assignments. Runs
+coding and general-purpose test suites, scores each model, and writes results to
+the benchmark report and `model_selection.json`.
+
+## Test Details
+
+| Test        | Category | Prompt                                                                                          | Scoring Method            |
+|-------------|----------|-------------------------------------------------------------------------------------------------|---------------------------|
+| `code_gen`  | coding   | "Write a Python function that implements binary search on a sorted list. Include type hints and docstring." | `def` + `return` present, code structure |
+| `debug`     | coding   | "Find and fix the bug: `def factorial(n): return n * factorial(n)`. Explain the issue."          | Base case identified, explanation quality |
+| `refactor`  | coding   | "Refactor to list comprehension: `result = []; for i in range(10): if i % 2 == 0: result.append(i*i)`" | List comprehension present, conciseness |
+| `explain`   | general  | "Explain recursion to a beginner programmer. Use a simple analogy."                              | Clarity, analogy present, length adequate |
+| `creative`  | general  | "Write a short poem about artificial intelligence."                                              | Line count, poetic structure |
+| `reasoning` | general  | "A farmer has 17 sheep. All but 9 die. How many are left? Explain step by step."                 | Correct answer (9), reasoning steps |
+| `latency`   | latency  | "Hi"                                                                                             | Time to first token (TTFT) |
+
+## Quality Heuristics
+
+Each test type uses specific checks to score quality (0.0 to 1.0):
+
+- **code_gen** -- regex checks for `def `, `return`, type hint patterns (`: `), docstring (`"""`); score based on how many are present
+- **debug** -- checks for mention of base case, `if n <= 1` or similar fix, explanation length
+- **refactor** -- checks for `[` list comprehension syntax, absence of `for`/`append` loop pattern, output length relative to input
+- **explain** -- checks response length (>100 chars), presence of analogy keywords ("like", "imagine", "similar"), paragraph count
+- **creative** -- checks line count (>=4), presence of line breaks, absence of purely prose output
+- **reasoning** -- checks for "9" in response, presence of step indicators ("step", "first", "because", numbered lists)
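The `code_gen` heuristic above can be sketched as a simple pattern count. This is a minimal illustration of the idea, not the role's actual implementation; the exact pattern set is an assumption based on the list above.

```python
import re

# Patterns the code_gen check looks for (assumed set, per the heuristics above).
CODE_GEN_CHECKS = [
    r"\bdef ",      # function definition present
    r"\breturn\b",  # explicit return present
    r"->|: \w+",    # type hint pattern
    r'"""',         # docstring
]

def code_gen_quality(response: str) -> float:
    """Score 0.0-1.0 as the fraction of expected patterns found in the response."""
    hits = sum(1 for pat in CODE_GEN_CHECKS if re.search(pat, response))
    return hits / len(CODE_GEN_CHECKS)
```

A response containing a typed, docstringed function with a `return` scores 1.0; a response with none of the markers scores 0.0.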
+
+## Scoring Formula
+
+```
+composite = (quality * 0.45) + (tokens_per_sec_normalized * 0.30) + (latency_score * 0.25)
+```
+
+### Example Calculation
+
+For a model with quality=0.8, tokens/sec=38.2 (fastest=55.8), TTFT=420ms (slowest=510ms):
+
+```
+tokens_per_sec_normalized = 38.2 / 55.8 = 0.685
+latency_score = 1.0 - (420 / 510) = 0.176
+
+composite = (0.8 * 0.45) + (0.685 * 0.30) + (0.176 * 0.25)
+          = 0.360 + 0.206 + 0.044
+          = 0.610
+```
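The same arithmetic as a small helper, reproducing the example above. This is a sketch of the formula; the role computes it inside the benchmark playbook, and the parameter names here are illustrative.

```python
def composite_score(quality: float,
                    tokens_per_sec: float, fastest_tps: float,
                    ttft_ms: float, slowest_ttft_ms: float,
                    w_quality: float = 0.45, w_speed: float = 0.30,
                    w_latency: float = 0.25) -> float:
    """composite = quality*0.45 + normalized tok/s*0.30 + latency score*0.25"""
    tps_norm = tokens_per_sec / fastest_tps          # 38.2 / 55.8 = 0.685
    latency_score = 1.0 - (ttft_ms / slowest_ttft_ms)  # 1 - 420/510 = 0.176
    return quality * w_quality + tps_norm * w_speed + latency_score * w_latency

# composite_score(0.8, 38.2, 55.8, 420, 510) ≈ 0.61, matching the worked example
```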
+
+## Configuration
+
+All parameters are configurable via `group_vars/all.yml`:
+
+| Key                            | Default | Description                                    |
+|--------------------------------|---------|------------------------------------------------|
+| `benchmark_min_tokens_per_sec` | 10      | Minimum tokens/sec to pass a model             |
+| `benchmark_max_ttft_ms`        | 5000    | Maximum acceptable time to first token (ms)    |
+| `benchmark_quality_weight`     | 0.45    | Weight of quality score in composite            |
+| `benchmark_speed_weight`       | 0.30    | Weight of normalized tokens/sec in composite    |
+| `benchmark_latency_weight`     | 0.25    | Weight of latency score in composite            |
+| `benchmark_coding_threshold`   | 0.15    | Min coding-minus-general composite delta to classify a model as coding |
+
+## Candidate Models
+
+| Model                   | Size  | Expected Speed | Reasoning                              |
+|-------------------------|-------|----------------|----------------------------------------|
+| `qwen2.5-coder:14b`    | 14B   | ~35-40 tok/s   | Strong coding performance at moderate size |
+| `deepseek-coder-v2:16b`| 16B   | ~30-38 tok/s   | Competitive coding with broad language support |
+| `llama3.1:8b`          | 8B    | ~50-55 tok/s   | Fast general-purpose model             |
+| `mistral:7b`           | 7B    | ~50-58 tok/s   | Fast general-purpose, good reasoning   |
+
+## Output Files
+
+### Benchmark Report
+
+Written to `benchmarks/results/benchmark_<timestamp>.md`:
+
+```
+| Model                  | Coding Composite | General Composite | Classification | Tokens/sec | TTFT (ms) |
+|------------------------|------------------|-------------------|----------------|------------|-----------|
+| qwen2.5-coder:14b      | 0.82             | 0.65              | coding         | 38.2       | 420       |
+| ...                    | ...              | ...               | ...            | ...        | ...       |
+```
+
+### Model Selection
+
+Written to `benchmarks/results/model_selection.json`:
+
+```json
+{
+  "timestamp": "2025-01-15T10:30:00Z",
+  "slot1_coding": "qwen2.5-coder:14b",
+  "slot2_general": "llama3.1:8b",
+  "slot3_backup": "deepseek-coder-v2:16b",
+  "slot4_experimental": null,
+  "results": {
+    "qwen2.5-coder:14b": {
+      "coding_composite": 0.82,
+      "general_composite": 0.65,
+      "classification": "coding",
+      "tokens_per_sec": 38.2,
+      "ttft_ms": 420
+    }
+  }
+}
+```
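Slot assignment from such a results dict can be sketched as follows. The tie-breaking and fallback behavior here are assumptions for illustration; the actual selection runs in the benchmark playbook.

```python
def assign_slots(results: dict, coding_threshold: float = 0.15) -> dict:
    """Pick slot1 (best coding model), slot2 (best general), slot3 (runner-up)."""
    def is_coding(r):
        # classified as coding when the composite delta clears the threshold
        return r["coding_composite"] - r["general_composite"] >= coding_threshold

    by_coding = sorted(results, key=lambda m: results[m]["coding_composite"], reverse=True)
    by_general = sorted(results, key=lambda m: results[m]["general_composite"], reverse=True)

    slot1 = next((m for m in by_coding if is_coding(results[m])), by_coding[0])
    slot2 = next((m for m in by_general if m != slot1), None)
    slot3 = next((m for m in by_coding if m not in (slot1, slot2)), None)
    return {"slot1_coding": slot1, "slot2_general": slot2,
            "slot3_backup": slot3, "slot4_experimental": None}
```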
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags benchmark
+```

+ 62 - 0
roles/coredns/README.md

@@ -0,0 +1,62 @@
+# Role: coredns
+
+## Purpose
+
+Add DNS records for new services to the CoreDNS zone file on `coredns_host` (`coredns_host_ip`).
+
+## Zone File
+
+```
+/docker_mounts/coredns/<domain>.db
+```
+
+This file is the authoritative zone file for `{{ domain }}` served by the CoreDNS
+Docker container on the coredns_host.
+
+## Records Managed
+
+| Record                       | Type | Value               |
+|------------------------------|------|---------------------|
+| `vault.<domain>`             | A    | `<nginx_proxy_ip>`  |
+| `ollama-api.<domain>`        | A    | `<nginx_proxy_ip>`  |
+| `idm.<domain>` (Keycloak)    | A    | `<nginx_proxy_ip>`  |
+
+All service DNS records point to the NGINX proxy (`nginx_proxy_ip`), which handles
+TLS termination and reverse proxying to the actual service backends.
+
+## Entry Format
+
+Each DNS record follows this format:
+
+```
+<name>.<domain>.  3600  IN  A  <nginx_proxy_ip>
+```
+
+Example (with defaults `domain=example.com`, `nginx_proxy_ip=192.168.1.30`):
+
+```
+vault.example.com.      3600  IN  A  192.168.1.30
+ollama-api.example.com. 3600  IN  A  192.168.1.30
+idm.example.com.        3600  IN  A  192.168.1.30
+```
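The upsert semantics the role relies on (replace a matching record in place, otherwise append at EOF) can be sketched outside Ansible. This is a toy model of the `lineinfile` behavior, not the role's code:

```python
import re

def upsert_record(zone_lines, name, domain, ip):
    """Replace an existing A record for name.domain., or append one at EOF."""
    fqdn = f"{name}.{domain}."
    record = f"{fqdn}    3600    IN    A    {ip}"
    pattern = re.compile(r"^" + re.escape(fqdn))  # same anchor as the role's regexp
    if any(pattern.match(line) for line in zone_lines):
        return [record if pattern.match(line) else line for line in zone_lines]
    return zone_lines + [record]
```

Re-running with the same name never duplicates the record; changing the IP rewrites the existing line.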
+
+## Reload
+
+After the zone file is modified, the role restarts the CoreDNS container so the
+updated zone is loaded:
+
+```bash
+docker restart <coredns_container_name>
+```
+
+## Idempotency
+
+The role uses Ansible's `lineinfile` module to add DNS records. This ensures:
+
+- Records are added only if they do not already exist
+- Existing records are not duplicated
+- Other records in the zone file are not affected
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags coredns
+```

+ 35 - 0
roles/docker/README.md

@@ -0,0 +1,35 @@
+# Role: docker
+
+## Purpose
+
+Install Docker CE on target hosts that require a container runtime.
+
+## Installation
+
+Docker CE is installed via `dnf` using the official Docker CE repository. The role:
+
+1. Adds the Docker CE dnf repository
+2. Installs `docker-ce`, `docker-ce-cli`, `containerd.io`, and `docker-compose-plugin`
+3. Enables and starts the `docker` service
+
+## Group Membership
+
+The following users are added to the `docker` group:
+
+- `{{ ansible_user }}` -- primary admin user (set via `ansible_user` in `group_vars/all.yml`)
+- `ollama` -- Ollama service user
+
+A logout/login is required for group membership changes to take effect in existing
+shell sessions.
+
+## Idempotency
+
+- Skips installation if the `docker` binary is already present and the `docker`
+  service is running
+- Group membership is additive (does not remove existing groups)
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags docker
+```

+ 119 - 0
roles/hashi_vault/README.md

@@ -0,0 +1,119 @@
+# Role: hashi_vault
+
+## Purpose
+
+Deploy and configure HashiCorp Vault for centralized secret management across the
+AI platform.
+
+## Architecture
+
+- Runs as a native systemd service on `ai_server` (`ai_server_ip`)
+- Listens on `127.0.0.1:{{ vault_port }}` — not exposed directly
+- TLS termination handled by NGINX reverse proxy on `nginx_proxy` (`nginx_proxy_ip`)
+- Accessible at `https://vault.<domain>`
+
+## Init Process
+
+On first run, Vault is initialized with:
+
+```
+vault operator init -key-shares=1 -key-threshold=1
+```
+
+The unseal key and root token are saved to `vault/.vault-init.json`. This file is
+gitignored and must never be committed to version control.
+
+After init, Vault is automatically unsealed and an ansible-scoped token is written to
+`vault/.vault-token` (also gitignored) for use by subsequent Ansible tasks.
+
+## Secret Paths
+
+All secrets are stored under `{{ vault_secret_prefix }}/` (default: `secret/data/ai-platform`).
+
+| Secret                       | Path (relative to prefix) | Keys                                               |
+|------------------------------|---------------------------|----------------------------------------------------|
+| Ollama API key               | `/ollama`                 | `api_key`                                          |
+| Keycloak credentials         | `/keycloak`               | `admin_password`, `client_secret`, `realm_admin_password`, `oidc_url` |
+| Open WebUI secret key        | `/openwebui`              | `secret_key`                                       |
+| OpenClaw Telegram token      | `/openclaw`               | `telegram_token`                                   |
+| Vault OIDC client secret     | `/vault-oidc`             | `client_secret`                                    |
+
+## Idempotency
+
+Secrets are **only written if they do not already exist** in Vault. Re-running
+`01_vault.yml` or `deploy_ai.yml` will never overwrite existing credentials.
+
+This means the full `deploy_ai.yml` is safe to re-run at any time — running services
+are not disrupted because their secrets never change unless explicitly rotated.
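The write-if-absent pattern can be sketched abstractly. This is a toy model of the behavior only; the role implements it with Vault API reads and conditional writes, not this code.

```python
import secrets

def ensure_secret(store: dict, path: str, nbytes: int = 32) -> str:
    """Return the secret at path, generating one only if missing — never overwrites."""
    if path not in store:
        store[path] = secrets.token_hex(nbytes)
    return store[path]
```

Calling it twice for the same path returns the identical value, which is why re-running the deploy leaves running services' credentials untouched.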
+
+## Credential Rotation
+
+To rotate a credential, delete its Vault path and re-run the deploy:
+
+```bash
+# Rotate a specific secret
+vault kv delete secret/<vault_project_slug>/keycloak
+
+# Re-run full deploy — new secret generated and all dependent services redeployed
+ansible-playbook deploy_ai.yml -K -e @local.yml
+```
+
+Deletion forces regeneration on the next run. All services that depend on that
+secret are automatically redeployed in the correct dependency order by `deploy_ai.yml`.
+
+## Policies
+
+### ansible-policy
+
+Used by the `vault/.vault-token` ansible token for all Ansible lookups:
+
+```hcl
+path "{{ vault_secret_prefix }}/*" {
+  capabilities = ["create", "read", "update", "delete", "list"]
+}
+path "{{ vault_secret_meta_prefix }}/*" {
+  capabilities = ["list", "read", "delete"]
+}
+path "{{ vault_secret_meta_prefix }}" {
+  capabilities = ["list"]
+}
+path "secret/metadata/" {
+  capabilities = ["list"]
+}
+```
+
+### vault-admin policy
+
+Assigned to users who authenticate via Keycloak OIDC:
+
+```hcl
+path "secret/*" {
+  capabilities = ["create", "read", "update", "delete", "list"]
+}
+```
+
+## Keycloak OIDC Login
+
+Vault is configured to accept Keycloak SSO via `playbooks/11_vault_oidc.yml`.
+
+Only users with the `ai-admin` role in Keycloak can log in via OIDC. In the Vault UI:
+
+1. Select method: **OIDC**
+2. Role: **default**
+3. Click **Sign in with OIDC Provider**
+4. Authenticate via Keycloak
+
+The OIDC client secret is stored at `{{ vault_secret_prefix }}/vault-oidc` and is
+subject to the same idempotency rules as all other secrets.
+
+## AppRole
+
+An AppRole named `{{ vault_approle_name }}` (default: `ai-services`) is created for
+container runtime access to secrets.
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags vault
+ansible-playbook playbooks/site.yml --tags vault-oidc
+```

+ 89 - 0
roles/keycloak/README.md

@@ -0,0 +1,89 @@
+# Role: keycloak
+
+## Purpose
+
+Deploy Keycloak 24.x and configure the platform realm for single sign-on (SSO)
+across all platform services.
+
+## Container Configuration
+
+| Setting                     | Value                                        |
+|-----------------------------|----------------------------------------------|
+| Image                       | `quay.io/keycloak/keycloak:latest`          |
+| Port mapping                | `{{ keycloak_port }}:8080`                  |
+| Volume                      | `/mnt/ai_data/keycloak:/opt/keycloak/data`  |
+| Data directory ownership    | `1000:1000` (Keycloak container user)       |
+| `KC_HOSTNAME`               | `https://idm.<domain>` (full URL required)  |
+| `KC_PROXY_HEADERS`          | `xforwarded` (replaces deprecated KC_PROXY) |
+| `KC_HTTP_ENABLED`           | `true` (TLS terminated at NGINX)            |
+| `KEYCLOAK_ADMIN`            | `admin`                                     |
+| `KEYCLOAK_ADMIN_PASSWORD`   | (from Vault)                                |
+
+> **Note:** `KC_PROXY_HEADERS=xforwarded` replaces the deprecated `KC_PROXY=edge`
+> from Keycloak 23 and earlier. The full URL format for `KC_HOSTNAME` is required
+> in Keycloak 24+ to correctly generate HTTPS redirect URLs when behind a proxy.
+
+## Realm Configuration
+
+- **Realm name:** `{{ keycloak_realm }}` (default: `ai-platform`)
+- **Display name:** `{{ keycloak_realm_display }}` (default: `AI Platform`)
+
+### Clients
+
+| Client ID    | Purpose                        | Redirect URIs                                  |
+|--------------|--------------------------------|------------------------------------------------|
+| `open-webui` | Open WebUI SSO                 | `https://ollama-ui.<domain>/*`                 |
+| `vault`      | Vault UI OIDC login            | `https://vault.<domain>/ui/vault/auth/oidc/oidc/callback` |
+
+The `vault` client is created by `playbooks/11_vault_oidc.yml`, not this playbook.
+Both client secrets are stored in Vault.
+
+### Roles
+
+| Role       | Open WebUI             | Vault OIDC  | Description                 |
+|------------|------------------------|-------------|-----------------------------|
+| `ai-user`  | ✅ Standard access     | ❌ Blocked  | Standard AI platform user   |
+| `ai-admin` | ✅ Admin access        | ✅ Full access | AI platform administrator |
+
+Assign roles when creating users in the platform realm (not the `master` realm).
+
+### Pre-created User
+
+- **`{{ keycloak_realm_admin_user }}`** (default: `ai-platform-admin`) — created with
+  the `ai-admin` role assigned. Password stored in Vault at
+  `{{ vault_secret_prefix }}/keycloak:realm_admin_password`.
+
+## Resetting Keycloak
+
+If the Keycloak database needs to be wiped (e.g. admin password mismatch after
+credential rotation), stop the container, remove the data directory, and re-run:
+
+```bash
+ssh <ai_server_ip> 'sudo docker stop keycloak; sudo docker rm keycloak; sudo rm -rf /mnt/ai_data/keycloak/*'
+ansible-playbook playbooks/05_keycloak.yml -K -e @local.yml
+ansible-playbook playbooks/11_vault_oidc.yml -K -e @local.yml
+```
+
+Keycloak will initialize from scratch using the current credentials in Vault.
+Re-run `11_vault_oidc.yml` afterward to recreate the `vault` OIDC client.
+
+## OIDC Endpoint
+
+```
+https://idm.<domain>/realms/<keycloak_realm>/.well-known/openid-configuration
+```
+
+## Admin Console
+
+```
+https://idm.<domain>/admin/
+```
+
+Log in with the `admin` user and the password stored in Vault at
+`{{ vault_secret_prefix }}/keycloak:admin_password`.
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags keycloak
+```

+ 80 - 0
roles/models/README.md

@@ -0,0 +1,80 @@
+# Role: models
+
+## Purpose
+
+Manage the Ollama model lifecycle -- pulling models, creating custom Modelfile
+configurations, and running a warm-up service to ensure models are loaded into
+memory at boot time.
+
+## Slot System
+
+| Slot | Role               | Selection Method                         |
+|------|--------------------|------------------------------------------|
+| 1    | Primary Coding     | Highest coding composite from benchmarks |
+| 2    | Primary General    | Highest general composite from benchmarks|
+| 3    | Secondary / Backup | Next-best overall average composite      |
+| 4    | Experimental       | Manual override via `-e slot4_model=<name>` |
+
+## Slot Rotation
+
+To override slot 4 with a specific model at runtime:
+
+```bash
+ansible-playbook playbooks/03_ollama.yml -e slot4_model=mistral:7b
+```
+
+Slots 1-3 are automatically assigned based on the latest benchmark results in
+`model_selection.json`. Slot 4 is always user-controlled.
+
+## Modelfile Configurations
+
+Custom Modelfile variants are created for fine-tuned context windows and use cases:
+
+| Custom Model          | Base Model           | Context Window | Use Case                    |
+|-----------------------|----------------------|----------------|-----------------------------|
+| `coding-primary`     | (slot 1 model)       | 32768          | Code generation and debugging |
+| `general-primary`    | (slot 2 model)       | 16384          | General conversation and reasoning |
+| `backup`             | (slot 3 model)       | 16384          | Fallback for either category |
+| `experimental`       | (slot 4 model)       | 8192           | Testing new models           |
+
+## Warm-up Service
+
+The role deploys `ollama-warmup.service`, a oneshot systemd service that runs after
+`ollama.service` starts.
+
+**Why it is needed:** Even though `OLLAMA_KEEP_ALIVE=-1` keeps models in memory
+indefinitely once loaded, Ollama does not load any model on startup. The warm-up
+service sends a minimal inference request to each slot model, triggering the
+initial load into memory. Without this, the first user request to each model
+would stall while the model is loaded from disk.
+
+The warm-up service:
+
+1. Waits for Ollama API to be healthy
+2. Sends a short prompt to each configured slot model
+3. Exits after all models are loaded
+
+## model_selection.json
+
+The model selection file is read by this role to determine which models to assign to
+each slot. Schema:
+
+```json
+{
+  "timestamp": "2025-01-15T10:30:00Z",
+  "slot1_coding": "qwen2.5-coder:14b",
+  "slot2_general": "llama3.1:8b",
+  "slot3_backup": "deepseek-coder-v2:16b",
+  "slot4_experimental": null
+}
+```
+
+If `model_selection.json` does not exist (first run before benchmarks), the role
+falls back to default models defined in `group_vars/all.yml`.
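The selection-with-fallback logic can be sketched as follows. The defaults mirror the example schema above; the actual implementation lives in the role's tasks, so names here are illustrative:

```python
import json
from pathlib import Path

# Default slot assignments used before the first benchmark run
# (values taken from the example schema above).
DEFAULTS = {
    "slot1_coding": "qwen2.5-coder:14b",
    "slot2_general": "llama3.1:8b",
    "slot3_backup": "deepseek-coder-v2:16b",
    "slot4_experimental": None,
}


def load_slots(path: str = "model_selection.json") -> dict:
    """Return slot assignments, falling back to defaults when absent."""
    p = Path(path)
    if not p.exists():
        return dict(DEFAULTS)
    selection = json.loads(p.read_text())
    # Any slot missing from the file keeps its default.
    return {key: selection.get(key, default) for key, default in DEFAULTS.items()}
```

Slot 4 stays `None` unless overridden with `-e slot4_model=<name>`, which is applied after this lookup.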
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags models
+ansible-playbook playbooks/site.yml --tags warmup
+```

+ 72 - 0
roles/nginx/README.md

@@ -0,0 +1,72 @@
+# Role: nginx
+
+## Purpose
+
+Manage NGINX reverse proxy configurations on `nginx_proxy` (`nginx_proxy_ip`) for all
+platform services.
+
+## Managed Configurations
+
+| Config File              | Service    | Upstream Target                      |
+|--------------------------|------------|--------------------------------------|
+| `vault.conf`             | Vault      | `<ai_server_ip>:<vault_port>`        |
+| `ollama-api.conf`        | Ollama API | `<ai_server_ip>:<ollama_port>`       |
+| `keycloak-proxy.conf`    | Keycloak   | `<ai_server_ip>:<keycloak_port>`     |
+
+Each configuration file is placed in `/etc/nginx/conf.d/` on the proxy host.
+
+## SSL Certificates
+
+TLS certificates for `*.<domain>` are stored at:
+
+- **Certificate:** `/etc/nginx/ssl/<domain>.crt`
+- **Private key:** `/etc/nginx/ssl/<domain>.key`
+
+All reverse proxy configs reference these paths (via `nginx_ssl_cert` / `nginx_ssl_key` variables)
+for TLS termination.
+
+## How to Add a New Service
+
+1. Create a new config file following the existing template pattern:
+
+```nginx
+upstream new_service {
+    server {{ ai_server_ip }}:<target_port>;
+}
+
+server {
+    listen 443 ssl;
+    server_name new-service.{{ domain }};
+
+    ssl_certificate     {{ nginx_ssl_cert }};
+    ssl_certificate_key {{ nginx_ssl_key }};
+
+    location / {
+        proxy_pass http://new_service;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+}
+```
+
+2. Add the template to the role's `templates/` directory
+3. Add the DNS record via the `coredns` role
+
+## Configuration Validation
+
+Before reloading NGINX, the role runs:
+
+```bash
+nginx -t
+```
+
+The reload is only performed if the configuration test passes. This prevents
+broken configs from taking down the proxy.
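In Ansible terms, the validate-then-reload sequence looks roughly like this (assumed task names; the role's actual tasks may differ):

```yaml
# Sketch: reload fires only if `nginx -t` exits zero, because a failed
# command task aborts the play before the reload runs.
- name: Validate nginx configuration
  ansible.builtin.command: nginx -t
  changed_when: false

- name: Reload nginx
  ansible.builtin.service:
    name: nginx
    state: reloaded
```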
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags nginx
+```

+ 70 - 0
roles/ollama/README.md

@@ -0,0 +1,70 @@
+# Role: ollama
+
+## Purpose
+
+Install, configure, and maintain the Ollama inference server on the AI server host.
+
+## Installation
+
+Ollama is installed using the official install script, which places the binary at
+`/usr/local/bin/ollama` and creates a systemd service. The script handles both fresh
+installs and upgrades.
+
+## Environment Variables
+
+Configuration is applied via a systemd drop-in override file at
+`/etc/systemd/system/ollama.service.d/override.conf`.
+
+| Variable                  | Value              | Description                                      |
+|---------------------------|--------------------|--------------------------------------------------|
+| `OLLAMA_HOST`             | `0.0.0.0:11434`   | Listen on all interfaces, port 11434             |
+| `OLLAMA_MODELS`           | `/mnt/ai_data/ollama_models` | Model storage directory                |
+| `OLLAMA_KEEP_ALIVE`       | `-1`               | Keep models loaded in memory indefinitely        |
+| `OLLAMA_NUM_PARALLEL`     | `4`                | Number of parallel inference requests            |
+| `OLLAMA_MAX_LOADED_MODELS`| `4`                | Maximum models loaded in memory at once          |
+| `OLLAMA_API_KEY`          | (from Vault)       | API key for authentication                       |
+| `OLLAMA_FLASH_ATTENTION`  | `1`                | Enable Flash Attention for performance           |
+| `OLLAMA_CONTEXT_LENGTH`   | `32768`            | Default context window size                      |
+
+## Override.conf Approach
+
+Rather than modifying the upstream systemd unit file (which would be overwritten on
+upgrades), this role uses a systemd drop-in directory:
+
+```
+/etc/systemd/system/ollama.service.d/override.conf
+```
+
+This ensures environment variables survive Ollama upgrades while keeping the
+upstream service file intact.
+
+## Why OLLAMA_API_KEY
+
+Without an API key, anyone with network access to port 11434 can use the Ollama API
+to run inference, pull models, or delete models. Setting `OLLAMA_API_KEY` requires
+all API requests to include an `Authorization: Bearer <key>` header, preventing
+unauthenticated access.
+
+## OLLAMA_FLASH_ATTENTION
+
+Flash Attention is an attention-kernel optimization that reduces memory usage and
+increases throughput for transformer inference. Setting `OLLAMA_FLASH_ATTENTION=1`
+enables it for all models. It is a relatively recent Ollama option and provides
+measurable performance improvements.
+
+## Upgrade Procedure
+
+To upgrade Ollama to the latest version:
+
+```bash
+ansible-playbook playbooks/03_ollama.yml
+```
+
+The official install script detects the existing installation and performs an
+in-place upgrade. The service is restarted after the upgrade.
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags ollama
+```

+ 58 - 0
roles/openclaw/README.md

@@ -0,0 +1,58 @@
+# Role: openclaw
+
+## Purpose
+
+Deploy OpenClaw, a Telegram bot that provides access to Ollama models via Telegram
+messaging.
+
+## Prerequisites
+
+- A Telegram bot token obtained from [@BotFather](https://t.me/BotFather)
+- The token must be stored in Vault at `{{ vault_secret_prefix }}/openclaw:telegram_token`
+
+## Installation
+
+1. Python 3 and the bot's dependencies (`python-telegram-bot`, `requests`, `PyYAML`) are installed on the target host
+2. `bot.py` and `config.yml` are deployed to `{{ openclaw_data_dir }}`
+3. A systemd service (`openclaw.service`) is created for process management
+
+## Configuration
+
+Config file location: `/mnt/ai_data/openclaw/config.yml`
+
+The configuration includes:
+
+- Ollama API endpoint and authentication
+- Telegram bot token (read from Vault)
+- Default model selection
+- Allowed user IDs (if access control is needed)
+
+## Service
+
+```
+/etc/systemd/system/openclaw.service
+```
+
+The service runs as a systemd unit, automatically starting on boot and restarting
+on failure.
+
+## Vault Integration
+
+The Telegram bot token is stored in Vault:
+
+- **Path:** `{{ vault_secret_prefix }}/openclaw`
+- **Key:** `telegram_token`
+
+The role reads the token from Vault at deploy time and writes it to the config file.
+
+## Skipping Installation
+
+If no Telegram bot token is configured (the Vault secret is empty or absent),
+the OpenClaw installation is skipped entirely during `site.yml`. This allows
+running the full playbook without a Telegram bot token if the feature is not needed.
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags openclaw
+```

+ 73 - 0
roles/openwebui/README.md

@@ -0,0 +1,73 @@
+# Role: openwebui
+
+## Purpose
+
+Deploy Open WebUI with full Ollama integration, RAG support via Qdrant, and SSO via
+Keycloak OIDC.
+
+## Environment Variables
+
+| Variable                      | Value                                                        | Source      |
+|-------------------------------|--------------------------------------------------------------|-------------|
+| `OLLAMA_BASE_URL`             | `http://host.docker.internal:11434`                         | Hardcoded   |
+| `OLLAMA_API_KEY`              | (Ollama API key)                                             | Vault       |
+| `WEBUI_SECRET_KEY`            | (session signing key)                                        | Vault       |
+| `VECTOR_DB`                   | `qdrant`                                                     | Hardcoded   |
+| `QDRANT_URI`                  | `http://host.docker.internal:6333`                          | Hardcoded   |
+| `ENABLE_RAG_WEB_SEARCH`      | `true`                                                       | Hardcoded   |
+| `OAUTH_CLIENT_ID`            | `open-webui`                                                 | Hardcoded   |
+| `OAUTH_CLIENT_SECRET`        | (OIDC client secret)                                         | Vault       |
+| `OPENID_PROVIDER_URL`        | `https://idm.<domain>/realms/<keycloak_realm>/.well-known/openid-configuration` | Vault (keycloak_oidc_url) |
+| `OAUTH_PROVIDER_NAME`        | `{{ platform_name }}`                                        | group_vars  |
+| `ENABLE_OAUTH_SIGNUP`        | `true`                                                       | Hardcoded   |
+| `DEFAULT_USER_ROLE`          | `user`                                                       | Hardcoded   |
+| `WEBUI_NAME`                 | `{{ platform_name }}`                                        | group_vars  |
+| `ENABLE_OAUTH_ROLE_MANAGEMENT` | `true`                                                     | Hardcoded   |
+| `OAUTH_ROLES_CLAIM`          | `realm_access.roles`                                         | Hardcoded   |
+| `OAUTH_ALLOWED_ROLES`        | `ai-user,ai-admin`                                           | Hardcoded   |
+| `OAUTH_ADMIN_ROLES`          | `ai-admin`                                                   | Hardcoded   |
+
+## OIDC Setup
+
+Open WebUI uses Keycloak as its OIDC provider:
+
+1. `OAUTH_CLIENT_ID` is set to `open-webui` (matching the Keycloak client)
+2. `OAUTH_CLIENT_SECRET` is read from Vault at `{{ vault_secret_prefix }}/keycloak:client_secret`
+3. `OPENID_PROVIDER_URL` points to the Keycloak OIDC discovery endpoint
+
+## RAG
+
+- **Vector DB:** Qdrant at `http://host.docker.internal:6333`
+- **Web search:** enabled via `ENABLE_RAG_WEB_SEARCH=true`
+- Users can upload documents through the Open WebUI interface for RAG-augmented
+  conversations
+
+## Model Access
+
+Open WebUI connects to Ollama at `http://host.docker.internal:11434` (the Docker
+host network). The `OLLAMA_API_KEY` environment variable authenticates API requests
+to the Ollama server.
+
+## SSO
+
+Users see a "Sign in with {{ platform_name }}" button on the login page. Clicking it
+redirects to the Keycloak login page for the `{{ keycloak_realm }}` realm. After
+authentication, users are redirected back to Open WebUI.
+
+Access is restricted by Keycloak realm role:
+
+| Keycloak role | Open WebUI access      |
+|---------------|------------------------|
+| `ai-user`     | ✅ Standard user       |
+| `ai-admin`    | ✅ Admin               |
+| *(none)*      | ❌ Login blocked       |
+
+New users who authenticate via SSO are automatically created. Their Open WebUI role
+is set based on `OAUTH_ADMIN_ROLES` — users with `ai-admin` get admin access,
+all others get standard user access.
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags openwebui
+```

+ 47 - 0
roles/preflight/README.md

@@ -0,0 +1,47 @@
+# Role: preflight
+
+## Purpose
+
+Pre-flight validation that runs before any changes are made to the infrastructure.
+Ensures all hosts are reachable, have sufficient resources, and that critical
+services are in an expected state.
+
+## What It Checks
+
+| Check             | Hosts            | Condition                                      |
+|-------------------|------------------|------------------------------------------------|
+| SSH access        | All 3 hosts      | Can connect via SSH                            |
+| sudo access       | All 3 hosts      | Can escalate to root                           |
+| Disk space (data) | ai_server        | `/mnt/ai_data` has >= 500 GB free              |
+| Disk space (root) | coredns_host     | `/` has >= 10 GB free                          |
+| Ollama API health | ai_server        | `curl localhost:11434` returns HTTP 200         |
+
+## Requirements
+
+- Valid Ansible inventory with all 3 hosts configured
+- SSH key auth or password auth configured for the target user
+
+## Failure Modes
+
+Every check produces a clear failure message that includes:
+
+- **Host** -- which host failed the check
+- **Check** -- what was being validated
+- **Expected** -- the passing condition
+- **Actual** -- what was found
+
+Example:
+
+```
+FAILED: Host ai_server, Check: disk_space(/mnt/ai_data),
+        Expected: >= 500GB free, Actual: 312GB free
+```
+
+The playbook halts immediately on any preflight failure. No changes are made to any
+host until all preflight checks pass.
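A single check in the failure-message format above can be sketched as an `assert` task. This is an assumed shape (the real checks live in this role's tasks); `data_mount` would be selected from `ansible_facts.mounts` for `/mnt/ai_data`:

```yaml
# Sketch: halt with a Host/Check/Expected/Actual message on low disk space.
- name: "Preflight: disk space on /mnt/ai_data"
  ansible.builtin.assert:
    that: (data_mount.size_available | int) >= 500 * 1024 ** 3
    fail_msg: >-
      FAILED: Host {{ inventory_hostname }},
      Check: disk_space(/mnt/ai_data),
      Expected: >= 500GB free,
      Actual: {{ ((data_mount.size_available | int) / 1024 ** 3) | round | int }}GB free
```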
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags preflight
+```

+ 50 - 0
roles/qdrant/README.md

@@ -0,0 +1,50 @@
+# Role: qdrant
+
+## Purpose
+
+Deploy Qdrant vector database for retrieval-augmented generation (RAG) in Open WebUI.
+
+## Container Configuration
+
+| Setting      | Value                              |
+|--------------|------------------------------------|
+| Image        | `qdrant/qdrant:latest`            |
+| HTTP port    | `6333`                             |
+| gRPC port    | `6334`                             |
+| Volume       | `/mnt/ai_data/qdrant:/qdrant/storage` |
+
+## Integration with Open WebUI
+
+Open WebUI is configured to use Qdrant as its vector database backend:
+
+| Open WebUI Env Var | Value                          |
+|--------------------|--------------------------------|
+| `VECTOR_DB`        | `qdrant`                       |
+| `QDRANT_URI`       | `http://host.docker.internal:6333` |
+
+## Collections
+
+Collections are auto-managed by Open WebUI. When a user uploads documents or enables
+RAG for a conversation, Open WebUI automatically creates and populates the necessary
+Qdrant collections. No manual collection management is required.
+
+## Backup
+
+To back up the Qdrant data:
+
+```bash
+# Stop the container to ensure data consistency
+docker stop qdrant
+
+# Archive the data directory
+tar -czf qdrant_backup_$(date +%Y%m%d).tar.gz /mnt/ai_data/qdrant
+
+# Restart the container
+docker start qdrant
+```
+
+## Tags
+
+```bash
+ansible-playbook playbooks/site.yml --tags qdrant
+```

+ 8 - 0
templates/modelfiles/coding-128k.Modelfile.j2

@@ -0,0 +1,8 @@
+FROM {{ slot3_model }}
+
+PARAMETER num_ctx 131072
+PARAMETER num_predict -1
+PARAMETER temperature 0.2
+PARAMETER top_p 0.9
+
+SYSTEM """You are an expert coding assistant with deep knowledge of software engineering, algorithms, and best practices. You write clean, efficient, well-documented code with type hints and comprehensive error handling. When asked to write code, always include docstrings and type annotations. When debugging, systematically identify root causes. Always prefer idiomatic solutions."""

+ 8 - 0
templates/modelfiles/coding-32k.Modelfile.j2

@@ -0,0 +1,8 @@
+FROM {{ slot4_model }}
+
+PARAMETER num_ctx 32768
+PARAMETER num_predict -1
+PARAMETER temperature 0.2
+PARAMETER top_p 0.9
+
+SYSTEM """You are an expert coding assistant with deep knowledge of software engineering, algorithms, and best practices. You write clean, efficient, well-documented code with type hints and comprehensive error handling. When asked to write code, always include docstrings and type annotations. When debugging, systematically identify root causes. Always prefer idiomatic solutions."""

+ 8 - 0
templates/modelfiles/family-fast.Modelfile.j2

@@ -0,0 +1,8 @@
+FROM {{ family_model }}
+
+PARAMETER num_ctx 8192
+PARAMETER num_predict -1
+PARAMETER temperature 0.7
+PARAMETER top_p 0.95
+
+SYSTEM """You are a friendly, helpful family assistant. You provide fun, engaging, and age-appropriate responses suitable for all family members. You suggest creative activities, answer questions thoughtfully, and keep conversations positive and encouraging. You avoid any inappropriate content."""

+ 36 - 0
templates/nginx/keycloak-proxy.conf.j2

@@ -0,0 +1,36 @@
+upstream keycloak_backend {
+    server {{ ai_server_ip }}:{{ keycloak_port }};
+}
+
+server {
+    listen 80;
+    server_name idm.{{ domain }};
+    return 301 https://$server_name$request_uri;
+}
+
+server {
+    listen 443 ssl http2;
+    server_name idm.{{ domain }};
+
+    ssl_certificate     {{ nginx_ssl_cert }};
+    ssl_certificate_key {{ nginx_ssl_key }};
+    ssl_protocols       TLSv1.2 TLSv1.3;
+    ssl_ciphers         HIGH:!aNULL:!MD5;
+
+    client_max_body_size 10m;
+
+    location / {
+        proxy_pass              http://keycloak_backend;
+        proxy_set_header        Host                $host;
+        proxy_set_header        X-Real-IP           $remote_addr;
+        proxy_set_header        X-Forwarded-For     $proxy_add_x_forwarded_for;
+        proxy_set_header        X-Forwarded-Proto   $scheme;
+        proxy_set_header        X-Forwarded-Host    $host;
+        proxy_set_header        X-Forwarded-Port    443;
+        proxy_http_version      1.1;
+        proxy_read_timeout      300s;
+        proxy_buffer_size       128k;
+        proxy_buffers           4 256k;
+        proxy_busy_buffers_size 256k;
+    }
+}

+ 35 - 0
templates/nginx/ollama-api.conf.j2

@@ -0,0 +1,35 @@
+upstream ollama_backend {
+    server {{ ai_server_ip }}:{{ ollama_port }};
+}
+
+server {
+    listen 80;
+    server_name ollama-api.{{ domain }};
+    return 301 https://$server_name$request_uri;
+}
+
+server {
+    listen 443 ssl http2;
+    server_name ollama-api.{{ domain }};
+
+    ssl_certificate     {{ nginx_ssl_cert }};
+    ssl_certificate_key {{ nginx_ssl_key }};
+    ssl_protocols       TLSv1.2 TLSv1.3;
+    ssl_ciphers         HIGH:!aNULL:!MD5;
+
+    client_max_body_size 100m;
+
+    location / {
+        proxy_pass          http://ollama_backend;
+        proxy_set_header    Host              $host;
+        proxy_set_header    X-Real-IP         $remote_addr;
+        proxy_set_header    X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header    X-Forwarded-Proto $scheme;
+        proxy_http_version  1.1;
+        proxy_read_timeout  600s;
+        proxy_send_timeout  600s;
+        proxy_buffering     off;
+        proxy_request_buffering off;
+        chunked_transfer_encoding on;
+    }
+}

+ 33 - 0
templates/nginx/vault.conf.j2

@@ -0,0 +1,33 @@
+upstream vault_backend {
+    server {{ ai_server_ip }}:{{ vault_port }};
+}
+
+server {
+    listen 80;
+    server_name vault.{{ domain }};
+    return 301 https://$server_name$request_uri;
+}
+
+server {
+    listen 443 ssl http2;
+    server_name vault.{{ domain }};
+
+    ssl_certificate     {{ nginx_ssl_cert }};
+    ssl_certificate_key {{ nginx_ssl_key }};
+    ssl_protocols       TLSv1.2 TLSv1.3;
+    ssl_ciphers         HIGH:!aNULL:!MD5;
+
+    client_max_body_size 10m;
+
+    location / {
+        proxy_pass         http://vault_backend;
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header   X-Forwarded-Proto $scheme;
+        proxy_http_version 1.1;
+        proxy_set_header   Upgrade           $http_upgrade;
+        proxy_set_header   Connection        "upgrade";
+        proxy_read_timeout 300s;
+    }
+}

+ 38 - 0
templates/ollama/override.conf.j2

@@ -0,0 +1,38 @@
+[Service]
+# ── Ollama API & model settings ────────────────────────────────────────────
+Environment="OLLAMA_API_KEY={{ ollama_api_key }}"
+Environment="OLLAMA_HOST=0.0.0.0:11434"
+Environment="OLLAMA_MODELS=/mnt/ai_data/ollama_models"
+Environment="OLLAMA_KEEP_ALIVE=-1"
+
+# ── Inference performance ──────────────────────────────────────────────────
+# Flash attention: fused softmax, ~20% less memory bandwidth, faster on AVX2
+Environment="OLLAMA_FLASH_ATTENTION=1"
+
+# Threads: 28 logical CPUs on NUMA node 1 (14 physical cores × 2 HT)
+# Covers all threads on the socket so no cross-socket migrations occur
+Environment="OLLAMA_NUM_THREADS={{ ollama_num_threads }}"
+
+# Parallel inference streams — 4 simultaneous requests, 7 threads each
+Environment="OLLAMA_NUM_PARALLEL={{ ollama_num_parallel }}"
+
+# Keep 4 models warm in RAM (KEEP_ALIVE=-1 means never unload)
+Environment="OLLAMA_MAX_LOADED_MODELS={{ ollama_max_loaded_models }}"
+
+# ── NUMA / CPU binding ────────────────────────────────────────────────────
+# Pin all Ollama threads to NUMA node 1 CPUs (odd: 1,3,5,...,55).
+# Node 1 has ~120 GB free RAM vs node 0's ~75 GB.
+# CPUAffinity prevents cross-NUMA thread migration; Linux will naturally
+# allocate memory from the local node when all threads are on that node.
+CPUAffinity={{ ollama_cpu_affinity }}
+
+# ── Memory hardening ───────────────────────────────────────────────────────
+# Prevent model weights from being paged out under memory pressure
+LimitMEMLOCK=infinity
+
+# Sufficient file descriptors for parallel connections and mmap'd model files
+LimitNOFILE=65535
+
+# Disable OOM kill — losing a loaded model mid-inference is worse than
+# the kernel reclaiming other memory first
+OOMScoreAdjust=-500

+ 26 - 0
templates/ollama/warmup.sh.j2

@@ -0,0 +1,26 @@
+#!/bin/bash
+# Ollama model warm-up script
+# Sends a 1-token generation to each slot model to pin them in RAM
+
+set -e
+
+OLLAMA_URL="http://localhost:11434"
+API_KEY="{{ ollama_api_key }}"
+
+warmup_model() {
+    local model="$1"
+    echo "[warmup] Loading model: $model"
+    curl -sf -X POST "${OLLAMA_URL}/api/generate" \
+        -H "Authorization: Bearer ${API_KEY}" \
+        -H "Content-Type: application/json" \
+        -d "{\"model\":\"${model}\",\"prompt\":\"Hi\",\"stream\":false,\"options\":{\"num_predict\":1}}" \
+        > /dev/null || echo "[warmup] Warning: failed to warm up ${model}"
+    echo "[warmup] Done: $model"
+}
+
+warmup_model "{{ slot1_model }}"
+warmup_model "{{ slot2_model }}"
+warmup_model "{{ slot3_model }}"
+warmup_model "{{ slot4_model }}"
+
+echo "[warmup] All models warmed up."

+ 83 - 0
templates/openclaw/bot.py

@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""OpenClaw — Telegram bot that proxies messages to Ollama."""
+
+import sys
+import logging
+import yaml
+import requests
+from telegram import Update
+from telegram.ext import ApplicationBuilder, MessageHandler, CommandHandler, filters, ContextTypes
+
+CONFIG = {}
+
+
+def load_config(path: str) -> dict:
+    with open(path) as f:
+        return yaml.safe_load(f)
+
+
+async def cmd_start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+    model = CONFIG["ollama"]["model"]
+    await update.message.reply_text(
+        f"Hello! I'm connected to Ollama using {model}. Send me any message."
+    )
+
+
+async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+    text = update.message.text
+
+    if CONFIG["bot"].get("typing_indicator", True):
+        await context.bot.send_chat_action(
+            chat_id=update.effective_chat.id, action="typing"
+        )
+
+    try:
+        resp = requests.post(
+            f"{CONFIG['ollama']['base_url']}/api/generate",
+            json={
+                "model": CONFIG["ollama"]["model"],
+                "prompt": text,
+                "stream": False,
+            },
+            headers={"Authorization": f"Bearer {CONFIG['ollama']['api_key']}"},
+            timeout=CONFIG["ollama"]["timeout"],
+        )
+        resp.raise_for_status()
+        result = resp.json()["response"]
+
+        max_len = CONFIG["bot"].get("max_message_length", 4096)
+        if len(result) > max_len:
+            result = result[:max_len]
+
+    except Exception as e:
+        logging.error("Ollama error: %s", e)
+        result = CONFIG["bot"].get("error_message", "Sorry, I encountered an error.")
+
+    await update.message.reply_text(result)
+
+
+def main() -> None:
+    global CONFIG
+
+    config_path = "config.yml"
+    if "--config" in sys.argv:
+        config_path = sys.argv[sys.argv.index("--config") + 1]
+
+    CONFIG = load_config(config_path)
+
+    log_level = CONFIG.get("logging", {}).get("level", "INFO")
+    logging.basicConfig(
+        level=getattr(logging, log_level),
+        format="%(asctime)s %(levelname)s %(message)s",
+    )
+
+    app = ApplicationBuilder().token(CONFIG["telegram"]["token"]).build()
+    app.add_handler(CommandHandler("start", cmd_start))
+    app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
+
+    logging.info("OpenClaw starting with model: %s", CONFIG["ollama"]["model"])
+    app.run_polling()
+
+
+if __name__ == "__main__":
+    main()
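
The reply-truncation rule in `handle_message` can be exercised on its own, without a Telegram token or a running Ollama. A minimal standalone sketch (the helper name `truncate_reply` and the sample config values are illustrative, not part of the repo):

```python
# Standalone check of the reply-truncation rule from handle_message:
# replies longer than bot.max_message_length (Telegram caps messages at
# 4096 characters) are clipped before being sent.
def truncate_reply(result: str, bot_cfg: dict) -> str:
    max_len = bot_cfg.get("max_message_length", 4096)
    if len(result) > max_len:
        result = result[:max_len]
    return result

sample_cfg = {"max_message_length": 10}
print(truncate_reply("x" * 25, sample_cfg))  # clipped to 10 chars
print(truncate_reply("short", sample_cfg))   # short replies pass through
```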

+ 17 - 0
templates/openclaw/config.yml.j2

@@ -0,0 +1,17 @@
+telegram:
+  token: "{{ telegram_token }}"
+
+ollama:
+  base_url: "http://localhost:11434"
+  model: "{{ openclaw_model | default('llama3.2:3b') }}"
+  api_key: "{{ ollama_api_key }}"
+  timeout: 120
+
+bot:
+  max_message_length: 4096
+  typing_indicator: true
+  error_message: "Sorry, I encountered an error. Please try again."
+
+logging:
+  level: "INFO"
+  file: "/var/log/openclaw/openclaw.log"

+ 16 - 0
templates/openclaw/openclaw.service.j2

@@ -0,0 +1,16 @@
+[Unit]
+Description=OpenClaw Telegram Bot
+After=network.target
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/python3 {{ openclaw_data_dir }}/bot.py --config {{ openclaw_data_dir }}/config.yml
+Restart=on-failure
+RestartSec=5s
+StandardOutput=journal
+StandardError=journal
+SyslogIdentifier=openclaw
+WorkingDirectory={{ openclaw_data_dir }}
+
+[Install]
+WantedBy=multi-user.target

+ 14 - 0
templates/systemd/ollama-warmup.service.j2

@@ -0,0 +1,14 @@
+[Unit]
+Description=Ollama Model Warm-Up
+After=ollama.service
+Requires=ollama.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/usr/local/bin/ollama-warmup.sh
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target

+ 42 - 0
templates/vault/vault-unseal.sh.j2

@@ -0,0 +1,42 @@
+#!/bin/bash
+# Vault auto-unseal script
+# Reads unseal key from vault-init.json and unseals Vault
+
+set -e
+
+VAULT_ADDR="http://127.0.0.1:8200"
+INIT_FILE="/docker_mounts/vault/vault-init.json"
+
+if [ ! -f "$INIT_FILE" ]; then
+    echo "ERROR: vault-init.json not found at $INIT_FILE"
+    exit 1
+fi
+
+UNSEAL_KEY=$(jq -r '.unseal_keys_b64[0]' "$INIT_FILE")
+
+# jq -r prints the literal string "null" for a missing key, so check both
+if [ -z "$UNSEAL_KEY" ] || [ "$UNSEAL_KEY" = "null" ]; then
+    echo "ERROR: Could not extract unseal key from $INIT_FILE"
+    exit 1
+fi
+
+# Wait for Vault to be ready; query seal-status, which returns 200 whether
+# sealed or not (/sys/health returns 503 while sealed, so `curl -f` on it
+# would treat a sealed-but-running Vault as unreachable)
+for i in $(seq 1 30); do
+    STATUS=$(curl -sf "${VAULT_ADDR}/v1/sys/seal-status" 2>/dev/null || true)
+    if [ -n "$STATUS" ]; then
+        SEALED=$(echo "$STATUS" | jq -r '.sealed')
+        if [ "$SEALED" = "false" ]; then
+            echo "Vault is already unsealed."
+            exit 0
+        fi
+        break
+    fi
+    echo "Waiting for Vault... ($i/30)"
+    sleep 2
+done
+
+echo "Unsealing Vault..."
+curl -sf -X PUT "${VAULT_ADDR}/v1/sys/unseal" \
+    -H "Content-Type: application/json" \
+    -d "{\"key\": \"${UNSEAL_KEY}\"}"
+
+echo "Vault unsealed successfully."

+ 11 - 0
templates/vault/vault.hcl.j2

@@ -0,0 +1,11 @@
+storage "file" {
+  path = "{{ vault_data_dir }}"
+}
+
+listener "tcp" {
+  address     = "0.0.0.0:{{ vault_port }}"
+  tls_disable = 1
+}
+
+ui       = true
+api_addr = "https://vault.{{ domain }}"

+ 4 - 0
vault/.gitignore

@@ -0,0 +1,4 @@
+# Vault secrets — NEVER commit these files
+.vault-token
+.vault-init.json
+keycloak-credentials.txt