Переглянути джерело

Merge branch 'feature/three-pass-benchmark' of sarman/tftsr_ai into master

Shaun Arman 1 день тому
батько
коміт
342cbd123d
90 змінених файлів з 4903 додано та 410 видалено
  1. 22 15
      CLAUDE.md
  2. 45 15
      README.md
  3. 117 66
      benchmarks/README.md
  4. 17 6
      benchmarks/results/benchmark_20260307T170059.md
  5. 92 0
      benchmarks/results/benchmark_20260307T184212.md
  6. 147 0
      benchmarks/results/benchmark_20260308T003605.md
  7. 70 0
      benchmarks/results/benchmark_20260308T145246.md
  8. 57 0
      benchmarks/results/benchmark_20260308T215747.md
  9. 54 0
      benchmarks/results/benchmark_20260309T080551.md
  10. 47 0
      benchmarks/results/benchmark_20260309T174604.md
  11. 67 0
      benchmarks/results/benchmark_20260310T094843.md
  12. 117 0
      benchmarks/results/benchmark_20260310T102149.md
  13. 94 0
      benchmarks/results/benchmark_20260310T110632.md
  14. 107 0
      benchmarks/results/benchmark_20260310T122818.md
  15. 107 0
      benchmarks/results/benchmark_20260310T160815.md
  16. 78 0
      benchmarks/results/benchmark_20260310T170013.md
  17. 433 0
      benchmarks/results/benchmark_review_20260310.md
  18. 151 71
      benchmarks/results/model_selection.json
  19. 26 10
      inventory/group_vars/all.yml
  20. 49 0
      playbooks/01_vault.yml
  21. 44 2
      playbooks/02_infrastructure.yml
  22. 165 37
      playbooks/03_benchmark.yml
  23. 74 7
      playbooks/04_models.yml
  24. 1 1
      playbooks/07_openwebui.yml
  25. 21 0
      playbooks/08_openclaw.yml
  26. 171 0
      playbooks/_bench_tier_batch.yml
  27. 75 45
      roles/models/README.md
  28. 52 41
      roles/ollama/README.md
  29. 31 25
      roles/openclaw/README.md
  30. 36 39
      roles/openwebui/README.md
  31. 26 0
      templates/ollama/ollama-node0.service.j2
  32. 16 15
      templates/ollama/override.conf.j2
  33. 28 0
      templates/ollama/warmup-node0.sh.j2
  34. 4 4
      templates/ollama/warmup.sh.j2
  35. 14 0
      templates/systemd/ollama-warmup-node0.service.j2
  36. 15 0
      templates/vault/vault-unseal.service.j2
  37. 11 11
      templates/vault/vault-unseal.sh.j2
  38. 74 0
      tftsr_nginx-hardening/CLAUDE.md
  39. 4 0
      tftsr_nginx-hardening/ansible.cfg
  40. 4 0
      tftsr_nginx-hardening/inventory/hosts.yml
  41. 73 0
      tftsr_nginx-hardening/nginx-hardening/CLAUDE.md
  42. 179 0
      tftsr_nginx-hardening/nginx-hardening/README.md
  43. 4 0
      tftsr_nginx-hardening/nginx-hardening/ansible.cfg
  44. 7 0
      tftsr_nginx-hardening/nginx-hardening/inventory/hosts.yml
  45. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/fail2ban.yml
  46. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/geo_blocking.yml
  47. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/nginx_hardening.yml
  48. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/update_geo_blocks.yml
  49. 7 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/defaults/main.yml
  50. 5 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/handlers/main.yml
  51. 41 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/tasks/main.yml
  52. 22 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/jail.local.j2
  53. 3 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2
  54. 3 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2
  55. 509 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/defaults/main.yml
  56. 4 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/handlers/main.yml
  57. 103 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/tasks/main.yml
  58. 26 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2
  59. 15 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/defaults/main.yml
  60. 5 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/handlers/main.yml
  61. 44 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/tasks/main.yml
  62. 8 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2
  63. 8 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2
  64. 17 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2
  65. 10 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2
  66. 71 0
      tftsr_nginx-hardening/nginx-hardening/scripts/download-geo-zones.sh
  67. 7 0
      tftsr_nginx-hardening/nginx-hardening/site.yml
  68. 6 0
      tftsr_nginx-hardening/playbooks/fail2ban.yml
  69. 6 0
      tftsr_nginx-hardening/playbooks/geo_blocking.yml
  70. 6 0
      tftsr_nginx-hardening/playbooks/nginx_hardening.yml
  71. 6 0
      tftsr_nginx-hardening/playbooks/update_geo_blocks.yml
  72. 7 0
      tftsr_nginx-hardening/roles/fail2ban/defaults/main.yml
  73. 5 0
      tftsr_nginx-hardening/roles/fail2ban/handlers/main.yml
  74. 41 0
      tftsr_nginx-hardening/roles/fail2ban/tasks/main.yml
  75. 22 0
      tftsr_nginx-hardening/roles/fail2ban/templates/jail.local.j2
  76. 3 0
      tftsr_nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2
  77. 3 0
      tftsr_nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2
  78. 509 0
      tftsr_nginx-hardening/roles/geo_blocking/defaults/main.yml
  79. 4 0
      tftsr_nginx-hardening/roles/geo_blocking/handlers/main.yml
  80. 103 0
      tftsr_nginx-hardening/roles/geo_blocking/tasks/main.yml
  81. 26 0
      tftsr_nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2
  82. 31 0
      tftsr_nginx-hardening/roles/nginx_hardening/defaults/main.yml
  83. 5 0
      tftsr_nginx-hardening/roles/nginx_hardening/handlers/main.yml
  84. 44 0
      tftsr_nginx-hardening/roles/nginx_hardening/tasks/main.yml
  85. 8 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2
  86. 8 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2
  87. 17 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2
  88. 10 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2
  89. 71 0
      tftsr_nginx-hardening/scripts/download-geo-zones.sh
  90. 8 0
      tftsr_nginx-hardening/site.yml

+ 22 - 15
CLAUDE.md

@@ -6,22 +6,26 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ```bash
 # Full deployment
-ansible-playbook playbooks/site.yml
+ansible-playbook playbooks/site.yml -K -e @local.yml
 
 # Run a single playbook
-ansible-playbook playbooks/03_benchmark.yml
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml
 
 # Run with tags (each playbook defines granular tags)
-ansible-playbook playbooks/site.yml --tags ollama,docker
+ansible-playbook playbooks/site.yml --tags ollama,docker -K -e @local.yml
 
 # Benchmark and update warm-up slots in one shot
-ansible-playbook playbooks/03_benchmark.yml && ansible-playbook playbooks/04_models.yml
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
 
-# Override slot 4 with a specific model
-ansible-playbook playbooks/04_models.yml -e "slot4_model=qwen2.5-coder:7b"
+# Rotate general slot (Node 1, port 11434)
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest"
+
+# Rotate coding slot (Node 0, port 11435)
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot6_model=llama3.1:70b"
 
 # Run against a subset of hosts
-ansible-playbook playbooks/09_nginx.yml --limit nginx_proxy
+ansible-playbook playbooks/09_nginx.yml --limit nginx_proxy -K -e @local.yml
 
 # Lint playbooks
 ansible-lint playbooks/
@@ -30,7 +34,7 @@ ansible-lint playbooks/
 ansible-galaxy collection install -r requirements.yml
 
 # Check mode (dry run)
-ansible-playbook playbooks/site.yml --check --diff
+ansible-playbook playbooks/site.yml --check --diff -K -e @local.yml
 ```
 
 ## Required Local Configuration
@@ -87,17 +91,20 @@ All credentials live exclusively in Vault under `secret/data/{{ vault_project_sl
 
 **Composite score formula:**
 ```
-composite = (quality × 0.45) + (tokens_per_sec / 30, capped at 1.0) × 0.30 + (1 - ttft_ms/5000, floored at 0) × 0.25
+composite = (quality × 0.45) + (tokens_per_sec / ceiling, capped at 1.0) × 0.30 + (1 - ttft_ms/5000, floored at 0) × 0.25
 ```
+`benchmark_toks_norm_ceiling` defaults to 40 (dual-socket target).
+
+**Slot classification:** if `coding_composite - general_composite >= 0.10` (configurable via `benchmark_coding_threshold`), model goes to a coding slot; otherwise general.
 
-**Slot classification:** if `coding_composite - general_composite >= 0.15` (configurable via `benchmark_coding_threshold`), model goes to a coding slot; otherwise general.
+**6 warm-up slots across two NUMA instances:**
+- Node 1 (port 11434): slots 1–2 locked general + slot 5 rotatable general
+- Node 0 (port 11435): slots 3–4 locked coding + slot 6 rotatable coding
+- Slots 5/6 rotatable via `-e slot5_model=<name>` / `-e slot6_model=<name>` without re-benchmarking
 
-**4 warm-up slots always hot in RAM:**
-- Slots 1–2: top general-purpose models by composite score
-- Slots 3–4: top coding models by composite score
-- Slot 4 is user-rotatable via `-e slot4_model=<name>` without re-benchmarking
+`04_models.yml` creates Modelfiles (`coder-128k`, `coder-32k`, `coder-rotate`, `llama-family`, `gemma-family`) and two warmup services: `ollama-warmup.service` (Node 1) and `ollama-warmup-node0.service` (Node 0).
 
-`04_models.yml` creates named Ollama Modelfiles (`coder-128k`, `coder-32k`, `llama-family`, `gemma-family`) and a `ollama-warmup.service` systemd one-shot that pre-loads all 4 slots after Ollama starts.
+**Benchmark alias filter:** `benchmark_skip_aliases` in `group_vars/all.yml` lists the Modelfile aliases — the benchmark playbook excludes these from the test loop to prevent 32k-token KV-cache allocations from stalling the run.
 
 ### Key Variables
 

+ 45 - 15
README.md

@@ -23,7 +23,7 @@ bot access -- all driven by a single `ansible-playbook deploy_ai.yml` command.
           ┌───────────────▼┐    ┌────▼──────────────────────┐
           │ coredns_host   │    │ ai_server                 │
           │ 192.168.1.29   │    │ 192.168.1.100             │
-          │                │    │                            
+          │                │    │                           │
           │ - CoreDNS      │    │ - Ollama (LLM inference)  │
           └────────────────┘    │ - Open WebUI              │
                                 │ - Keycloak (SSO/OIDC)     │
@@ -292,11 +292,13 @@ The benchmark playbook automatically selects the best coding models and keeps th
 Check the current slot assignments in `benchmarks/results/model_selection.json`:
 
 ```bash
-cat benchmarks/results/model_selection.json | python3 -m json.tool | grep slot
+python3 -m json.tool benchmarks/results/model_selection.json | grep slot
 ```
 
-Slots 3 and 4 are always coding-classified models. Use the `slot3_coding` model for
-primary work and `slot4_coding` for a lighter/faster alternative.
+Slots 3–6 are coding-classified models, all running on the Node 0 instance at port 11435.
+Use `slot3_coding` (the highest-scoring coding model) as your primary model. Connect coding
+tools directly to `https://ollama-api.<domain>` (proxied from port 11434, Node 1) or to
+Open WebUI which load-balances across both instances.
 
 ## Day-2 Operations
 
@@ -343,6 +345,13 @@ ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
   -e "benchmark_models=qwen2.5-coder:14b-instruct-q4_K_M,codestral:22b-v0.1-q4_K_M"
 ```
 
+**Override tier boundaries or timeouts (see [benchmarks/README.md](benchmarks/README.md#three-pass-execution)):**
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_small_max_gb=8 benchmark_medium_max_gb=20"
+```
+
 **Pull recommended models if scores are below threshold:**
 
 ```bash
@@ -355,10 +364,20 @@ ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml -e "pull_if_better=
 ansible-playbook playbooks/04_models.yml -K -e @local.yml
 ```
 
-**Rotate slot 4 to a specific model:**
+**Rotate slot 5 (general) or slot 6 (coding) to a specific model:**
 
 ```bash
-ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot4_model=deepseek-r1:14b"
+# Swap general rotate slot
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest"
+
+# Swap coding rotate slot
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot6_model=llama3.1:70b"
+
+# Both at once
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest" -e "slot6_model=command-r:35b"
+
+# Reset both rotate slots back to benchmark recommendations
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
 ```
 
 **Redeploy Keycloak only:**
@@ -393,16 +412,25 @@ ansible-playbook playbooks/11_vault_oidc.yml -K -e @local.yml
 
 ## Model Slot System
 
-Four models are kept warm in RAM at all times (`OLLAMA_MAX_LOADED_MODELS=4`, `OLLAMA_KEEP_ALIVE=-1`). Slots are filled by the benchmark playbook — no model names are hardcoded.
+Six models are kept warm across two Ollama instances (`OLLAMA_MAX_LOADED_MODELS=3` each, `OLLAMA_KEEP_ALIVE=-1`). Slots are filled automatically by the benchmark playbook — no model names are hardcoded.
+
+```
+NUMA Node 1 — ollama.service     — port 11434  (general models)
+NUMA Node 0 — ollama-node0.service — port 11435 (coding models)
+```
+
+| Slot | Instance      | Port  | Role                    | Selection                     | Rotation                                    |
+|------|---------------|-------|-------------------------|-------------------------------|---------------------------------------------|
+| 1    | Node 1        | 11434 | General primary (locked) | Top general composite score  | Replaced only by re-benchmark               |
+| 2    | Node 1        | 11434 | General secondary (locked)| 2nd general composite score | Replaced only by re-benchmark               |
+| 5    | Node 1        | 11434 | General rotate           | 3rd general composite score   | `-e slot5_model=<name>`                     |
+| 3    | Node 0        | 11435 | Coding primary (locked)  | Top coding composite score    | Replaced only by re-benchmark               |
+| 4    | Node 0        | 11435 | Coding secondary (locked)| 2nd coding composite score    | Replaced only by re-benchmark               |
+| 6    | Node 0        | 11435 | Coding rotate            | 3rd coding composite score    | `-e slot6_model=<name>`                     |
 
-| Slot | Role                      | Selection                     | Rotation                              |
-|------|---------------------------|-------------------------------|---------------------------------------|
-| 1    | General-purpose primary   | Top general composite score   | Replaced if score < threshold         |
-| 2    | General-purpose secondary | 2nd general composite score   | Replaced if score < threshold         |
-| 3    | Coding primary            | Top coding composite score    | Locked; replaced only by re-benchmark |
-| 4    | Coding secondary          | 2nd coding composite score    | Rotatable: `-e slot4_model=<name>`    |
+**Classification rule:** a model is classified `coding` if its coding composite score exceeds its general composite score by ≥ 0.10; otherwise `general`.
 
-**Classification rule:** a model is classified `coding` if its coding composite score exceeds its general composite score by ≥ 0.15; otherwise `general`.
+**Modelfile aliases** (`coder-128k`, `coder-32k`, `coder-rotate`, `llama-family`, `gemma-family`) are excluded from benchmarking to prevent KV-cache allocation stalls.
 
 ## Verification Steps
 
@@ -416,8 +444,10 @@ After a full `deploy_ai.yml` run, verify the deployment (substitute your actual
 6. **Qdrant health** -- `curl -s http://<ai_server_ip>:6333/healthz` returns OK
 7. **CoreDNS resolution** -- `dig @<coredns_host_ip> vault.example.com` returns `<nginx_proxy_ip>`
 8. **NGINX configs** -- `ssh <nginx_proxy_ip> 'sudo nginx -t'` passes
-9. **OpenClaw** -- send a message to the Telegram bot, confirm response
+9. **OpenClaw** -- send a message to the Telegram bot, confirm response using slot1_general model
 10. **Benchmark report** -- check `benchmarks/results/benchmark_<timestamp>.md` for latest results
+11. **Node 0 Ollama** -- `curl -s -H "Authorization: Bearer <key>" http://<ai_server_ip>:11435/api/tags` returns model list
+12. **Both warmup services** -- `systemctl status ollama-warmup ollama-warmup-node0` both show `active (exited)`
 
 ## Role Reference
 

+ 117 - 66
benchmarks/README.md

@@ -3,133 +3,184 @@
 ## Overview
 
 Dynamic benchmark system for all installed Ollama models. Runs a suite of coding and
-general-purpose tests against every model currently available on the Ollama server,
-scores each model on a composite metric, and assigns models to the 4-slot system
-based on results.
+general-purpose tests against every model on the Ollama server, scores each model on a
+composite metric, and assigns models to the 6-slot dual-socket system based on results.
+
+Modelfile aliases (`coder-128k`, `coder-32k`, `coder-rotate`, `llama-family`,
+`gemma-family`) are automatically excluded from benchmarking — they share weights with
+real models and their large context window parameters would stall every run with
+285-second KV-cache allocations.
 
 ## How to Run
 
 **Benchmark all installed models:**
 
 ```bash
-ansible-playbook playbooks/05_benchmark.yml
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml
 ```
 
 **Benchmark specific models only:**
 
 ```bash
-ansible-playbook playbooks/05_benchmark.yml -e '{"benchmark_specific_models":["qwen2.5-coder:14b","deepseek-coder-v2:16b"]}'
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_models=qwen2.5-coder:14b,deepseek-coder-v2:16b"
 ```
 
-**Benchmark with automatic model pulling if a better model is found:**
+**Benchmark and immediately push 6-slot warm-up selections:**
 
 ```bash
-ansible-playbook playbooks/05_benchmark.yml -e pull_if_better=true
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+## Three-Pass Execution
+
+Models are split into three size tiers before benchmarking. Each tier gets its own
+per-request timeout to avoid small models waiting behind 70 B giants:
+
+| Tier   | RAM threshold | Timeout | Description                       |
+|--------|---------------|---------|-----------------------------------|
+| Small  | < 10 GB       | 300 s   | 7 B and under — fast path         |
+| Medium | 10–15 GB      | 900 s   | 16 B lite / 12 B — standard wait  |
+| Large  | > 15 GB       | 1200 s  | 34 B+ — 20-minute ceiling         |
+
+**Size source vs runtime RAM:** `ollama list` reports on-disk (compressed) sizes, which
+are smaller than actual runtime RAM usage (model weights + KV cache + overhead). A
+`benchmark_size_overhead_factor` (default `1.2`) is applied when computing tier
+boundaries: the disk-size cutoffs are divided by the factor before comparison. For
+example, with default settings a 9 GB on-disk model is treated as ~10.8 GB at runtime
+and falls in the medium tier rather than small.
+
+**Override tier boundaries:**
+
+```bash
+# Adjust where small/medium boundary sits
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_small_max_gb=8 benchmark_medium_max_gb=20"
+
+# Tune the overhead factor if your models load larger/smaller than expected
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_size_overhead_factor=1.25"
+
+# Override timeouts only
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_medium_timeout=600 benchmark_large_timeout=1800"
 ```
 
 ## Test Suites
 
 ### Coding Tests
 
-| Test       | Prompt                                                         | What Is Measured              |
-|------------|----------------------------------------------------------------|-------------------------------|
-| `code_gen` | "Write a Python function that implements binary search on a sorted list. Include type hints and docstring." | Correctness (def + return present), code structure, tokens/sec |
-| `debug`    | "Find and fix the bug in this Python code: `def factorial(n): return n * factorial(n)`. Explain the issue." | Identifies base case bug, explanation quality, tokens/sec |
-| `refactor` | "Refactor this code to use list comprehension: `result = []; for i in range(10): if i % 2 == 0: result.append(i*i)`" | Produces list comprehension, conciseness, tokens/sec |
+| Test       | Prompt                                                                     | What Is Measured                                   |
+|------------|----------------------------------------------------------------------------|----------------------------------------------------|
+| `code_gen` | Write a Python merge sort with type hints, docstring, and 3 unit tests     | `def`, `return`, `"""`, `->`, `assert`, `def test_`, `import` |
+| `debug`    | Find and fix 3 bugs in a given Python function                             | `def`, `return`, code block, `assert`              |
+| `refactor` | Refactor a loop for readability and performance                            | `def`, `return`, code block, type hint, `import`   |
 
 ### General Tests
 
-| Test        | Prompt                                                        | What Is Measured              |
-|-------------|---------------------------------------------------------------|-------------------------------|
-| `explain`   | "Explain the concept of recursion to a beginner programmer. Use a simple analogy." | Clarity, analogy presence, length adequacy, tokens/sec |
-| `creative`  | "Write a short poem about artificial intelligence."           | Creativity (line count, poetic structure), tokens/sec |
-| `reasoning` | "A farmer has 17 sheep. All but 9 die. How many are left? Explain your reasoning step by step." | Correct answer (9), step-by-step reasoning, tokens/sec |
+| Test        | Prompt                                                     | What Is Measured                                     |
+|-------------|------------------------------------------------------------|------------------------------------------------------|
+| `explain`   | Explain how Python's GIL works and when it matters         | Response length, paragraph structure, list formatting |
+| `creative`  | Suggest 5 fun family activities for a rainy weekend        | Response length, paragraph structure, list formatting |
+| `reasoning` | Apple arithmetic word problem                              | Response length, paragraph structure, list formatting |
 
 ### Latency Test
 
-| Test      | Prompt | What Is Measured           |
-|-----------|--------|----------------------------|
-| `latency` | "Hi"   | Time to first token (TTFT) |
+| Test      | Prompt | What Is Measured                                   |
+|-----------|--------|----------------------------------------------------|
+| `latency` | "Hi"   | Total response time (eval + prompt eval), used as TTFT proxy |
 
 ## Scoring
 
-### Metrics Collected from Ollama API
-
-- **tokens/sec** -- generation throughput from `/api/generate` response
-- **TTFT** (time to first token) -- measured from request start to first streamed token
-- **Quality heuristics** -- regex and length checks specific to each test type
-
 ### Composite Score Formula
 
 For each category (coding, general), a composite score is calculated:
 
 ```
-composite = (quality * 0.45) + (tokens_per_sec_normalized * 0.30) + (latency_score * 0.25)
+composite = (quality * 0.45) + (tokens_per_sec / ceiling, capped 1.0) * 0.30
+          + (1 - ttft_ms / 5000, floored 0) * 0.25
 ```
 
 Where:
-- `quality` is 0.0-1.0 based on heuristic checks for the test type
-- `tokens_per_sec_normalized` is the model's tokens/sec divided by the fastest model's tokens/sec
-- `latency_score` is 1.0 - (model_ttft / slowest_ttft)
+- `quality` — 0.0–1.0 from heuristic checks per test type (see CLAUDE.md for weights)
+- `tokens_per_sec` — averaged across all test responses; normalized against `benchmark_toks_norm_ceiling` (default 40)
+- `ttft_ms` — latency test response time in milliseconds
 
 ### Classification Rule
 
-A model is classified as a **coding** model if:
+A model is classified as **coding** if:
 
 ```
-coding_composite - general_composite >= 0.15
+coding_composite - general_composite >= benchmark_coding_threshold   # default 0.10
 ```
 
-Otherwise it is classified as **general**.
+Name-pattern heuristics (`coder`, `codestral`, `codellama`, `starcoder`) apply as a
+tiebreaker. Category can also be forced with `model_category_overrides` in `group_vars/all.yml`.
 
 ## Thresholds and Configuration
 
-All thresholds are configurable via `group_vars/all.yml`:
-
-| Key                            | Default | Description                                    |
-|--------------------------------|---------|------------------------------------------------|
-| `benchmark_min_tokens_per_sec` | 10      | Minimum tokens/sec to pass a model             |
-| `benchmark_max_ttft_ms`        | 5000    | Maximum time to first token in milliseconds    |
-| `benchmark_quality_weight`     | 0.45    | Weight of quality score in composite            |
-| `benchmark_speed_weight`       | 0.30    | Weight of tokens/sec in composite               |
-| `benchmark_latency_weight`     | 0.25    | Weight of latency score in composite            |
-| `benchmark_coding_threshold`   | 0.15    | Minimum coding-general delta for coding classification |
+All thresholds are configurable in `inventory/group_vars/all.yml`:
+
+| Key                               | Default | Description                                            |
+|-----------------------------------|---------|--------------------------------------------------------|
+| `benchmark_thresholds.min_tokens_per_sec`  | 5.0  | Minimum tok/sec to be slot-eligible          |
+| `benchmark_thresholds.min_quality_score`   | 0.6  | Minimum quality score to be slot-eligible    |
+| `benchmark_thresholds.min_composite_score` | 0.55 | Minimum composite to avoid threshold warning |
+| `benchmark_toks_norm_ceiling`     | 40      | tok/sec ceiling for normalization (dual-socket target) |
+| `benchmark_coding_threshold`      | 0.10    | coding-general composite delta for classification      |
+| `benchmark_small_max_gb`          | 10      | Runtime RAM upper bound for small pass (GB)            |
+| `benchmark_medium_max_gb`         | 15      | Runtime RAM upper bound for medium pass (GB)           |
+| `benchmark_size_overhead_factor`  | 1.2     | Multiplier applied to `ollama list` disk sizes to estimate runtime RAM |
+| `benchmark_small_timeout`         | 300     | Per-request timeout for small models (seconds)         |
+| `benchmark_medium_timeout`        | 900     | Per-request timeout for medium models (seconds)        |
+| `benchmark_large_timeout`         | 1200    | Per-request timeout for large models (seconds)         |
+| `benchmark_skip_aliases`          | see below| Modelfile aliases excluded from benchmark loop        |
+
+Default `benchmark_skip_aliases`:
+```yaml
+- coder-128k
+- coder-32k
+- coder-rotate
+- llama-family
+- gemma-family
+```
 
 ## Output Format
 
 ### Benchmark Report
 
-Each run produces `benchmarks/benchmark_<timestamp>.md` with a results table:
+Each run produces `benchmarks/results/benchmark_<timestamp>.md`. The slot table now
+covers all 6 slots across both NUMA instances:
 
 ```
-| Model                  | Coding Composite | General Composite | Classification | Tokens/sec | TTFT (ms) |
-|------------------------|------------------|-------------------|----------------|------------|-----------|
-| qwen2.5-coder:14b      | 0.82             | 0.65              | coding         | 38.2       | 420       |
-| deepseek-coder-v2:16b  | 0.78             | 0.63              | coding         | 35.1       | 510       |
-| llama3.1:8b            | 0.61             | 0.74              | general        | 52.3       | 280       |
-| mistral:7b             | 0.58             | 0.71              | general        | 55.8       | 250       |
+| Slot | Socket              | Role            | Model                     | Composite |
+|------|---------------------|-----------------|---------------------------|-----------|
+| 1    | Node 1 (port 11434) | General (locked)| llama3.1:8b               | 0.74      |
+| 2    | Node 1 (port 11434) | General (locked)| mistral:latest            | 0.71      |
+| 5    | Node 1 (port 11434) | General (rotate)| llama3.2:3b               | 0.63      |
+| 3    | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b     | 0.82      |
+| 4    | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b          | 0.78      |
+| 6    | Node 0 (port 11435) | Coding (rotate) | codegemma:7b              | 0.69      |
 ```
 
-### Model Selection File
+### model_selection.json
 
-Results are also written to `model_selection.json`:
+Results are written to `benchmarks/results/model_selection.json`:
 
 ```json
 {
-  "timestamp": "2025-01-15T10:30:00Z",
-  "slot1_coding": "qwen2.5-coder:14b",
-  "slot2_general": "llama3.1:8b",
-  "slot3_backup": "deepseek-coder-v2:16b",
-  "slot4_experimental": null,
-  "results": { ... }
+  "slot1_general": "llama3.1:8b",
+  "slot2_general": "mistral:latest",
+  "slot5_general_rotate": "llama3.2:3b",
+  "slot3_coding": "deepseek-coder-v2:16b",
+  "slot4_coding": "qwen2.5-coder:7b",
+  "slot6_coding_rotate": "codegemma:7b",
+  "general_ranking": [...],
+  "coding_ranking": [...],
+  "all_metrics": { ... }
 }
 ```
 
-## Slot Selection
-
-Slots are assigned from benchmark results as follows:
-
-1. **Slot 1 (Primary Coding)** -- model with the highest `coding_composite` score
-2. **Slot 2 (Primary General)** -- model with the highest `general_composite` score
-3. **Slot 3 (Secondary / Backup)** -- next-best model by overall average composite
-4. **Slot 4 (Experimental)** -- not assigned by benchmarks; set manually via `-e slot4_model=<name>`
+This file is read by `04_models.yml` to decide what to pull and warm up. It is committed
+to the repo so slot selections survive a clean checkout.

+ 17 - 6
benchmarks/results/benchmark_20260307T170059.md

@@ -1,15 +1,20 @@
 # Benchmark Results - 20260307T170059
 
 ## Model Selection
-| Slot | Role | Model | Composite Score |
-|------|------|-------|----------------|
-| 1 | General (Primary) | llama3.2:3b | 0.967 |
-| 2 | General (Secondary) | llama3.2:3b | 0.967 |
-| 3 | Coding (Primary) | deepseek-coder-v2 | 0.738 |
-| 4 | Coding (Secondary) | qwen2.5-coder:7b | 0.63 |
+
+
+| Slot | Role                | Model             | Composite Score |
+| ---- | ------------------- | ----------------- | --------------- |
+| 1    | General (Primary)   | llama3.2:3b       | 0.967           |
+| 2    | General (Secondary) | llama3.2:3b       | 0.967           |
+| 3    | Coding (Primary)    | deepseek-coder-v2 | 0.738           |
+| 4    | Coding (Secondary)  | qwen2.5-coder:7b  | 0.63            |
+
 
 ## Detailed Metrics
+
 ### deepseek-coder-v2
+
 - **Category**: coding
 - **Coding Quality**: 0.667
 - **General Quality**: 0.918
@@ -17,7 +22,9 @@
 - **Latency (ms)**: 1744.5
 - **Coding Composite**: 0.738
 - **General Composite**: 0.852
+
 ### qwen2.5-coder:7b
+
 - **Category**: coding
 - **Coding Quality**: 0.64
 - **General Quality**: 0.922
@@ -25,7 +32,9 @@
 - **Latency (ms)**: 1211.5
 - **Coding Composite**: 0.63
 - **General Composite**: 0.757
+
 ### llama3.2:3b
+
 - **Category**: general
 - **Coding Quality**: 0.607
 - **General Quality**: 0.991
@@ -35,7 +44,9 @@
 - **General Composite**: 0.967
 
 ## Scoring Formula
+
 - Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
 - Speed normalized against 22 tok/sec ceiling (hardware-observed max)
 - Coding quality: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
 - Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 92 - 0
benchmarks/results/benchmark_20260307T184212.md

@@ -0,0 +1,92 @@
+# Benchmark Results - 20260307T184212
+
+## Model Selection
+| Slot | Role | Model | Composite Score |
+|------|------|-------|----------------|
+| 1 | General (Primary) | llama3.2:3b | 0.001 |
+| 2 | General (Secondary) | gemma-family:latest | 0.0 |
+| 3 | Coding (Primary) | coder-128k:latest | 0.001 |
+| 4 | Coding (Secondary) | coder-32k:latest | 0.001 |
+
+## Detailed Metrics
+### gemma-family:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama-family:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### coder-128k:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 285394.5
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### coder-32k:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 142328.6
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### deepseek-coder-v2:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 143942.9
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 139756.5
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 22 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 147 - 0
benchmarks/results/benchmark_20260308T003605.md

@@ -0,0 +1,147 @@
+# Benchmark Results - 20260308T003605
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                 | Composite Score |
+| ---- | ------------------- | ---------------- | --------------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b           | 0.001           |
+| 2    | Node 1 (port 11434) | General (locked) | command-r:35b         | 0.0             |
+| 5    | Node 1 (port 11434) | General (rotate) | llama3.1:70b          | 0.0             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | codellama:34b         | 0.0             |
+| 4    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b | 0.0             |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:14b     | 0.0             |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0.008
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 221414.9
+- **Coding Composite**: 0.0
+- **General Composite**: 0.004
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### qwen2.5-coder:14b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 239690.0
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### command-r:35b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 169971.8
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### llama3.1:70b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### mistral-nemo:latest
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### mistral:latest
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 130127.2
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 70 - 0
benchmarks/results/benchmark_20260308T145246.md

@@ -0,0 +1,70 @@
+# Benchmark Results - 20260308T145246
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.001 |
+| 2 | Node 1 (port 11434) | General (locked) | mistral-nemo:latest | 0.0 |
+| 5 | Node 1 (port 11434) | General (rotate) | mistral:latest | 0.0 |
+| 3 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.0 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.0 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | none | N/A |
+
+## Detailed Metrics
+### mistral-nemo:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### mistral:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 109301.3
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 57 - 0
benchmarks/results/benchmark_20260308T215747.md

@@ -0,0 +1,57 @@
+# Benchmark Results - 20260308T215747
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model               | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b         | 0.45            |
+| 2    | Node 1 (port 11434) | General (locked) | mistral-nemo:latest | 0.45            |
+| 5    | Node 1 (port 11434) | General (rotate) | none                | N/A             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b    | 0.371           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b    | 0.371           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | none                | N/A             |
+
+
+## Detailed Metrics
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.917
+- **General Quality**: 1.0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.413
+- **General Composite**: 0.45
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.823
+- **General Quality**: 0.85
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.371
+- **General Composite**: 0.383
+
+### mistral-nemo:latest
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 1.0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.383
+- **General Composite**: 0.45
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 54 - 0
benchmarks/results/benchmark_20260309T080551.md

@@ -0,0 +1,54 @@
+# Benchmark Results - 20260309T080551
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.001 |
+| 2 | Node 1 (port 11434) | General (locked) | gemma3:12b-it-q4_K_M | 0.0 |
+| 5 | Node 1 (port 11434) | General (rotate) | none | N/A |
+| 3 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.316 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.0 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | none | N/A |
+
+## Detailed Metrics
+### deepseek-coder-v2:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 676104.3
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 154480.0
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 722357.3
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0.7
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 145493.5
+- **Coding Composite**: 0.316
+- **General Composite**: 0.001
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 47 - 0
benchmarks/results/benchmark_20260309T174604.md

@@ -0,0 +1,47 @@
+# Benchmark Results - 20260309T174604
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model            | Composite Score |
+| ---- | ------------------- | ---------------- | ---------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b      | 0.001           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.2:3b      | 0.001           |
+| 5    | Node 1 (port 11434) | General (rotate) | none             | N/A             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b | 0.001           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b | 0.001           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | none             | N/A             |
+
+
+## Detailed Metrics
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 108021.2
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 146781.6
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 67 - 0
benchmarks/results/benchmark_20260310T094843.md

@@ -0,0 +1,67 @@
+# Benchmark Results - 20260310T094843
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                    | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------------ | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b              | 0.814           |
+| 2    | Node 1 (port 11434) | General (locked) | gemma3:12b-it-q4_K_M     | 0.484           |
+| 5    | Node 1 (port 11434) | General (rotate) | none                     | N/A             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:latest | 0.693           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b         | 0.638           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | none                     | N/A             |
+
+
+## Detailed Metrics
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 22.8
+- **Latency (ms)**: 1612.6
+- **Coding Composite**: 0.693
+- **General Composite**: 0.739
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 22.4
+- **Latency (ms)**: 661.8
+- **Coding Composite**: 0.767
+- **General Composite**: 0.814
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.5
+- **Latency (ms)**: 5730.8
+- **Coding Composite**: 0.431
+- **General Composite**: 0.484
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.8
+- **Latency (ms)**: 1359.5
+- **Coding Composite**: 0.638
+- **General Composite**: 0.687
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 117 - 0
benchmarks/results/benchmark_20260310T102149.md

@@ -0,0 +1,117 @@
+# Benchmark Results - 20260310T102149
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                    | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------------ | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b              | 0.819           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.1:8b              | 0.621           |
+| 5    | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M     | 0.484           |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b    | 0.707           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:latest | 0.681           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:latest     | 0.644           |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4350.0
+- **Coding Composite**: 0.409
+- **General Composite**: 0.32
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.6
+- **Latency (ms)**: 1586.8
+- **Coding Composite**: 0.707
+- **General Composite**: 0.753
+
+### qwen2.5-coder:14B
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2223.7
+- **Coding Composite**: 0.549
+- **General Composite**: 0.608
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 22.2
+- **Latency (ms)**: 1759.1
+- **Coding Composite**: 0.681
+- **General Composite**: 0.727
+
+### qwen2.5-coder:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.8
+- **Latency (ms)**: 1239.2
+- **Coding Composite**: 0.644
+- **General Composite**: 0.694
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0.8
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2251.2
+- **Coding Composite**: 0.586
+- **General Composite**: 0.621
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.3
+- **Latency (ms)**: 1258.3
+- **Coding Composite**: 0.639
+- **General Composite**: 0.689
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 5701.3
+- **Coding Composite**: 0.432
+- **General Composite**: 0.484
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 22.7
+- **Latency (ms)**: 613.5
+- **Coding Composite**: 0.772
+- **General Composite**: 0.819
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 94 - 0
benchmarks/results/benchmark_20260310T110632.md

@@ -0,0 +1,94 @@
+# Benchmark Results - 20260310T110632
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.621 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.483 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.738 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.735 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:latest | 0.667 |
+
+## Detailed Metrics
+### codellama:34b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4244.1
+- **Coding Composite**: 0.437
+- **General Composite**: 0.326
+### deepseek-coder-v2:latest
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 25.0
+- **Latency (ms)**: 1543.2
+- **Coding Composite**: 0.735
+- **General Composite**: 0.758
+### deepseek-coder-v2:16b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.5
+- **Latency (ms)**: 1415.1
+- **Coding Composite**: 0.738
+- **General Composite**: 0.762
+### qwen2.5-coder:14B
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2195.9
+- **Coding Composite**: 0.572
+- **General Composite**: 0.609
+### qwen2.5-coder:latest
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.8
+- **Latency (ms)**: 1228.2
+- **Coding Composite**: 0.667
+- **General Composite**: 0.694
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2249.3
+- **Coding Composite**: 0.596
+- **General Composite**: 0.621
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.7
+- **Latency (ms)**: 1231.9
+- **Coding Composite**: 0.666
+- **General Composite**: 0.693
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.4
+- **Latency (ms)**: 6355.8
+- **Coding Composite**: 0.441
+- **General Composite**: 0.483
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 22.3
+- **Latency (ms)**: 644.2
+- **Coding Composite**: 0.785
+- **General Composite**: 0.814
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 107 - 0
benchmarks/results/benchmark_20260310T122818.md

@@ -0,0 +1,107 @@
+# Benchmark Results - 20260310T122818
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                 | Composite Score |
+| ---- | ------------------- | ---------------- | --------------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b           | 0.835           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.1:8b           | 0.624           |
+| 5    | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M  | 0.481           |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b | 0.727           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b      | 0.674           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:latest  | 0.671           |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4261.3
+- **Coding Composite**: 0.436
+- **General Composite**: 0.325
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.1
+- **Latency (ms)**: 1583.1
+- **Coding Composite**: 0.727
+- **General Composite**: 0.75
+
+### qwen2.5-coder:14B
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2172.1
+- **Coding Composite**: 0.573
+- **General Composite**: 0.61
+
+### qwen2.5-coder:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.4
+- **Latency (ms)**: 1102.0
+- **Coding Composite**: 0.671
+- **General Composite**: 0.698
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.9
+- **Latency (ms)**: 2186.7
+- **Coding Composite**: 0.6
+- **General Composite**: 0.624
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.6
+- **Latency (ms)**: 1073.7
+- **Coding Composite**: 0.674
+- **General Composite**: 0.701
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.2
+- **Latency (ms)**: 6142.8
+- **Coding Composite**: 0.439
+- **General Composite**: 0.481
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 24.5
+- **Latency (ms)**: 568.5
+- **Coding Composite**: 0.806
+- **General Composite**: 0.835
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 107 - 0
benchmarks/results/benchmark_20260310T160815.md

@@ -0,0 +1,107 @@
+# Benchmark Results - 20260310T160815
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                    | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------------ | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b              | 0.832           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.1:8b              | 0.624           |
+| 5    | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M     | 0.482           |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b    | 0.737           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:latest | 0.735           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:7b         | 0.666           |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4336.2
+- **Coding Composite**: 0.432
+- **General Composite**: 0.321
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.1
+- **Latency (ms)**: 1411.4
+- **Coding Composite**: 0.735
+- **General Composite**: 0.759
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.2
+- **Latency (ms)**: 1383.8
+- **Coding Composite**: 0.737
+- **General Composite**: 0.76
+
+### qwen2.5-coder:14B
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2181.0
+- **Coding Composite**: 0.573
+- **General Composite**: 0.609
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2183.4
+- **Coding Composite**: 0.6
+- **General Composite**: 0.624
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.6
+- **Latency (ms)**: 1210.0
+- **Coding Composite**: 0.666
+- **General Composite**: 0.693
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.2
+- **Latency (ms)**: 5540.1
+- **Coding Composite**: 0.44
+- **General Composite**: 0.482
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 24.2
+- **Latency (ms)**: 581.0
+- **Coding Composite**: 0.803
+- **General Composite**: 0.832
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 78 - 0
benchmarks/results/benchmark_20260310T170013.md

@@ -0,0 +1,78 @@
+# Benchmark Results - 20260310T170013
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.623 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.481 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.723 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.655 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:14B | 0.57 |
+
+## Detailed Metrics
+### codellama:34b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4235.4
+- **Coding Composite**: 0.437
+- **General Composite**: 0.326
+### deepseek-coder-v2:16b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 23.5
+- **Latency (ms)**: 1568.5
+- **Coding Composite**: 0.723
+- **General Composite**: 0.746
+### qwen2.5-coder:14B
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2229.7
+- **Coding Composite**: 0.57
+- **General Composite**: 0.607
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2202.0
+- **Coding Composite**: 0.599
+- **General Composite**: 0.623
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.5
+- **Latency (ms)**: 1431.0
+- **Coding Composite**: 0.655
+- **General Composite**: 0.682
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.1
+- **Latency (ms)**: 5941.9
+- **Coding Composite**: 0.439
+- **General Composite**: 0.481
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 23.0
+- **Latency (ms)**: 754.8
+- **Coding Composite**: 0.786
+- **General Composite**: 0.814
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 433 - 0
benchmarks/results/benchmark_review_20260310.md

@@ -0,0 +1,433 @@
+# Ticket Summary — Post-Change Benchmark Review: num_predict 300 → 500
+
+## Description
+
+After resolving the dual NUMA/CPUAffinity performance regression (2026-03-10), two
+post-fix benchmark runs were executed to validate the effect of raising
+`benchmark_num_predict` from 300 to 500. This document captures the four-run history,
+before/after comparison, full Run 4 model results, and findings on system tuning state.
+
+---
+
+## Acceptance Criteria
+
+- [x] Run 3 (num_predict=300) and Run 4 (num_predict=500) compared on common models
+- [x] All tuning variables reviewed and declared optimal or requiring action
+- [x] Any model-identity anomalies flagged for follow-up
+- [x] MEMORY.md updated with current variable values
+- [x] This ticket summary written to `benchmarks/results/`
+
+---
+
+## Work Implemented
+
+### Run History
+
+| Run | Timestamp | Condition | Result |
+|-----|-----------|-----------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300, 4 models | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | Post-NUMA-fix, num_predict=500, 9 models | quality=0.83–0.97, tok/sec=3.2–25.0 |
+
+### Before vs. After (Runs 3 → 4, common models)
+
+| Model | coding_quality @ 300 | coding_quality @ 500 | Delta |
+|-------|---------------------|---------------------|-------|
+| deepseek-coder-v2:latest | 0.783 | 0.833 | +0.050 |
+| qwen2.5-coder:7b | 0.800 | 0.850 | +0.050 |
+| llama3.2:3b | 0.850 | 0.890 | +0.040 |
+| gemma3:12b-it-q4_K_M | 0.850 | 0.873 | +0.023 |
+
+### Full Run 4 Results (num_predict=500, 9 models)
+
+| Model | tok/sec | coding_q | general_q | latency_ms | coding_composite | general_composite | category |
+|-------|---------|----------|-----------|------------|-----------------|------------------|----------|
+| deepseek-coder-v2:16b | 24.5 | 0.833 | 0.885 | 1415.1 | 0.738 | 0.762 | coding |
+| deepseek-coder-v2:latest | 25.0 | 0.833 | 0.885 | 1543.2 | 0.735 | 0.758 | coding |
+| qwen2.5-coder:latest | 12.8 | 0.850 | 0.910 | 1228.2 | 0.667 | 0.694 | coding |
+| qwen2.5-coder:7b | 12.7 | 0.850 | 0.910 | 1231.9 | 0.666 | 0.693 | coding |
+| qwen2.5-coder:14B | 6.6 | 0.850 | 0.931 | 2195.9 | 0.572 | 0.609 | coding |
+| codellama:34b | 3.2 | 0.833 | 0.586 | 4244.1 | 0.437 | 0.326 | coding |
+| llama3.2:3b | 22.3 | 0.890 | 0.954 | 644.2 | 0.785 | 0.814 | general |
+| llama3.1:8b | 11.8 | 0.823 | 0.877 | 2249.3 | 0.596 | 0.621 | general |
+| gemma3:12b-it-q4_K_M | 6.4 | 0.873 | 0.966 | 6355.8 | 0.441 | 0.483 | general |
+
+### Current Slot Assignments (model_selection.json)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.621 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.738 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.735 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.483 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:latest | 0.667 |
+
+### Tuning Variable Status
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal — rubric ceiling is now the binding constraint |
+| `benchmark_large_timeout` | 480s | Adequate — ~3–24x margin (500 tokens at 3.2–25 tok/sec ≈ 20–156 s vs 480 s) |
+| `benchmark_toks_norm_ceiling` | 40 | Correct — fastest model at 62.5% of ceiling |
+| `benchmark_coding_threshold` | 0.10 | Correct — name-pattern fallback handling remaining cases |
+| Scoring weights | 0.45/0.30/0.25 | Appropriate for interactive serving platform |
+
+### Findings
+
+**Finding 1 — num_predict=500 confirmed correct.** Every model improved on coding_quality
+(+0.023 to +0.050). No timeouts observed. The rubric ceiling is now the binding constraint;
+further increases (700+) would yield at most +0.02 additional improvement.
+
+**Finding 2 — Coding quality inversion narrowed (expected, not a bug).** Coding specialists
+score lower on coding than general quality because general prompts don't require `assert`,
+`test_def`, or `type_hint` (the hardest scoring markers). The gap halved from ~−0.110 to
+~−0.052 vs. Run 3, confirming truncation was part of the cause. Name-pattern fallback
+continues to correctly classify these models.
+
+**Finding 3 — deepseek-coder-v2:16b and :latest may be the same weights (ACTION REQUIRED).**
+Both share identical quality scores (0.833/0.885) and nearly identical throughput (24.5 vs.
+25.0 tok/sec). In Ollama, `:latest` typically resolves to the same weights as the default
+variant. If confirmed identical, slots 3 and 4 hold duplicate models — zero benefit, wasted
+VRAM. See Testing Needed for verification steps.
+
+**Finding 4 — qwen2.5-coder:latest and :7b are near-identical (informational).** Composites
+of 0.667 vs. 0.666. Lower impact since only one is active in slot 6 at a time.
+
+**Finding 5 — llama3.2:3b outperforms coding specialists on coding composite (informational).**
+coding_composite=0.785 beats all dedicated coding models. Mathematically correct: speed
+(22.3 tok/sec) and latency (644ms) dominate. Correctly classified general because
+general_composite (0.814) > coding_composite (0.785), delta < 0.10 threshold.
+
+**Finding 6 — codellama:34b correctly excluded.** 3.2 tok/sec, general_quality=0.586 falls
+below min_quality_score=0.6. Scoring system worked as designed.
+
+---
+
+## Testing Needed
+
+### Finding 3 — Verify deepseek-coder-v2:16b vs :latest digest
+
+Run on `ai_server`:
+
+```bash
+ollama show deepseek-coder-v2:16b --modelfile | grep FROM
+ollama show deepseek-coder-v2:latest --modelfile | grep FROM
+```
+
+**If digests match (same weights):** update `model_selection.json` slot4_coding manually
+(or remove one deepseek variant and re-run `03_benchmark.yml`) to redirect slot 4 to
+`qwen2.5-coder:14B` (composite=0.572) or another diverse candidate for model diversity.
+
+**If digests differ (different weights):** no action — the pipeline is working as designed.
+
+### Regression check after any slot4 change
+
+If slot4 is redirected, run:
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+Confirm both warmup services start cleanly:
+
+```bash
+systemctl status ollama-warmup.service ollama-warmup-node0.service
+```
+
+---
+
+# Addendum — Run 5 Review (post deepseek:latest removal)
+
+## Run History (all five runs)
+
+| Run | Timestamp | Condition | Models | Result |
+|-----|-----------|-----------|--------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | — | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | — | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300 | 4 | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | num_predict=500, deepseek:latest present | 9 | quality=0.83–0.97, tok/sec=3.2–25.0 |
+| 5 | 20260310T122818 | num_predict=500, deepseek:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.5 |
+
+## Run 4 → Run 5 Comparison (all common models)
+
+| Model | R4 tok/sec | R5 tok/sec | R4 coding_comp | R5 coding_comp | Delta |
+|-------|-----------|-----------|----------------|----------------|-------|
+| deepseek-coder-v2:16b | 24.5 | 24.1 | 0.738 | 0.727 | −0.011 (noise) |
+| qwen2.5-coder:latest | 12.8 | 12.4 | 0.667 | 0.671 | +0.004 (noise) |
+| qwen2.5-coder:7b | 12.7 | 12.6 | 0.666 | 0.674 | +0.008 (noise) |
+| qwen2.5-coder:14B | 6.6 | 6.6 | 0.572 | 0.573 | +0.001 (noise) |
+| llama3.2:3b | 22.3 | 24.5 | 0.785 | 0.806 | +0.021 (notable) |
+| llama3.1:8b | 11.8 | 11.9 | 0.596 | 0.600 | +0.004 (noise) |
+| gemma3:12b-it-q4_K_M | 6.4 | 6.2 | 0.441 | 0.439 | −0.002 (noise) |
+| codellama:34b | 3.2 | 3.2 | 0.437 | 0.436 | −0.001 (noise) |
+
+Quality scores (coding_quality, general_quality) are **identical** across both runs —
+confirming rubric scores are stable and deterministic at num_predict=500.
+
+## Run 5 Slot Assignments (model_selection.json)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.835 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.624 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.481 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.727 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.674 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:latest | 0.671 |
+
+Note: slot4 is `qwen2.5-coder:7b` — the pipeline correctly ranked it #2 coding (0.674),
+superseding the manual `qwen2.5-coder:14B` edit made earlier this session.
+
+## Findings
+
+**Finding 1 — System is stable; tuning parameters remain optimal (no action).** All quality
+scores are identical between Run 4 and Run 5. Speed and latency deltas are within normal
+run-to-run variance (±0.4 tok/sec, ±200ms TTFT) for every model except llama3.2:3b, whose
++2.2 tok/sec gain is covered in Finding 2. No tuning changes needed.
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal — rubric ceiling is binding constraint |
+| `benchmark_large_timeout` | 480s | Adequate — ~3–24x margin at 3–25 tok/sec |
+| `benchmark_toks_norm_ceiling` | 40 | Correct — fastest model at 61% of ceiling |
+| `benchmark_coding_threshold` | 0.10 | Correct — name-pattern fallback working |
+| Scoring weights | 0.45/0.30/0.25 | Appropriate for interactive serving |
+
+**Finding 2 — llama3.2:3b improved after deepseek:latest removal (informational).**
+tok/sec: 22.3 → 24.5 (+2.2), general_composite: 0.814 → 0.835 (+0.021). Likely cause:
+removing one large model reduced memory pressure / NUMA contention during warmup. The 3b
+model benefits most as it runs fastest and competes most for memory bandwidth.
+
+**Finding 3 — qwen2.5-coder:7b and :latest confirmed duplicate weights (RESOLVED).**
+Run 5 slot4=`:7b` (0.674) and slot6=`:latest` (0.671) showed identical quality scores
+(coding=0.850, general=0.910) and nearly identical throughput (~12.4–12.8 tok/sec) across
+both runs — same pattern as the deepseek duplicate. Verified on ai_server:
+
+```
+qwen2.5-coder:7b    → sha256-60e05f2100071479f596b964f89f510f057ce397ea22f2833a0cfe029bfc2463
+qwen2.5-coder:latest → sha256-60e05f2100071479f596b964f89f510f057ce397ea22f2833a0cfe029bfc2463
+```
+
+Digests match. `qwen2.5-coder:latest` removed. Next step: re-run `03_benchmark.yml` (Run 6)
+to promote `qwen2.5-coder:14B` to slot6_rotate, achieving genuine speed/quality diversity
+on Node 0:
+- slot3: deepseek-coder-v2:16b — fast+deep (24 tok/sec, 16B)
+- slot4: qwen2.5-coder:7b — fast+light (12.6 tok/sec, 7B)
+- slot6: qwen2.5-coder:14B — slower+richer quality (6.6 tok/sec, 14B)
+
+**Finding 4 — gemma3:12b latency_score=0 is persistent (informational, no action).**
+TTFT consistently 6.1–6.4 seconds, above the 5000ms floor → latency_score=0 every run.
+Hardware-limited (large quant loading time on Node 1), not a tuning issue. The model
+correctly holds slot5_general_rotate on the strength of general_quality=0.966. The latency
+penalty is working as intended.
+
+**Finding 5 — codellama:34b remains correctly excluded (informational, no action).**
+composite=0.436, general_quality=0.586 — below both min_composite_score=0.55 and
+min_quality_score=0.6 every run. Pipeline working as designed.
+
+## Next Action
+
+Run 6: re-benchmark after `qwen2.5-coder:latest` removal to promote `qwen2.5-coder:14B`
+to slot6_rotate and achieve model diversity on Node 0.
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+---
+
+# Addendum — Run 6 Review (post qwen2.5-coder:latest removal)
+
+## Run History (all six runs)
+
+| Run | Timestamp | Condition | Models | Result |
+|-----|-----------|-----------|--------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | — | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | — | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300 | 4 | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | num_predict=500, deepseek:latest present | 9 | quality=0.83–0.97, tok/sec=3.2–25.0 |
+| 5 | 20260310T122818 | num_predict=500, deepseek:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.5 |
+| 6 | 20260310T160815 | num_predict=500, qwen2.5-coder:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.2 |
+
+## Full Run 6 Results
+
+| Model | tok/sec | coding_q | general_q | latency_ms | coding_comp | general_comp | category |
+|-------|---------|----------|-----------|------------|-------------|--------------|----------|
+| deepseek-coder-v2:16b | 24.2 | 0.833 | 0.885 | 1383.8 | 0.737 | 0.760 | coding |
+| deepseek-coder-v2:latest | 24.1 | 0.833 | 0.885 | 1411.4 | 0.735 | 0.759 | coding |
+| qwen2.5-coder:7b | 12.6 | 0.850 | 0.910 | 1210.0 | 0.666 | 0.693 | coding |
+| qwen2.5-coder:14B | 6.6 | 0.850 | 0.931 | 2181.0 | 0.573 | 0.609 | coding |
+| codellama:34b | 3.2 | 0.833 | 0.586 | 4336.2 | 0.432 | 0.321 | coding |
+| llama3.2:3b | 24.2 | 0.890 | 0.954 | 581.0 | 0.803 | 0.832 | general |
+| llama3.1:8b | 11.8 | 0.823 | 0.877 | 2183.4 | 0.600 | 0.624 | general |
+| gemma3:12b-it-q4_K_M | 6.2 | 0.873 | 0.966 | 5540.1 | 0.440 | 0.482 | general |
+
+## Run 5 → Run 6 Comparison (all common models)
+
+| Model | R5 tok/sec | R6 tok/sec | R5 coding_comp | R6 coding_comp | Delta |
+|-------|-----------|-----------|----------------|----------------|-------|
+| deepseek-coder-v2:16b | 24.1 | 24.2 | 0.727 | 0.737 | +0.010 (noise) |
+| qwen2.5-coder:7b | 12.6 | 12.6 | 0.674 | 0.666 | −0.008 (noise) |
+| qwen2.5-coder:14B | 6.6 | 6.6 | 0.573 | 0.573 | 0.000 |
+| llama3.2:3b | 24.5 | 24.2 | 0.806 | 0.803 | −0.003 (noise) |
+| llama3.1:8b | 11.9 | 11.8 | 0.600 | 0.600 | 0.000 |
+| gemma3:12b-it-q4_K_M | 6.2 | 6.2 | 0.439 | 0.440 | +0.001 (noise) |
+| codellama:34b | 3.2 | 3.2 | 0.436 | 0.432 | −0.004 (noise) |
+
+Quality scores are **identical** across all common models. All composites within run-to-run
+noise (≤ ±0.010). Rubric confirmed deterministic across 6 runs.
+
+## Run 6 Slot Assignments (model_selection.json — current state)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.832 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.624 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.482 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.737 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.735 ← REGRESSION |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:7b | 0.666 |
+
+## Findings
+
+**Finding 1 — deepseek-coder-v2:latest re-appeared in slot4 (REGRESSION, now fixed).**
+Previously confirmed duplicate of `:16b` and removed after Run 4. Re-appeared in Run 6
+because `group_vars/all.yml` contained two pull sources:
+
+1. `baseline_models` (line 121): `"deepseek-coder-v2"` — untagged, Ollama resolves to
+   `:latest`, re-pulling the duplicate on every benchmark run.
+2. `candidate_models`: explicit `"deepseek-coder-v2:latest"` entry — unconditionally pulls
+   `:latest` as a testable model.
+
+**Fix applied to `inventory/group_vars/all.yml`:**
+- `baseline_models`: changed `"deepseek-coder-v2"` → `"deepseek-coder-v2:16b"` (explicit tag)
+- `candidate_models`: removed the `deepseek-coder-v2:latest` entry entirely
+
+**Also required on ai_server:** `ollama rm deepseek-coder-v2:latest`
+
+**Finding 2 — All scores and tuning variables remain stable (no action).** Every delta vs
+Run 5 is within noise (≤ ±0.010 composite, quality scores identical). The rubric is
+confirmed deterministic across 6 runs.
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal |
+| `benchmark_large_timeout` | 480s | Adequate |
+| `benchmark_toks_norm_ceiling` | 40 | Correct |
+| `benchmark_coding_threshold` | 0.10 | Correct |
+
+**Finding 3 — qwen2.5-coder:14B not yet in slot6 (consequence of Finding 1).** With
+deepseek:latest occupying slot4, the coding rank yields:
+  #1 deepseek:16b (0.737) → slot3, #2 deepseek:latest (0.735) → slot4,
+  #3 qwen:7b (0.666) → slot6, #4 qwen:14B (0.573) → excluded.
+After deepseek:latest is permanently removed, Run 7 expected layout:
+  slot3=deepseek:16b, slot4=qwen:7b, slot6=qwen:14B.
+
+**Finding 4 — gemma3:12b TTFT=5540ms (informational, no action).** Persistently above
+5000ms floor → latency_score=0 every run. Hardware-limited, not a tuning issue.
+Correctly holds slot5_general_rotate on general_quality=0.966.
+
+**Finding 5 — codellama:34b correctly excluded again (informational, no action).**
+composite=0.432, general_quality=0.586 — below both thresholds. Pipeline working as designed.
+
+## Next Action
+
+1. Remove duplicate from ai_server: `ollama rm deepseek-coder-v2:latest`
+2. Run 7 (clean benchmark):
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+Expected Run 7: slot4=`qwen2.5-coder:7b`, slot6=`qwen2.5-coder:14B`,
+`deepseek-coder-v2:latest` absent from `all_metrics`.
+
+---
+
+# Addendum — Run 7 Review (target Node 0 layout achieved, session closed)
+
+## Run History (all seven runs)
+
+| Run | Timestamp | Condition | Models | Result |
+|-----|-----------|-----------|--------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | — | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | — | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300 | 4 | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | num_predict=500, deepseek:latest present | 9 | quality=0.83–0.97, tok/sec=3.2–25.0 |
+| 5 | 20260310T122818 | num_predict=500, deepseek:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.5 |
+| 6 | 20260310T160815 | num_predict=500, qwen2.5-coder:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.2 |
+| 7 | 20260310T170013 | group_vars fix applied, deepseek:latest absent | 7 | quality=0.83–0.97, tok/sec=3.2–23.5 |
+
+## Full Run 7 Results
+
+| Model | tok/sec | coding_q | general_q | latency_ms | coding_comp | general_comp | category |
+|-------|---------|----------|-----------|------------|-------------|--------------|----------|
+| deepseek-coder-v2:16b | 23.5 | 0.833 | 0.885 | 1568.5 | 0.723 | 0.746 | coding |
+| qwen2.5-coder:7b | 12.5 | 0.850 | 0.910 | 1431.0 | 0.655 | 0.682 | coding |
+| qwen2.5-coder:14B | 6.6 | 0.850 | 0.931 | 2229.7 | 0.570 | 0.607 | coding |
+| codellama:34b | 3.2 | 0.833 | 0.586 | 4235.4 | 0.437 | 0.326 | coding |
+| llama3.2:3b | 23.0 | 0.890 | 0.954 | 754.8 | 0.786 | 0.814 | general |
+| llama3.1:8b | 11.8 | 0.823 | 0.877 | 2202.0 | 0.599 | 0.623 | general |
+| gemma3:12b-it-q4_K_M | 6.1 | 0.873 | 0.966 | 5941.9 | 0.439 | 0.481 | general |
+
+`deepseek-coder-v2:latest` **absent** from `all_metrics` — group_vars fix verified working.
+
+## Run 6 → Run 7 Comparison (all common models)
+
+| Model | R6 tok/sec | R7 tok/sec | R6 coding_comp | R7 coding_comp | Delta |
+|-------|-----------|-----------|----------------|----------------|-------|
+| deepseek-coder-v2:16b | 24.2 | 23.5 | 0.737 | 0.723 | −0.014 (noise) |
+| qwen2.5-coder:7b | 12.6 | 12.5 | 0.666 | 0.655 | −0.011 (noise) |
+| qwen2.5-coder:14B | 6.6 | 6.6 | 0.573 | 0.570 | −0.003 (noise) |
+| llama3.2:3b | 24.2 | 23.0 | 0.803 | 0.786 | −0.017 (noise) |
+| llama3.1:8b | 11.8 | 11.8 | 0.600 | 0.599 | −0.001 (noise) |
+| gemma3:12b-it-q4_K_M | 6.2 | 6.1 | 0.440 | 0.439 | −0.001 (noise) |
+| codellama:34b | 3.2 | 3.2 | 0.432 | 0.437 | +0.005 (noise) |
+
+Quality scores are **identical** across all common models. All composites within run-to-run
+noise (≤ ±0.017). Rubric confirmed deterministic across 7 runs.
+
+## Run 7 Slot Assignments (final, confirmed clean)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.623 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.481 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.723 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.655 ✅ |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:14B | 0.570 ✅ |
+
+## Findings
+
+**Finding 1 — Target Node 0 diversity layout achieved (RESOLVED).** Run 7 confirms the
+intended three-tier Node 0 layout:
+- slot3: deepseek-coder-v2:16b — deep specialist (23.5 tok/sec, 16B params)
+- slot4: qwen2.5-coder:7b — fast+light (12.5 tok/sec, 7B params)
+- slot6: qwen2.5-coder:14B — slower+richer (6.6 tok/sec, 14B params)
+
+All three are genuinely distinct models with different speed/quality tradeoffs.
+
+**Finding 2 — group_vars fix verified working (RESOLVED).** `deepseek-coder-v2:latest` is
+absent from `all_metrics`. Explicit `:16b` tag in `baseline_models` prevents Ollama from
+resolving to `:latest` on subsequent runs. The fix is durable — re-running `03_benchmark.yml`
+will not re-introduce the duplicate.
+
+**Finding 3 — All scores and tuning variables stable (no action).** Every delta vs Run 6 is
+within noise (≤ ±0.017 composite, quality scores identical). The pipeline is confirmed
+deterministic and stable.
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal |
+| `benchmark_large_timeout` | 480s | Adequate |
+| `benchmark_toks_norm_ceiling` | 40 | Correct |
+| `benchmark_coding_threshold` | 0.10 | Correct |
+
+**Finding 4 — Benchmark pipeline declared stable. Session closed.** Seven runs over two
+days confirmed: NUMA fix correct, scoring rubric deterministic, duplicate-model detection
+pattern documented, group_vars idempotent. No further benchmark runs or tuning changes are
+needed unless new models are added to `candidate_models`.

+ 151 - 71
benchmarks/results/model_selection.json

@@ -1,116 +1,196 @@
 {
     "all_metrics": {
-        "deepseek-coder-v2:latest": {
-            "avg_tok_per_sec": 21.6,
+        "codellama:34b": {
+            "avg_tok_per_sec": 3.2,
             "category": "coding",
-            "coding_composite": 0.764,
-            "coding_quality": 0.657,
-            "general_composite": 0.867,
-            "general_quality": 0.886,
-            "latency_ms": 1510.5,
-            "latency_score": 0.698,
-            "toks_norm": 0.982
+            "coding_composite": 0.437,
+            "coding_quality": 0.833,
+            "general_composite": 0.326,
+            "general_quality": 0.586,
+            "latency_ms": 4235.4,
+            "latency_score": 0.153,
+            "toks_norm": 0.08
+        },
+        "deepseek-coder-v2:16b": {
+            "avg_tok_per_sec": 23.5,
+            "category": "coding",
+            "coding_composite": 0.723,
+            "coding_quality": 0.833,
+            "general_composite": 0.746,
+            "general_quality": 0.885,
+            "latency_ms": 1568.5,
+            "latency_score": 0.686,
+            "toks_norm": 0.586
         },
         "gemma3:12b-it-q4_K_M": {
-            "avg_tok_per_sec": 5.6,
+            "avg_tok_per_sec": 6.1,
             "category": "general",
-            "coding_composite": 0.416,
-            "coding_quality": 0.757,
-            "general_composite": 0.495,
-            "general_quality": 0.931,
-            "latency_ms": 5975.8,
+            "coding_composite": 0.439,
+            "coding_quality": 0.873,
+            "general_composite": 0.481,
+            "general_quality": 0.966,
+            "latency_ms": 5941.9,
             "latency_score": 0,
-            "toks_norm": 0.253
+            "toks_norm": 0.153
+        },
+        "llama3.1:8b": {
+            "avg_tok_per_sec": 11.8,
+            "category": "general",
+            "coding_composite": 0.599,
+            "coding_quality": 0.823,
+            "general_composite": 0.623,
+            "general_quality": 0.877,
+            "latency_ms": 2202.0,
+            "latency_score": 0.56,
+            "toks_norm": 0.294
         },
         "llama3.2:3b": {
-            "avg_tok_per_sec": 22.5,
+            "avg_tok_per_sec": 23.0,
             "category": "general",
-            "coding_composite": 0.846,
-            "coding_quality": 0.723,
-            "general_composite": 0.961,
-            "general_quality": 0.979,
-            "latency_ms": 580.7,
-            "latency_score": 0.884,
-            "toks_norm": 1.0
+            "coding_composite": 0.786,
+            "coding_quality": 0.89,
+            "general_composite": 0.814,
+            "general_quality": 0.954,
+            "latency_ms": 754.8,
+            "latency_score": 0.849,
+            "toks_norm": 0.576
+        },
+        "qwen2.5-coder:14B": {
+            "avg_tok_per_sec": 6.6,
+            "category": "coding",
+            "coding_composite": 0.57,
+            "coding_quality": 0.85,
+            "general_composite": 0.607,
+            "general_quality": 0.931,
+            "latency_ms": 2229.7,
+            "latency_score": 0.554,
+            "toks_norm": 0.164
         },
         "qwen2.5-coder:7b": {
-            "avg_tok_per_sec": 12.3,
+            "avg_tok_per_sec": 12.5,
             "category": "coding",
-            "coding_composite": 0.664,
-            "coding_quality": 0.683,
-            "general_composite": 0.756,
-            "general_quality": 0.888,
-            "latency_ms": 1222.4,
-            "latency_score": 0.756,
-            "toks_norm": 0.56
+            "coding_composite": 0.655,
+            "coding_quality": 0.85,
+            "general_composite": 0.682,
+            "general_quality": 0.91,
+            "latency_ms": 1431.0,
+            "latency_score": 0.714,
+            "toks_norm": 0.312
         }
     },
     "coding_ranking": [
         {
-            "composite": 0.764,
+            "composite": 0.723,
             "metrics": {
-                "avg_tok_per_sec": 21.6,
+                "avg_tok_per_sec": 23.5,
                 "category": "coding",
-                "coding_composite": 0.764,
-                "coding_quality": 0.657,
-                "general_composite": 0.867,
-                "general_quality": 0.886,
-                "latency_ms": 1510.5,
-                "latency_score": 0.698,
-                "toks_norm": 0.982
+                "coding_composite": 0.723,
+                "coding_quality": 0.833,
+                "general_composite": 0.746,
+                "general_quality": 0.885,
+                "latency_ms": 1568.5,
+                "latency_score": 0.686,
+                "toks_norm": 0.586
             },
-            "name": "deepseek-coder-v2:latest"
+            "name": "deepseek-coder-v2:16b"
         },
         {
-            "composite": 0.664,
+            "composite": 0.655,
             "metrics": {
-                "avg_tok_per_sec": 12.3,
+                "avg_tok_per_sec": 12.5,
                 "category": "coding",
-                "coding_composite": 0.664,
-                "coding_quality": 0.683,
-                "general_composite": 0.756,
-                "general_quality": 0.888,
-                "latency_ms": 1222.4,
-                "latency_score": 0.756,
-                "toks_norm": 0.56
+                "coding_composite": 0.655,
+                "coding_quality": 0.85,
+                "general_composite": 0.682,
+                "general_quality": 0.91,
+                "latency_ms": 1431.0,
+                "latency_score": 0.714,
+                "toks_norm": 0.312
             },
             "name": "qwen2.5-coder:7b"
+        },
+        {
+            "composite": 0.57,
+            "metrics": {
+                "avg_tok_per_sec": 6.6,
+                "category": "coding",
+                "coding_composite": 0.57,
+                "coding_quality": 0.85,
+                "general_composite": 0.607,
+                "general_quality": 0.931,
+                "latency_ms": 2229.7,
+                "latency_score": 0.554,
+                "toks_norm": 0.164
+            },
+            "name": "qwen2.5-coder:14B"
+        },
+        {
+            "composite": 0.437,
+            "metrics": {
+                "avg_tok_per_sec": 3.2,
+                "category": "coding",
+                "coding_composite": 0.437,
+                "coding_quality": 0.833,
+                "general_composite": 0.326,
+                "general_quality": 0.586,
+                "latency_ms": 4235.4,
+                "latency_score": 0.153,
+                "toks_norm": 0.08
+            },
+            "name": "codellama:34b"
         }
     ],
     "general_ranking": [
         {
-            "composite": 0.961,
+            "composite": 0.814,
             "metrics": {
-                "avg_tok_per_sec": 22.5,
+                "avg_tok_per_sec": 23.0,
                 "category": "general",
-                "coding_composite": 0.846,
-                "coding_quality": 0.723,
-                "general_composite": 0.961,
-                "general_quality": 0.979,
-                "latency_ms": 580.7,
-                "latency_score": 0.884,
-                "toks_norm": 1.0
+                "coding_composite": 0.786,
+                "coding_quality": 0.89,
+                "general_composite": 0.814,
+                "general_quality": 0.954,
+                "latency_ms": 754.8,
+                "latency_score": 0.849,
+                "toks_norm": 0.576
             },
             "name": "llama3.2:3b"
         },
         {
-            "composite": 0.495,
+            "composite": 0.623,
             "metrics": {
-                "avg_tok_per_sec": 5.6,
+                "avg_tok_per_sec": 11.8,
                 "category": "general",
-                "coding_composite": 0.416,
-                "coding_quality": 0.757,
-                "general_composite": 0.495,
-                "general_quality": 0.931,
-                "latency_ms": 5975.8,
+                "coding_composite": 0.599,
+                "coding_quality": 0.823,
+                "general_composite": 0.623,
+                "general_quality": 0.877,
+                "latency_ms": 2202.0,
+                "latency_score": 0.56,
+                "toks_norm": 0.294
+            },
+            "name": "llama3.1:8b"
+        },
+        {
+            "composite": 0.481,
+            "metrics": {
+                "avg_tok_per_sec": 6.1,
+                "category": "general",
+                "coding_composite": 0.439,
+                "coding_quality": 0.873,
+                "general_composite": 0.481,
+                "general_quality": 0.966,
+                "latency_ms": 5941.9,
                 "latency_score": 0,
-                "toks_norm": 0.253
+                "toks_norm": 0.153
             },
             "name": "gemma3:12b-it-q4_K_M"
         }
     ],
     "slot1_general": "llama3.2:3b",
-    "slot2_general": "gemma3:12b-it-q4_K_M",
-    "slot3_coding": "deepseek-coder-v2:latest",
-    "slot4_coding": "qwen2.5-coder:7b"
+    "slot2_general": "llama3.1:8b",
+    "slot3_coding": "deepseek-coder-v2:16b",
+    "slot4_coding": "qwen2.5-coder:7b",
+    "slot5_general_rotate": "gemma3:12b-it-q4_K_M",
+    "slot6_coding_rotate": "qwen2.5-coder:14B"
 }

+ 26 - 10
inventory/group_vars/all.yml

@@ -51,6 +51,7 @@ vault_approle_name: "ai-services"
 # Service ports
 keycloak_port: 8180
 ollama_port: 11434
+ollama_node0_port: 11435
 qdrant_http_port: 6333
 qdrant_grpc_port: 6334
 
@@ -58,17 +59,19 @@ qdrant_grpc_port: 6334
 ollama_host: "0.0.0.0:11434"
 ollama_num_threads: 14
 ollama_num_parallel: 2
-ollama_max_loaded_models: 4
+ollama_max_loaded_models: 3   # 3 per socket (6 total across both NUMA instances)
 ollama_keep_alive: "-1"
 ollama_flash_attention: "1"
 
 # NUMA/CPU affinity - Dell M630, 2x E5-2690v4
 # CPUs are interleaved: odd = socket 1 (NUMA node 1), even = socket 0.
 # Physical cores on node 1: 1,3,...,27 (14 cores). HT siblings: 29,31,...,55.
+# Physical cores on node 0: 0,2,...,26 (14 cores). HT siblings: 28,30,...,54.
 # Pinning to physical cores only eliminates HT contention on the memory bus.
 # NUMA node 1 has ~120 GB free RAM vs node 0's ~75 GB.
 ollama_numa_node: "1"
 ollama_cpu_affinity: "1 3 5 7 9 11 13 15 17 19 21 23 25 27"
+ollama_node0_cpu_affinity: "0 2 4 6 8 10 12 14 16 18 20 22 24 26"
 ollama_binary_path: /usr/bin/ollama
 
 # Keycloak configuration
@@ -85,9 +88,27 @@ benchmark_thresholds:
   min_quality_score: 0.6
   min_composite_score: 0.55
 
-benchmark_toks_norm_ceiling: 22     # Observed hardware max on Dell M630 (22.5 tok/sec measured)
+benchmark_toks_norm_ceiling: 40     # Conservative dual-socket estimate (was 22 single-socket)
 benchmark_coding_threshold: 0.10    # Delta to classify a model as coding-specialized
 
+# Modelfile aliases created by 04_models.yml — excluded from benchmark to prevent
+# 32k-token KV cache allocations stalling the run with 285-second response times.
+benchmark_skip_aliases:
+  - "coder-128k"
+  - "coder-32k"
+  - "coder-rotate"
+  - "llama-family"
+  - "gemma-family"
+
+benchmark_small_max_gb: 10    # upper size boundary for small pass (< 10 GB), based on runtime RAM
+benchmark_medium_max_gb: 15   # upper size boundary for medium pass (10–15 GB), based on runtime RAM
+benchmark_size_overhead_factor: 1.2  # ollama list shows disk size; multiply by this to estimate runtime RAM
+benchmark_load_timeout: 180      # seconds — warm-up "Hi" prompt per model before benchmarking
+benchmark_small_timeout: 90      # seconds per request, small models (<10 GB)
+benchmark_medium_timeout: 240    # seconds per request, medium models (10–15 GB)
+benchmark_large_timeout: 480     # seconds per request, large models (>15 GB)
+benchmark_num_predict: 500       # cap output tokens; allows full coding responses (def+return+docstring+assert); worst-case: 6.5 tok/s→77s, 22 tok/s→23s
+
 # Explicit category overrides applied before heuristics. Keys are model names as
 # returned by `ollama list`. Valid values: 'coding' or 'general'.
 # Example: { "deepseek-coder-v2": "coding", "qwen2.5-coder:7b": "coding" }
@@ -97,7 +118,7 @@ model_category_overrides: {}
 # These are the minimum set needed to populate all 4 slots with meaningful candidates.
 baseline_models:
   - "llama3.2:3b"
-  - "deepseek-coder-v2"
+  - "deepseek-coder-v2:16b"
   - "qwen2.5-coder:7b"
   - "llama3.1:8b"
 
@@ -108,11 +129,6 @@ candidate_models:
     expected_tokens_sec: 4.5
     reason: "Larger qwen2.5-coder for higher quality"
     category: coding
-  - name: "deepseek-coder-v2:latest"
-    size_gb: 9
-    expected_tokens_sec: 8.0
-    reason: "DeepSeek Coder V2 full model"
-    category: coding
   - name: "codegemma:7b-instruct-q5_K_M"
     size_gb: 5.5
     expected_tokens_sec: 12.0
@@ -124,8 +140,8 @@ candidate_models:
     reason: "StarCoder2 coding specialist"
     category: coding
 
-# OpenClaw default model
-openclaw_model: "llama3.2:3b"
+# OpenClaw default model — overridden dynamically by 08_openclaw.yml from slot1_general
+openclaw_model: "deepseek-coder-v2:16b-lite-instruct-q4_K_M"
 
 # AWS Bedrock (OpenAI-compatible API via Open WebUI)
 # Pass bearer_token on first run: -e "bedrock_bearer_token=<value>"

+ 49 - 0
playbooks/01_vault.yml

@@ -132,12 +132,14 @@
       register: vault_init_check
       tags:
         - vault-init
+        - vault-unseal
 
     - name: "Vault | Set initialization status fact"
       ansible.builtin.set_fact:
         vault_is_initialized: "{{ vault_init_check.status != 501 }}"
       tags:
         - vault-init
+        - vault-unseal
 
     - name: "Vault | Initialize Vault"
       ansible.builtin.command:
@@ -235,6 +237,47 @@
       tags:
         - vault-unseal
 
+    # ── Auto-unseal on reboot ─────────────────────────────────────────
+    - name: "Vault | Deploy unseal key to server"
+      ansible.builtin.copy:
+        content: "{{ vault_init_data.unseal_keys_b64[0] }}"
+        dest: /etc/vault.d/unseal.key
+        owner: root
+        group: root
+        mode: "0400"
+      tags:
+        - vault-unseal
+        - vault-autounseal
+
+    - name: "Vault | Deploy vault-unseal.sh"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/vault/vault-unseal.sh.j2"
+        dest: /usr/local/bin/vault-unseal.sh
+        owner: root
+        group: root
+        mode: "0750"
+      tags:
+        - vault-autounseal
+
+    - name: "Vault | Deploy vault-unseal.service"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/vault/vault-unseal.service.j2"
+        dest: /etc/systemd/system/vault-unseal.service
+        owner: root
+        group: root
+        mode: "0644"
+      notify: Reload systemd and restart vault-unseal
+      tags:
+        - vault-autounseal
+
+    - name: "Vault | Enable vault-unseal.service"
+      ansible.builtin.systemd:
+        name: vault-unseal.service
+        enabled: true
+        daemon_reload: true
+      tags:
+        - vault-autounseal
+
     - name: "Vault | Set root token fact"
       ansible.builtin.set_fact:
         vault_root_token: "{{ vault_init_data.root_token }}"
@@ -516,3 +559,9 @@
         name: vault
         state: restarted
         daemon_reload: true
+
+    - name: Reload systemd and restart vault-unseal
+      ansible.builtin.systemd:
+        name: vault-unseal.service
+        state: restarted
+        daemon_reload: true

+ 44 - 2
playbooks/02_infrastructure.yml

@@ -155,6 +155,42 @@
       tags:
         - ollama
 
+    - name: "Ollama | Deploy ollama-node0 systemd unit"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/ollama/ollama-node0.service.j2"
+        dest: /etc/systemd/system/ollama-node0.service
+        mode: "0644"
+        owner: root
+        group: root
+      notify:
+        - Reload systemd and start ollama-node0
+      tags:
+        - ollama
+
+    - name: "Ollama | Enable and start ollama-node0"
+      ansible.builtin.systemd:
+        name: ollama-node0
+        enabled: true
+        state: started
+        daemon_reload: true
+      tags:
+        - ollama
+
+    - name: "Ollama | Wait for ollama-node0 API to be ready"
+      ansible.builtin.uri:
+        url: "http://localhost:{{ ollama_node0_port }}/api/tags"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ ollama_api_key }}"
+        status_code: 200
+        timeout: 10
+      register: ollama_node0_ready
+      retries: 24
+      delay: 5
+      until: ollama_node0_ready.status == 200
+      tags:
+        - ollama
+
     # ── OS-level kernel tuning for dedicated inference server ────────────────
     - name: "OS Tune | Apply sysctl settings for inference workload"
       ansible.posix.sysctl:
@@ -164,8 +200,8 @@
         reload: true
         state: present
       loop:
-        # Disable auto-NUMA migration — fights explicit numactl --membind=1 by
-        # moving KV-cache pages mid-inference to a different NUMA node.
+        # Disable auto-NUMA migration — CPUAffinity pins Ollama to node 1/0
+        # physical cores; NUMA balancing could migrate pages mid-inference.
         - { name: kernel.numa_balancing, value: "0" }
         # Near-zero swappiness: prevents model weights being paged out under
         # memory pressure (complements LimitMEMLOCK=infinity in the unit file).
@@ -261,6 +297,12 @@
         state: restarted
         daemon_reload: true
 
+    - name: Reload systemd and start ollama-node0
+      ansible.builtin.systemd:
+        name: ollama-node0
+        state: started
+        daemon_reload: true
+
     - name: Reload systemd daemon
       ansible.builtin.systemd:
         daemon_reload: true

+ 165 - 37
playbooks/03_benchmark.yml

@@ -84,6 +84,31 @@
       tags:
         - benchmark-discover
 
+    - name: "Benchmark | Stop warmup services for clean benchmark run"
+      ansible.builtin.systemd:
+        name: "{{ item }}"
+        state: stopped
+      loop:
+        - ollama-warmup.service
+        - ollama-warmup-node0.service
+      failed_when: false
+      become: true
+      tags:
+        - benchmark-setup
+
+    - name: "Benchmark | Wait for node0 Ollama API to be ready"
+      ansible.builtin.uri:
+        url: "http://localhost:{{ ollama_node0_port }}/api/tags"
+        method: GET
+        status_code: 200
+        timeout: 10
+      register: ollama_node0_ready
+      retries: 24
+      delay: 5
+      until: ollama_node0_ready.status == 200
+      tags:
+        - benchmark-setup
+
     - name: "Benchmark | Discover installed models"
       ansible.builtin.command: ollama list
       changed_when: false
@@ -100,44 +125,124 @@
       tags:
         - benchmark-discover
 
-    - name: "Benchmark | Set models_to_benchmark to all installed models"
+    - name: "Benchmark | Parse model sizes from ollama list"
       ansible.builtin.set_fact:
-        models_to_benchmark: "{{ installed_models }}"
+        _benchmark_sizes_json: |
+          {% set ns = namespace(d={}) %}
+          {% for line in ollama_list_output.stdout_lines[1:] %}
+          {%   set p = line.split() %}
+          {%   if p | length >= 4 %}
+          {%     set gb = (p[2] | float) if (p[3] | upper == 'GB') else ((p[2] | float) / 1024) %}
+          {%     set _ = ns.d.update({p[0]: gb}) %}
+          {%   endif %}
+          {% endfor %}
+          {{ ns.d | to_json }}
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Partition models into small, medium, and large passes"
+      ansible.builtin.set_fact:
+        _small_models:  "{{ _alias_filtered | select('in', _small_ok)  | list }}"
+        _medium_models: "{{ _alias_filtered | select('in', _medium_ok) | list }}"
+        _large_models:  "{{ _alias_filtered | reject('in', _small_ok)  | reject('in', _medium_ok) | list }}"
+        models_to_benchmark: "{{ _alias_filtered | list }}"
+      vars:
+        _sizes:     "{{ _benchmark_sizes_json | from_json }}"
+        _small_cut:  "{{ (benchmark_small_max_gb  | float) / (benchmark_size_overhead_factor | float) }}"
+        _medium_cut: "{{ (benchmark_medium_max_gb | float) / (benchmark_size_overhead_factor | float) }}"
+        _small_ok:  "{{ _sizes | dict2items | selectattr('value', 'le', _small_cut  | float) | map(attribute='key') | list }}"
+        _medium_ok: "{{ _sizes | dict2items | selectattr('value', 'gt', _small_cut  | float)
+                                            | selectattr('value', 'le', _medium_cut | float)
+                                            | map(attribute='key') | list }}"
+        _alias_filtered: "{{ installed_models | reject('match', '^(' ~ benchmark_skip_aliases | join('|') ~ ')(:|$)') | list }}"
       when: benchmark_models | default('') | length == 0
       tags:
         - benchmark-discover
 
     - name: "Benchmark | Set models_to_benchmark to specified subset"
       ansible.builtin.set_fact:
-        models_to_benchmark: "{{ benchmark_models.split(',') | map('trim') | list }}"
+        models_to_benchmark: "{{ _specified }}"
+        _small_models:  "{{ _specified | select('in', _small_ok)  | list }}"
+        _medium_models: "{{ _specified | select('in', _medium_ok) | list }}"
+        _large_models:  "{{ _specified | reject('in', _small_ok)  | reject('in', _medium_ok) | list }}"
+      vars:
+        _specified: "{{ benchmark_models.split(',') | map('trim') | list }}"
+        _sizes:     "{{ _benchmark_sizes_json | from_json }}"
+        _small_cut:  "{{ (benchmark_small_max_gb  | float) / (benchmark_size_overhead_factor | float) }}"
+        _medium_cut: "{{ (benchmark_medium_max_gb | float) / (benchmark_size_overhead_factor | float) }}"
+        _small_ok:  "{{ _sizes | dict2items | selectattr('value', 'le', _small_cut  | float) | map(attribute='key') | list }}"
+        _medium_ok: "{{ _sizes | dict2items | selectattr('value', 'gt', _small_cut  | float)
+                                            | selectattr('value', 'le', _medium_cut | float)
+                                            | map(attribute='key') | list }}"
       when: benchmark_models | default('') | length > 0
       tags:
         - benchmark-discover
 
+    - name: "Benchmark | Initialize batch accumulator facts"
+      ansible.builtin.set_fact:
+        bench_all_results: []
+        all_eligible_models: []
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Build per-model benchmark timeout map"
+      ansible.builtin.set_fact:
+        _benchmark_timeout_map_json: |
+          {% set ns = namespace(d={}) %}
+          {% for m in models_to_benchmark %}
+          {%   if m in _small_models %}
+          {%     set _ = ns.d.update({m: benchmark_small_timeout | int}) %}
+          {%   elif m in _medium_models %}
+          {%     set _ = ns.d.update({m: benchmark_medium_timeout | int}) %}
+          {%   else %}
+          {%     set _ = ns.d.update({m: benchmark_large_timeout | int}) %}
+          {%   endif %}
+          {% endfor %}
+          {{ ns.d | to_json }}
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Parse benchmark timeout map"
+      ansible.builtin.set_fact:
+        _benchmark_timeout_map: "{{ _benchmark_timeout_map_json | from_json }}"
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Sort models largest-first so heaviest models land on node1 (120 GB)"
+      ansible.builtin.set_fact:
+        models_to_benchmark: >-
+          {{ (_large_models + _medium_models + _small_models)
+             | select('in', models_to_benchmark) | list }}
+      tags:
+        - benchmark-discover
+
     - name: "Benchmark | Display models to benchmark"
       ansible.builtin.debug:
-        msg: "Will benchmark the following models: {{ models_to_benchmark }}"
+        msg:
+          - "Small  pass (timeout {{ benchmark_small_timeout }}s,  ≤{{ benchmark_small_max_gb }}GB):  {{ _small_models }}"
+          - "Medium pass (timeout {{ benchmark_medium_timeout }}s, {{ benchmark_small_max_gb }}–{{ benchmark_medium_max_gb }}GB): {{ _medium_models }}"
+          - "Large  pass (timeout {{ benchmark_large_timeout }}s, >{{ benchmark_medium_max_gb }}GB): {{ _large_models }}"
+          - "Load timeout (warm-up 'Hi' prompt): {{ benchmark_load_timeout }}s"
+          - "Total: {{ models_to_benchmark | length }} models, {{ (models_to_benchmark | batch(6) | list) | length }} batch(es) of ≤6"
       tags:
         - benchmark-discover
 
-    - name: "Benchmark | Run test prompts against each model"
-      ansible.builtin.uri:
-        url: "{{ ollama_api_url }}/api/generate"
-        method: POST
-        body_format: json
-        body:
-          model: "{{ item.0 }}"
-          prompt: "{{ test_prompts[item.1].prompt }}"
-          stream: false
-        headers:
-          Authorization: "Bearer {{ ollama_api_key }}"
-        timeout: 300
-        status_code: 200
-      loop: "{{ models_to_benchmark | product(test_prompts.keys() | list) | list }}"
+    - name: "Benchmark | Process batch {{ _loop_idx + 1 }} of {{ models_to_benchmark | batch(6) | list | length }}"
+      ansible.builtin.include_tasks: _bench_tier_batch.yml
+      vars:
+        _batch_node1: "{{ _batch[:3] }}"
+        _batch_node0: "{{ _batch[3:] }}"
+      loop: "{{ models_to_benchmark | batch(6) | list }}"
       loop_control:
-        label: "{{ item.0 }} / {{ item.1 }}"
-      register: benchmark_raw_results
-      failed_when: false
+        loop_var: _batch
+        label: "batch {{ _loop_idx + 1 }}: node1={{ _batch[:3] }} node0={{ _batch[3:] }}"
+        index_var: _loop_idx
+      tags:
+        - benchmark-run
+
+    - name: "Benchmark | Display models that failed to load"
+      ansible.builtin.debug:
+        msg: "Load failures (excluded from scoring): {{ models_to_benchmark | reject('in', all_eligible_models) | list }}"
       tags:
         - benchmark-run
 
@@ -145,9 +250,9 @@
       ansible.builtin.set_fact:
         model_metrics: |
           {% set ns = namespace(results={}) %}
-          {% for model in models_to_benchmark %}
+          {% for model in all_eligible_models %}
           {%   set ns2 = namespace(coding_quality=0, coding_count=0, general_quality=0, general_count=0, total_toks=0, total_eval_time=0, ttft_sum=0, ttft_count=0, latency_ns=0) %}
-          {%   for result in benchmark_raw_results.results %}
+          {%   for result in bench_all_results %}
           {%     if result.item[0] == model and result.status == 200 %}
           {%       set test_name = result.item[1] %}
           {%       set resp = result.json | default({}) %}
@@ -160,7 +265,7 @@
           {%       set ns2.ttft_sum = ns2.ttft_sum + prompt_eval_duration %}
           {%       set ns2.ttft_count = ns2.ttft_count + 1 %}
           {%       if test_name == 'latency' %}
-          {%         set ns2.latency_ns = eval_duration + prompt_eval_duration %}
+          {%         set ns2.latency_ns = ((resp.total_duration | default(0) | int) - (resp.load_duration | default(0) | int)) | abs %}
           {%       endif %}
           {%       set resp_len = response_text | length %}
           {%       if test_name in ['code_gen', 'debug', 'refactor'] %}
@@ -239,9 +344,14 @@
           {% set coding_sorted = coding_models | sort(attribute='composite', reverse=true) %}
           {% set slot1 = general_sorted[0].name if general_sorted | length > 0 else 'none' %}
           {% set slot2 = general_sorted[1].name if general_sorted | length > 1 else (general_sorted[0].name if general_sorted | length > 0 else 'none') %}
+          {% set slot5 = general_sorted[2].name if general_sorted | length > 2 else 'none' %}
           {% set slot3 = coding_sorted[0].name if coding_sorted | length > 0 else (general_sorted[0].name if general_sorted | length > 0 else 'none') %}
           {% set slot4 = coding_sorted[1].name if coding_sorted | length > 1 else (coding_sorted[0].name if coding_sorted | length > 0 else 'none') %}
-          {{ {'slot1_general': slot1, 'slot2_general': slot2, 'slot3_coding': slot3, 'slot4_coding': slot4, 'all_metrics': parsed_metrics, 'general_ranking': general_sorted, 'coding_ranking': coding_sorted} | to_json }}
+          {% set slot6 = coding_sorted[2].name if coding_sorted | length > 2 else 'none' %}
+          {{ {'slot1_general': slot1, 'slot2_general': slot2, 'slot5_general_rotate': slot5,
+              'slot3_coding': slot3, 'slot4_coding': slot4, 'slot6_coding_rotate': slot6,
+              'all_metrics': parsed_metrics, 'general_ranking': general_sorted,
+              'coding_ranking': coding_sorted} | to_json }}
       tags:
         - benchmark-select
 
@@ -255,12 +365,16 @@
       ansible.builtin.debug:
         msg:
           - "============================================="
-          - "  MODEL SELECTION RESULTS"
+          - "  MODEL SELECTION RESULTS  (6-slot / 2-socket)"
           - "============================================="
-          - "  Slot 1 (General Primary):  {{ selection.slot1_general }}"
-          - "  Slot 2 (General Secondary): {{ selection.slot2_general }}"
-          - "  Slot 3 (Coding Primary):   {{ selection.slot3_coding }}"
-          - "  Slot 4 (Coding Secondary): {{ selection.slot4_coding }}"
+          - "  Node 1 — General (port 11434)"
+          - "  Slot 1 (locked):   {{ selection.slot1_general }}"
+          - "  Slot 2 (locked):   {{ selection.slot2_general }}"
+          - "  Slot 5 (rotate):   {{ selection.slot5_general_rotate }}"
+          - "  Node 0 — Coding (port 11435)"
+          - "  Slot 3 (locked):   {{ selection.slot3_coding }}"
+          - "  Slot 4 (locked):   {{ selection.slot4_coding }}"
+          - "  Slot 6 (rotate):   {{ selection.slot6_coding_rotate }}"
           - "============================================="
       tags:
         - benchmark-select
@@ -276,13 +390,15 @@
         content: |
           # Benchmark Results - {{ benchmark_timestamp }}
 
-          ## Model Selection
-          | Slot | Role | Model | Composite Score |
-          |------|------|-------|----------------|
-          | 1 | General (Primary) | {{ selection.slot1_general }} | {{ parsed_metrics[selection.slot1_general].general_composite | default('N/A') }} |
-          | 2 | General (Secondary) | {{ selection.slot2_general }} | {{ parsed_metrics[selection.slot2_general].general_composite | default('N/A') }} |
-          | 3 | Coding (Primary) | {{ selection.slot3_coding }} | {{ parsed_metrics[selection.slot3_coding].coding_composite | default('N/A') }} |
-          | 4 | Coding (Secondary) | {{ selection.slot4_coding }} | {{ parsed_metrics[selection.slot4_coding].coding_composite | default('N/A') }} |
+          ## Model Selection (6-slot / 2-socket)
+          | Slot | Socket | Role | Model | Composite Score |
+          |------|--------|------|-------|----------------|
+          | 1 | Node 1 (port 11434) | General (locked) | {{ selection.slot1_general }} | {{ (parsed_metrics[selection.slot1_general].general_composite if selection.slot1_general in parsed_metrics else 'N/A') }} |
+          | 2 | Node 1 (port 11434) | General (locked) | {{ selection.slot2_general }} | {{ (parsed_metrics[selection.slot2_general].general_composite if selection.slot2_general in parsed_metrics else 'N/A') }} |
+          | 5 | Node 1 (port 11434) | General (rotate) | {{ selection.slot5_general_rotate }} | {{ (parsed_metrics[selection.slot5_general_rotate].general_composite if selection.slot5_general_rotate in parsed_metrics else 'N/A') }} |
+          | 3 | Node 0 (port 11435) | Coding (locked) | {{ selection.slot3_coding }} | {{ (parsed_metrics[selection.slot3_coding].coding_composite if selection.slot3_coding in parsed_metrics else 'N/A') }} |
+          | 4 | Node 0 (port 11435) | Coding (locked) | {{ selection.slot4_coding }} | {{ (parsed_metrics[selection.slot4_coding].coding_composite if selection.slot4_coding in parsed_metrics else 'N/A') }} |
+          | 6 | Node 0 (port 11435) | Coding (rotate) | {{ selection.slot6_coding_rotate }} | {{ (parsed_metrics[selection.slot6_coding_rotate].coding_composite if selection.slot6_coding_rotate in parsed_metrics else 'N/A') }} |
 
           ## Detailed Metrics
           {% for model, metrics in parsed_metrics.items() %}
@@ -342,3 +458,15 @@
       changed_when: true
       tags:
         - benchmark-pull
+
+    - name: "Benchmark | Restart warmup services after benchmark"
+      ansible.builtin.systemd:
+        name: "{{ item }}"
+        state: restarted
+      loop:
+        - ollama-warmup.service
+        - ollama-warmup-node0.service
+      failed_when: false
+      become: true
+      tags:
+        - benchmark-cleanup

+ 74 - 7
playbooks/04_models.yml

@@ -11,7 +11,9 @@
   vars:
     model_selection_file: "{{ playbook_dir }}/../benchmarks/results/model_selection.json"
     modelfiles_dir: /mnt/ai_data/ollama_models/modelfiles
-    slot4_model: ""
+    slot4_model: ""   # legacy override kept for backwards compatibility
+    slot5_model: ""   # overrides slot5_general_rotate
+    slot6_model: ""   # overrides slot6_coding_rotate
     ollama_api_key: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/ollama:api_key token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
 
   tasks:
@@ -38,13 +40,31 @@
       tags:
         - models-load
 
+    - name: "Models | Apply slot5 override if provided"
+      ansible.builtin.set_fact:
+        model_selection: "{{ model_selection | combine({'slot5_general_rotate': slot5_model}) }}"
+      when: slot5_model | length > 0
+      tags:
+        - models-load
+
+    - name: "Models | Apply slot6 override if provided"
+      ansible.builtin.set_fact:
+        model_selection: "{{ model_selection | combine({'slot6_coding_rotate': slot6_model}) }}"
+      when: slot6_model | length > 0
+      tags:
+        - models-load
+
     - name: "Models | Display selected models"
       ansible.builtin.debug:
         msg:
-          - "Slot 1 (General Primary):   {{ model_selection.slot1_general }}"
-          - "Slot 2 (General Secondary):  {{ model_selection.slot2_general }}"
-          - "Slot 3 (Coding Primary):    {{ model_selection.slot3_coding }}"
-          - "Slot 4 (Coding Secondary):  {{ model_selection.slot4_coding }}"
+          - "=== Node 1 — General (port 11434) ==="
+          - "Slot 1 (locked):  {{ model_selection.slot1_general }}"
+          - "Slot 2 (locked):  {{ model_selection.slot2_general }}"
+          - "Slot 5 (rotate):  {{ model_selection.slot5_general_rotate | default('none') }}"
+          - "=== Node 0 — Coding (port 11435) ==="
+          - "Slot 3 (locked):  {{ model_selection.slot3_coding }}"
+          - "Slot 4 (locked):  {{ model_selection.slot4_coding }}"
+          - "Slot 6 (rotate):  {{ model_selection.slot6_coding_rotate | default('none') }}"
       tags:
         - models-load
 
@@ -72,8 +92,10 @@
       loop:
         - "{{ model_selection.slot1_general }}"
         - "{{ model_selection.slot2_general }}"
+        - "{{ model_selection.slot5_general_rotate | default('none') }}"
         - "{{ model_selection.slot3_coding }}"
         - "{{ model_selection.slot4_coding }}"
+        - "{{ model_selection.slot6_coding_rotate | default('none') }}"
       when:
         - item | length > 0
         - item != 'none'
@@ -130,6 +152,20 @@
       tags:
         - models-modelfile
 
+    - name: "Models | Template coder-rotate Modelfile"
+      ansible.builtin.copy:
+        content: |
+          FROM {{ model_selection.slot6_coding_rotate }}
+          PARAMETER num_ctx 32768
+          SYSTEM You are an expert coding assistant. You write clean, efficient, well-documented code. Always include type hints and follow best practices.
+        dest: "{{ modelfiles_dir }}/Modelfile.coder-rotate"
+        mode: "0644"
+      when:
+        - model_selection.slot6_coding_rotate | default('') | length > 0
+        - model_selection.slot6_coding_rotate | default('none') != 'none'
+      tags:
+        - models-modelfile
+
     - name: "Models | Template llama-family Modelfile"
       ansible.builtin.copy:
         content: |
@@ -156,8 +192,9 @@
     - name: "Models | Register custom models with Ollama"
       ansible.builtin.command: "ollama create {{ item.name }} -f {{ modelfiles_dir }}/{{ item.file }}"
       loop:
-        - { name: "coder-128k", file: "Modelfile.coder-128k" }
-        - { name: "coder-32k",  file: "Modelfile.coder-32k",  slot: "{{ model_selection.slot4_coding }}" }
+        - { name: "coder-128k",   file: "Modelfile.coder-128k" }
+        - { name: "coder-32k",    file: "Modelfile.coder-32k",    slot: "{{ model_selection.slot4_coding }}" }
+        - { name: "coder-rotate", file: "Modelfile.coder-rotate", slot: "{{ model_selection.slot6_coding_rotate | default('none') }}" }
         - { name: "llama-family", file: "Modelfile.llama-family" }
         - { name: "gemma-family", file: "Modelfile.gemma-family" }
       when: item.slot is not defined or (item.slot | length > 0 and item.slot != 'none')
@@ -201,3 +238,33 @@
         state: started
       tags:
         - models-warmup
+
+    # ── Node0 warmup service ─────────────────────────────────────────
+    - name: "Models | Template node0 warmup script"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/ollama/warmup-node0.sh.j2"
+        dest: /usr/local/bin/ollama-warmup-node0.sh
+        mode: "0755"
+        owner: root
+        group: root
+      tags:
+        - models-warmup
+
+    - name: "Models | Template node0 warmup systemd service"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/systemd/ollama-warmup-node0.service.j2"
+        dest: /etc/systemd/system/ollama-warmup-node0.service
+        mode: "0644"
+        owner: root
+        group: root
+      tags:
+        - models-warmup
+
+    - name: "Models | Enable and start node0 warmup service"
+      ansible.builtin.systemd:
+        name: ollama-warmup-node0
+        enabled: true
+        state: started
+        daemon_reload: true
+      tags:
+        - models-warmup

+ 1 - 1
playbooks/07_openwebui.yml

@@ -90,7 +90,7 @@
         _openwebui_env: >-
           {{
             {
-              'OLLAMA_BASE_URL': 'http://host.docker.internal:11434',
+              'OLLAMA_BASE_URLS': 'http://host.docker.internal:11434;http://host.docker.internal:11435',
               'OLLAMA_API_KEY': ollama_api_key,
               'WEBUI_SECRET_KEY': openwebui_secret_key,
               'WEBUI_AUTH': 'true',

+ 21 - 0
playbooks/08_openclaw.yml

@@ -77,6 +77,27 @@
       tags:
         - openclaw-config
 
+    - name: "OpenClaw | Load model selection for model assignment"
+      ansible.builtin.slurp:
+        src: "{{ playbook_dir }}/../benchmarks/results/model_selection.json"
+      delegate_to: localhost
+      become: false
+      register: _model_sel_raw
+      ignore_errors: true
+      when: not skip_openclaw
+      tags:
+        - openclaw-config
+
+    - name: "OpenClaw | Set openclaw_model from benchmark slot 1 (best general)"
+      ansible.builtin.set_fact:
+        openclaw_model: "{{ (_model_sel_raw.content | b64decode | from_json).slot1_general }}"
+      when:
+        - not skip_openclaw
+        - _model_sel_raw is not failed
+        - _model_sel_raw.content is defined
+      tags:
+        - openclaw-config
+
     # ── Install Python dependencies ───────────────────────────────────
     - name: "OpenClaw | Install Python dependencies"
       ansible.builtin.pip:

+ 171 - 0
playbooks/_bench_tier_batch.yml

@@ -0,0 +1,171 @@
+---
+# playbooks/_bench_tier_batch.yml
+# Included by 03_benchmark.yml once per batch of up to 6 models.
+#
+# Expected vars (passed via include_tasks vars:):
+#   _batch_node1  — list of 0–3 model names for port 11434
+#   _batch_node0  — list of 0–3 model names for port 11435
+#
+# Mutates host facts (accumulated across batches):
+#   bench_all_results    — list of uri result dicts
+#   all_eligible_models  — list of model names that passed load
+#
+# Concurrency design:
+#   Load:      node1 and node0 warm-up "Hi" prompts fire simultaneously (async).
+#              Within each node Ollama still loads one model at a time,
+#              but both nodes drain their queues in parallel.
+#   Benchmark: sequential (synchronous uri), one request at a time per node.
+#              Node1 drains fully, then node0. No queue contamination; each
+#              request gets a full idle inference slot and clean eval_duration.
+
+# ── Load models into RAM (both nodes concurrently) ────────────────────────────
+# 3 models per node, sequential within each node → last model waits for 2
+# ahead: max load wait ≤ 2 × load_timeout. Use load_timeout × 4 for margin.
+
+- name: "Benchmark | Load node1 models into RAM (async)"
+  ansible.builtin.uri:
+    url: "http://localhost:11434/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item }}"
+      prompt: "Hi"
+      stream: false
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_load_timeout | int) * 4 }}"
+    status_code: 200
+  loop: "{{ _batch_node1 }}"
+  loop_control:
+    label: "node1 load: {{ item }}"
+  async: "{{ (benchmark_load_timeout | int) * 5 }}"
+  poll: 0
+  register: _load_node1_jobs
+  failed_when: false
+
+- name: "Benchmark | Load node0 models into RAM (async)"
+  ansible.builtin.uri:
+    url: "http://localhost:{{ ollama_node0_port }}/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item }}"
+      prompt: "Hi"
+      stream: false
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_load_timeout | int) * 4 }}"
+    status_code: 200
+  loop: "{{ _batch_node0 }}"
+  loop_control:
+    label: "node0 load: {{ item }}"
+  async: "{{ (benchmark_load_timeout | int) * 5 }}"
+  poll: 0
+  register: _load_node0_jobs
+  failed_when: false
+
+- name: "Benchmark | Collect node1 load results"
+  ansible.builtin.async_status:
+    jid: "{{ _async_job.ansible_job_id }}"
+  loop: "{{ _load_node1_jobs.results | default([]) }}"
+  loop_control:
+    loop_var: _async_job
+    label: "node1 load: {{ _async_job.item | default('?') }}"
+  register: _load_node1
+  until: _load_node1.finished
+  retries: "{{ ((benchmark_load_timeout | int) * 5 / 15) | int + 5 }}"
+  delay: 15
+  failed_when: false
+
+- name: "Benchmark | Collect node0 load results"
+  ansible.builtin.async_status:
+    jid: "{{ _async_job.ansible_job_id }}"
+  loop: "{{ _load_node0_jobs.results | default([]) }}"
+  loop_control:
+    loop_var: _async_job
+    label: "node0 load: {{ _async_job.item | default('?') }}"
+  register: _load_node0
+  until: _load_node0.finished
+  retries: "{{ ((benchmark_load_timeout | int) * 5 / 15) | int + 5 }}"
+  delay: 15
+  failed_when: false
+
+# ── Identify successfully loaded models ───────────────────────────────────────
+
+- name: "Benchmark | Identify loaded models in batch"
+  ansible.builtin.set_fact:
+    _eligible_node1: "{{ _load_node1.results | selectattr('status', 'equalto', 200) | map(attribute='_async_job') | map(attribute='item') | list }}"
+    _eligible_node0: "{{ _load_node0.results | selectattr('status', 'equalto', 200) | map(attribute='_async_job') | map(attribute='item') | list }}"
+
+# ── Fire benchmark prompts sequentially (one request at a time per node) ──────
+# Sequential firing ensures each request hits an idle Ollama inference slot:
+# no queue contamination, full CPU budget per request, clean eval_duration.
+# Node1 then node0 run back-to-back; concurrent load phase above is unchanged.
+
+- name: "Benchmark | Fire test prompts at node1"
+  ansible.builtin.uri:
+    url: "http://localhost:11434/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item.0 }}"
+      prompt: "{{ test_prompts[item.1].prompt }}"
+      stream: false
+      options:
+        num_predict: "{{ benchmark_num_predict | int }}"
+        temperature: 0
+        seed: 42
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_large_timeout | int) }}"
+    status_code: 200
+  loop: "{{ _eligible_node1 | product(test_prompts.keys() | list) | list }}"
+  loop_control:
+    label: "{{ item.0 }} / {{ item.1 }}"
+  register: _bench_node1
+  failed_when: false
+
+- name: "Benchmark | Fire test prompts at node0"
+  ansible.builtin.uri:
+    url: "http://localhost:{{ ollama_node0_port }}/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item.0 }}"
+      prompt: "{{ test_prompts[item.1].prompt }}"
+      stream: false
+      options:
+        num_predict: "{{ benchmark_num_predict | int }}"
+        temperature: 0
+        seed: 42
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_large_timeout | int) }}"
+    status_code: 200
+  loop: "{{ _eligible_node0 | product(test_prompts.keys() | list) | list }}"
+  loop_control:
+    label: "{{ item.0 }} / {{ item.1 }}"
+  register: _bench_node0
+  failed_when: false
+
+# ── Accumulate results into play-scoped facts ─────────────────────────────────
+# Synchronous uri populates result.item = [model, prompt_key] at top level —
+# no _async_job indirection needed; compute task in 03_benchmark.yml unchanged.
+
+- name: "Benchmark | Accumulate node1 results"
+  ansible.builtin.set_fact:
+    bench_all_results: "{{ bench_all_results + [item] }}"
+  loop: "{{ _bench_node1.results | default([]) }}"
+  loop_control:
+    label: "{{ (item.item | default(['?', '?']))[0] }} / {{ (item.item | default(['?', '?']))[1] }}"
+
+- name: "Benchmark | Accumulate node0 results"
+  ansible.builtin.set_fact:
+    bench_all_results: "{{ bench_all_results + [item] }}"
+  loop: "{{ _bench_node0.results | default([]) }}"
+  loop_control:
+    label: "{{ (item.item | default(['?', '?']))[0] }} / {{ (item.item | default(['?', '?']))[1] }}"
+
+- name: "Benchmark | Accumulate eligible models"
+  ansible.builtin.set_fact:
+    all_eligible_models: "{{ all_eligible_models + _eligible_node1 + _eligible_node0 }}"

+ 75 - 45
roles/models/README.md

@@ -2,79 +2,109 @@
 
 ## Purpose
 
-Manage the Ollama model lifecycle -- pulling models, creating custom Modelfile
-configurations, and running a warm-up service to ensure models are loaded into GPU
-memory at boot time.
+Manage the Ollama model lifecycle — pulling models, creating custom Modelfile
+configurations, and running warm-up services to ensure models are loaded into RAM
+at boot time across both NUMA instances.
 
-## Slot System
+## 6-Slot System
 
-| Slot | Role               | Selection Method                         |
-|------|--------------------|------------------------------------------|
-| 1    | Primary Coding     | Highest coding composite from benchmarks |
-| 2    | Primary General    | Highest general composite from benchmarks|
-| 3    | Secondary / Backup | Next-best overall average composite      |
-| 4    | Experimental       | Manual override via `-e slot4_model=<name>` |
+| Slot | Instance      | Port  | Role             | Selection                      | Rotation                    |
+|------|---------------|-------|------------------|--------------------------------|-----------------------------|
+| 1    | Node 1        | 11434 | General (locked) | Top general composite          | Re-benchmark only           |
+| 2    | Node 1        | 11434 | General (locked) | 2nd general composite          | Re-benchmark only           |
+| 5    | Node 1        | 11434 | General (rotate) | 3rd general composite          | `-e slot5_model=<name>`     |
+| 3    | Node 0        | 11435 | Coding (locked)  | Top coding composite           | Re-benchmark only           |
+| 4    | Node 0        | 11435 | Coding (locked)  | 2nd coding composite           | Re-benchmark only           |
+| 6    | Node 0        | 11435 | Coding (rotate)  | 3rd coding composite           | `-e slot6_model=<name>`     |
 
 ## Slot Rotation
 
-To override slot 4 with a specific model at runtime:
+Rotate the general slot on Node 1 (port 11434):
 
 ```bash
-ansible-playbook playbooks/03_ollama.yml -e slot4_model=mistral:7b
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest"
 ```
 
-Slots 1-3 are automatically assigned based on the latest benchmark results in
-`model_selection.json`. Slot 4 is always user-controlled.
+Rotate the coding slot on Node 0 (port 11435):
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot6_model=llama3.1:70b"
+```
+
+Both at once:
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml \
+  -e "slot5_model=mistral:latest" -e "slot6_model=command-r:35b"
+```
+
+Reset both rotate slots back to benchmark recommendations:
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
 
 ## Modelfile Configurations
 
-Custom Modelfile variants are created for fine-tuned context windows and use cases:
+Custom Modelfile variants are created for fine-tuned context windows:
+
+| Custom Model    | Base Slot    | Context | Port  | Use Case                         |
+|-----------------|--------------|---------|-------|----------------------------------|
+| `coder-128k`    | slot3_coding | 32768   | 11435 | Primary coding (large context)   |
+| `coder-32k`     | slot4_coding | 32768   | 11435 | Secondary coding                 |
+| `coder-rotate`  | slot6_coding_rotate | 32768 | 11435 | Rotatable coding model      |
+| `llama-family`  | llama3.2:3b  | 8192    | 11434 | Family-safe general assistant    |
+| `gemma-family`  | llama3.1:8b  | 8192    | 11434 | Family-safe general assistant    |
+
+**These aliases are excluded from benchmarking** via `benchmark_skip_aliases` — their
+32k-token `num_ctx` context allocations stall the benchmark loop with 285-second responses.
+
+## Warm-up Services
+
+Two oneshot systemd services pre-load models after their respective Ollama instances start:
 
-| Custom Model          | Base Model           | Context Window | Use Case                    |
-|-----------------------|----------------------|----------------|-----------------------------|
-| `coding-primary`     | (slot 1 model)       | 32768          | Code generation and debugging |
-| `general-primary`    | (slot 2 model)       | 16384          | General conversation and reasoning |
-| `backup`             | (slot 3 model)       | 16384          | Fallback for either category |
-| `experimental`       | (slot 4 model)       | 8192           | Testing new models           |
+| Service                      | Warms               | Instance            |
+|------------------------------|---------------------|---------------------|
+| `ollama-warmup.service`      | slots 1, 2, 5       | Node 1 (port 11434) |
+| `ollama-warmup-node0.service`| slots 3, 4, 6       | Node 0 (port 11435) |
 
-## Warm-up Service
+`OLLAMA_KEEP_ALIVE=-1` keeps models pinned once loaded. The warmup services only
+need to run once after boot; subsequent requests hit already-loaded models immediately.
 
-The role deploys `ollama-warmup.service`, a oneshot systemd service that runs after
-`ollama.service` starts.
+Check warmup status:
 
-**Why it is needed:** Even though `OLLAMA_KEEP_ALIVE=-1` keeps models loaded in GPU
-memory indefinitely once loaded, Ollama does not automatically load models on
-startup. The warm-up service sends a minimal inference request to each slot model,
-triggering the initial load into GPU memory. Without this, the first user request
-to each model would experience a long delay while the model is loaded.
+```bash
+systemctl status ollama-warmup ollama-warmup-node0
+```
 
-The warm-up service:
+Re-run warmup manually (e.g. after rotating a slot):
 
-1. Waits for Ollama API to be healthy
-2. Sends a short prompt to each configured slot model
-3. Exits after all models are loaded
+```bash
+systemctl restart ollama-warmup          # Node 1 general models
+systemctl restart ollama-warmup-node0    # Node 0 coding models
+```
 
 ## model_selection.json
 
-The model selection file is read by this role to determine which models to assign to
-each slot. Schema:
+`playbooks/04_models.yml` reads `benchmarks/results/model_selection.json`:
 
 ```json
 {
-  "timestamp": "2025-01-15T10:30:00Z",
-  "slot1_coding": "qwen2.5-coder:14b",
-  "slot2_general": "llama3.1:8b",
-  "slot3_backup": "deepseek-coder-v2:16b",
-  "slot4_experimental": null
+  "slot1_general": "llama3.1:8b",
+  "slot2_general": "mistral:latest",
+  "slot5_general_rotate": "llama3.2:3b",
+  "slot3_coding": "deepseek-coder-v2:16b",
+  "slot4_coding": "qwen2.5-coder:7b",
+  "slot6_coding_rotate": "codegemma:7b",
+  "general_ranking": [...],
+  "coding_ranking": [...],
+  "all_metrics": { ... }
 }
 ```
 
-If `model_selection.json` does not exist (first run before benchmarks), the role
-falls back to default models defined in `group_vars/all.yml`.
-
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags models
-ansible-playbook playbooks/site.yml --tags warmup
+ansible-playbook playbooks/site.yml --tags models -K -e @local.yml
+ansible-playbook playbooks/site.yml --tags models-warmup -K -e @local.yml
 ```

+ 52 - 41
roles/ollama/README.md

@@ -2,69 +2,80 @@
 
 ## Purpose
 
-Install, configure, and maintain the Ollama inference server on the AI server host.
+Install, configure, and maintain Ollama inference server(s) on the AI server host.
+Two instances run simultaneously — one per NUMA socket — to utilize both CPU sockets
+on the Dell M630 (2× E5-2690v4).
 
-## Installation
+## Instances
 
-Ollama is installed using the official install script, which places the binary at
-`/usr/local/bin/ollama` and creates a systemd service. The script handles both fresh
-installs and upgrades.
+| Service                | Port  | NUMA Node | CPUs (physical only) | RAM binding | Purpose          |
+|------------------------|-------|-----------|----------------------|-------------|------------------|
+| `ollama.service`       | 11434 | Node 1    | 1 3 5 … 27 (odd)     | `--membind=1` | General models |
+| `ollama-node0.service` | 11435 | Node 0    | 0 2 4 … 26 (even)    | `--membind=0` | Coding models  |
 
-## Environment Variables
+Both instances share the same model storage directory (`/mnt/ai_data/ollama_models`)
+and Ollama API key. Weights are loaded once into the NUMA node's memory; they are not
+duplicated between instances.
 
-Configuration is applied via a systemd drop-in override file at
-`/etc/systemd/system/ollama.service.d/override.conf`.
+## Configuration
 
-| Variable                  | Value              | Description                                      |
-|---------------------------|--------------------|--------------------------------------------------|
-| `OLLAMA_HOST`             | `0.0.0.0:11434`   | Listen on all interfaces, port 11434             |
-| `OLLAMA_MODELS`           | `/mnt/ai_data/ollama/models` | Model storage directory                |
-| `OLLAMA_KEEP_ALIVE`       | `-1`               | Keep models loaded in GPU memory indefinitely    |
-| `OLLAMA_NUM_PARALLEL`     | `4`                | Number of parallel inference requests            |
-| `OLLAMA_MAX_LOADED_MODELS`| `4`                | Maximum models loaded in GPU memory at once      |
-| `OLLAMA_API_KEY`          | (from Vault)       | API key for authentication                       |
-| `OLLAMA_FLASH_ATTENTION`  | `1`                | Enable Flash Attention for performance           |
-| `OLLAMA_CONTEXT_LENGTH`   | `32768`            | Default context window size                      |
+### Node 1 — systemd override
 
-## Override.conf Approach
+Applied via `/etc/systemd/system/ollama.service.d/override.conf` (templated from
+`templates/ollama/override.conf.j2`):
 
-Rather than modifying the upstream systemd unit file (which would be overwritten on
-upgrades), this role uses a systemd drop-in directory:
+| Variable                   | Value                        | Description                                      |
+|----------------------------|------------------------------|--------------------------------------------------|
+| `OLLAMA_API_KEY`           | (from Vault)                 | Shared key for all API requests                  |
+| `OLLAMA_HOST`              | `0.0.0.0:11434`              | Listen on all interfaces, port 11434             |
+| `OLLAMA_MODELS`            | `/mnt/ai_data/ollama_models` | Shared model storage                             |
+| `OLLAMA_KEEP_ALIVE`        | `-1`                         | Never unload models from RAM                     |
+| `OLLAMA_FLASH_ATTENTION`   | `1`                          | Fused attention kernel — ~20% less memory bandwidth |
+| `OLLAMA_NUM_THREADS`       | `14`                         | Physical cores on NUMA node 1 only               |
+| `OLLAMA_NUM_PARALLEL`      | `2`                          | Concurrent inference streams per instance        |
+| `OLLAMA_MAX_LOADED_MODELS` | `3`                          | 3 models warm per instance (6 total)             |
+| `CPUAffinity`              | `1 3 5 … 27`                 | Odd CPUs = socket 1 physical cores               |
+| `ExecStart`                | `numactl --membind=1 ollama serve` | Pin memory allocations to Node 1 RAM        |
 
-```
-/etc/systemd/system/ollama.service.d/override.conf
-```
+### Node 0 — standalone systemd unit
+
+Deployed to `/etc/systemd/system/ollama-node0.service` (from
+`templates/ollama/ollama-node0.service.j2`). Uses the same variables but with:
 
-This ensures environment variables survive Ollama upgrades while keeping the
-upstream service file intact.
+| Variable   | Value           |
+|------------|-----------------|
+| `OLLAMA_HOST` | `0.0.0.0:11435` |
+| `CPUAffinity` | `0 2 4 … 26` |
+| `ExecStart`   | `numactl --membind=0 ollama serve` |
 
-## Why OLLAMA_API_KEY
+## NUMA Rationale
 
-Without an API key, anyone with network access to port 11434 can use the Ollama API
-to run inference, pull models, or delete models. Setting `OLLAMA_API_KEY` requires
-all API requests to include an `Authorization: Bearer <key>` header, preventing
-unauthenticated access.
+On the M630 with dual E5-2690v4:
+- **Node 1** (odd CPUs) has ~120 GB free RAM — assigned general models (larger)
+- **Node 0** (even CPUs) has ~75 GB free RAM — assigned coding models
+
+Without `numactl --membind`, the OS allocates model weights and KV cache across both
+nodes, causing cross-socket memory traffic (~40 GB/s vs ~68–75 GB/s local).
+`CPUAffinity` alone sets the scheduler; `numactl` sets the memory policy.
 
 ## OLLAMA_FLASH_ATTENTION
 
-Flash Attention is a GPU memory optimization that reduces memory usage and increases
-throughput for transformer inference. Setting `OLLAMA_FLASH_ATTENTION=1` enables
-this optimization for all models. This is a newer addition to Ollama and provides
-measurable performance improvements.
+Enables the fused (Flash) attention kernel — reduces attention memory bandwidth by ~20%
+and improves throughput at all context lengths on AVX2 (E5-2690v4). Note:
+`OLLAMA_KV_CACHE_TYPE` is intentionally **not** set — q8_0 dequantization overhead
+regressed throughput on this CPU despite the bandwidth savings.
 
 ## Upgrade Procedure
 
-To upgrade Ollama to the latest version:
-
 ```bash
-ansible-playbook playbooks/03_ollama.yml
+ansible-playbook playbooks/02_infrastructure.yml -K -e @local.yml --tags ollama
 ```
 
-The official install script detects the existing installation and performs an
-in-place upgrade. The service is restarted after the upgrade.
+The official install script detects the existing installation and performs an in-place
+upgrade. Both `ollama.service` and `ollama-node0.service` are restarted.
 
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags ollama
+ansible-playbook playbooks/site.yml --tags ollama -K -e @local.yml
 ```

+ 31 - 25
roles/openclaw/README.md

@@ -3,56 +3,62 @@
 ## Purpose
 
 Deploy OpenClaw, a Telegram bot that provides access to Ollama models via Telegram
-messaging.
+messaging. Always uses the best warm general-purpose model (`slot1_general` from the
+last benchmark run).
 
 ## Prerequisites
 
 - A Telegram bot token obtained from [@BotFather](https://t.me/BotFather)
 - The token must be stored in Vault at `{{ vault_secret_prefix }}/openclaw:telegram_token`
+- `benchmarks/results/model_selection.json` must exist (produced by `03_benchmark.yml`)
 
-## Installation
+## Model Selection
 
-1. Node.js 20 is installed on the target host
-2. OpenClaw is installed globally via `npm install -g openclaw`
-3. A systemd service (`openclaw.service`) is created for process management
+`08_openclaw.yml` reads `benchmarks/results/model_selection.json` at deploy time and
+sets `openclaw_model` to `slot1_general` — the highest-scoring general model that is
+always warm on the Node 1 instance (port 11434). This ensures the bot always uses the
+best available model without requiring manual updates after a benchmark run.
 
-## Configuration
+The fallback value (used when `model_selection.json` is absent) is set in
+`inventory/group_vars/all.yml` under `openclaw_model`.
 
-Config file location: `/mnt/ai_data/openclaw/config.yml`
+## Ollama Endpoint
 
-The configuration includes:
+OpenClaw connects to `localhost:11434` — the Node 1 general instance. Coding models on
+port 11435 are not accessible to the bot; they are reserved for IDE and API integrations.
 
-- Ollama API endpoint and authentication
-- Telegram bot token (read from Vault)
-- Default model selection
-- Allowed user IDs (if access control is needed)
+## Installation
 
-## Service
+1. Python 3 dependencies (`python-telegram-bot`, `requests`, `pyyaml`) are installed via `pip3`
+2. The bot script is deployed to `/mnt/ai_data/openclaw/bot.py`
+3. Config is templated to `/mnt/ai_data/openclaw/config.yml`
+4. A systemd service (`openclaw.service`) manages the process
 
-```
-/etc/systemd/system/openclaw.service
-```
+## Configuration
+
+Config file location: `/mnt/ai_data/openclaw/config.yml`
 
-The service runs as a systemd unit, automatically starting on boot and restarting
-on failure.
+The configuration includes:
+- Ollama API endpoint (`http://localhost:11434`) and API key (from Vault)
+- Telegram bot token (from Vault)
+- Model name (from `slot1_general`)
 
 ## Vault Integration
 
-The Telegram bot token is stored in Vault:
-
 - **Path:** `{{ vault_secret_prefix }}/openclaw`
 - **Key:** `telegram_token`
 
-The role reads the token from Vault at deploy time and writes it to the config file.
+The Telegram token is read from Vault at deploy time and written to the config file.
 
 ## Skipping Installation
 
-If no Telegram bot token is configured (the Vault secret is empty or absent),
-the OpenClaw installation is skipped entirely during `site.yml`. This allows
-running the full playbook without a Telegram bot token if the feature is not needed.
+If no Telegram bot token is configured (Vault secret absent or empty), the entire
+OpenClaw installation is skipped. This allows running `site.yml` without a Telegram
+bot token.
 
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags openclaw
+ansible-playbook playbooks/site.yml --tags openclaw -K -e @local.yml
+ansible-playbook playbooks/08_openclaw.yml -K -e @local.yml
 ```

+ 36 - 39
roles/openwebui/README.md

@@ -2,30 +2,41 @@
 
 ## Purpose
 
-Deploy Open WebUI with full Ollama integration, RAG support via Qdrant, and SSO via
-Keycloak OIDC.
+Deploy Open WebUI with full Ollama integration across both NUMA instances, RAG support
+via Qdrant, and SSO via Keycloak OIDC.
+
+## Ollama Backend Configuration
+
+Open WebUI connects to **both** Ollama instances simultaneously via `OLLAMA_BASE_URLS`.
+It load-balances requests across them and presents models from both as a single unified
+list.
+
+| Instance      | Port  | Models              |
+|---------------|-------|---------------------|
+| Node 1        | 11434 | General (slots 1, 2, 5) |
+| Node 0        | 11435 | Coding (slots 3, 4, 6)  |
 
 ## Environment Variables
 
-| Variable                      | Value                                                        | Source      |
-|-------------------------------|--------------------------------------------------------------|-------------|
-| `OLLAMA_BASE_URL`             | `http://host.docker.internal:11434`                         | Hardcoded   |
-| `OLLAMA_API_KEY`              | (Ollama API key)                                             | Vault       |
-| `WEBUI_SECRET_KEY`            | (session signing key)                                        | Vault       |
-| `VECTOR_DB`                   | `qdrant`                                                     | Hardcoded   |
-| `QDRANT_URI`                  | `http://host.docker.internal:6333`                          | Hardcoded   |
-| `ENABLE_RAG_WEB_SEARCH`      | `true`                                                       | Hardcoded   |
-| `OAUTH_CLIENT_ID`            | `open-webui`                                                 | Hardcoded   |
-| `OAUTH_CLIENT_SECRET`        | (OIDC client secret)                                         | Vault       |
-| `OPENID_PROVIDER_URL`        | `https://idm.<domain>/realms/<keycloak_realm>/.well-known/openid-configuration` | Vault (keycloak_oidc_url) |
-| `OAUTH_PROVIDER_NAME`        | `{{ platform_name }}`                                        | group_vars  |
-| `ENABLE_OAUTH_SIGNUP`        | `true`                                                       | Hardcoded   |
-| `DEFAULT_USER_ROLE`          | `user`                                                       | Hardcoded   |
-| `WEBUI_NAME`                 | `{{ platform_name }}`                                        | group_vars  |
-| `ENABLE_OAUTH_ROLE_MANAGEMENT` | `true`                                                     | Hardcoded   |
-| `OAUTH_ROLES_CLAIM`          | `realm_access.roles`                                         | Hardcoded   |
-| `OAUTH_ALLOWED_ROLES`        | `ai-user,ai-admin`                                           | Hardcoded   |
-| `OAUTH_ADMIN_ROLES`          | `ai-admin`                                                   | Hardcoded   |
+| Variable                      | Value                                                                                     | Source      |
+|-------------------------------|-------------------------------------------------------------------------------------------|-------------|
+| `OLLAMA_BASE_URLS`            | `http://host.docker.internal:11434;http://host.docker.internal:11435`                    | Hardcoded   |
+| `OLLAMA_API_KEY`              | (Ollama API key)                                                                          | Vault       |
+| `RAG_OLLAMA_BASE_URL`         | `http://host.docker.internal:11434`                                                       | Hardcoded   |
+| `WEBUI_SECRET_KEY`            | (session signing key)                                                                     | Vault       |
+| `VECTOR_DB`                   | `qdrant`                                                                                  | Hardcoded   |
+| `QDRANT_URI`                  | `http://host.docker.internal:6333`                                                        | Hardcoded   |
+| `OAUTH_CLIENT_ID`             | `open-webui`                                                                              | Hardcoded   |
+| `OAUTH_CLIENT_SECRET`         | (OIDC client secret)                                                                      | Vault       |
+| `OPENID_PROVIDER_URL`         | `https://idm.<domain>/realms/<keycloak_realm>/.well-known/openid-configuration`           | Vault       |
+| `OAUTH_PROVIDER_NAME`         | `{{ platform_name }}`                                                                     | group_vars  |
+| `ENABLE_OAUTH_SIGNUP`         | `true`                                                                                    | Hardcoded   |
+| `ENABLE_OAUTH_ROLE_MANAGEMENT`| `true`                                                                                    | Hardcoded   |
+| `OAUTH_ROLES_CLAIM`           | `realm_access.roles`                                                                      | Hardcoded   |
+| `OAUTH_ALLOWED_ROLES`         | `ai-user,ai-admin`                                                                        | Hardcoded   |
+| `OAUTH_ADMIN_ROLES`           | `ai-admin`                                                                                | Hardcoded   |
+| `DEFAULT_MODELS`              | `llama-family`                                                                            | Hardcoded   |
+| `WEBUI_NAME`                  | `{{ platform_name }}`                                                                     | group_vars  |
 
 ## OIDC Setup
 
@@ -38,22 +49,12 @@ Open WebUI uses Keycloak as its OIDC provider:
 ## RAG
 
 - **Vector DB:** Qdrant at `http://host.docker.internal:6333`
-- **Web search:** enabled via `ENABLE_RAG_WEB_SEARCH=true`
-- Users can upload documents through the Open WebUI interface for RAG-augmented
-  conversations
-
-## Model Access
-
-Open WebUI connects to Ollama at `http://host.docker.internal:11434` (the Docker
-host network). The `OLLAMA_API_KEY` environment variable authenticates API requests
-to the Ollama server.
+- `RAG_OLLAMA_BASE_URL` is pinned to port 11434 (Node 1) for embedding requests —
+  keeping RAG on a single stable endpoint avoids split-brain embedding indices
+- Users can upload documents through the Open WebUI interface for RAG-augmented conversations
 
 ## SSO
 
-Users see a "Sign in with {{ platform_name }}" button on the login page. Clicking it
-redirects to the Keycloak login page for the `{{ keycloak_realm }}` realm. After
-authentication, users are redirected back to Open WebUI.
-
 Access is restricted by Keycloak realm role:
 
 | Keycloak role | Open WebUI access      |
@@ -62,12 +63,8 @@ Access is restricted by Keycloak realm role:
 | `ai-admin`    | ✅ Admin               |
 | *(none)*      | ❌ Login blocked       |
 
-New users who authenticate via SSO are automatically created. Their Open WebUI role
-is set based on `OAUTH_ADMIN_ROLES` — users with `ai-admin` get admin access,
-all others get standard user access.
-
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags openwebui
+ansible-playbook playbooks/site.yml --tags openwebui -K -e @local.yml
 ```

+ 26 - 0
templates/ollama/ollama-node0.service.j2

@@ -0,0 +1,26 @@
+[Unit]
+Description=Ollama Service — NUMA Node 0 (Coding Models)
+# Ordered after the node-1 instance (ollama.service) but no Requires= on it:
+# the two instances start independently; After= only sequences start-up.
+After=network-online.target ollama.service
+Wants=network-online.target
+
+[Service]
+# --cpunodebind pins scheduling to node 0 without setting a memory policy,
+# so allocations land on node 0 naturally and THP promotion still works.
+ExecStart=/usr/bin/numactl --cpunodebind=0 {{ ollama_binary_path }} serve
+Environment="OLLAMA_API_KEY={{ ollama_api_key }}"
+# This second instance listens on its own port (the node-1 instance owns 11434).
+Environment="OLLAMA_HOST=0.0.0.0:{{ ollama_node0_port }}"
+# NOTE(review): presumably the same model store as the node-1 instance — confirm
+# both units point at the same directory so weights are not duplicated on disk.
+Environment="OLLAMA_MODELS=/mnt/ai_data/ollama_models"
+Environment="OLLAMA_KEEP_ALIVE={{ ollama_keep_alive }}"
+Environment="OLLAMA_FLASH_ATTENTION={{ ollama_flash_attention }}"
+# NOTE(review): reuses the same thread/parallel/max-model vars as the node-1
+# unit — confirm node 0 has the same core count, or give it dedicated vars.
+Environment="OLLAMA_NUM_THREADS={{ ollama_num_threads }}"
+Environment="OLLAMA_NUM_PARALLEL={{ ollama_num_parallel }}"
+Environment="OLLAMA_MAX_LOADED_MODELS={{ ollama_max_loaded_models }}"
+# NOTE(review): CPUAffinity alongside --cpunodebind is redundant at best; if
+# ollama_node0_cpu_affinity lists only physical cores it re-creates the
+# thread-oversubscription stall the node-1 override comment warns about —
+# confirm the list includes the HT siblings, or drop this line.
+CPUAffinity={{ ollama_node0_cpu_affinity }}
+# Keep model weights resident: unlimited mlock, shielded from the OOM killer.
+LimitMEMLOCK=infinity
+LimitNOFILE=65535
+OOMScoreAdjust=-500
+Restart=always
+RestartSec=3
+User=ollama
+Group=ollama
+
+[Install]
+WantedBy=multi-user.target

+ 16 - 15
templates/ollama/override.conf.j2

@@ -9,12 +9,6 @@ Environment="OLLAMA_KEEP_ALIVE=-1"
 # Flash attention: fused softmax, ~20% less memory bandwidth, faster on AVX2
 Environment="OLLAMA_FLASH_ATTENTION=1"
 
-# KV cache quantization: q8_0 halves KV cache memory vs fp16.
-# Attention reads dominate memory bandwidth at long contexts; smaller KV =
-# fewer bytes transferred per token generated. q8_0 over q4_0: negligible
-# quality loss vs significant noise at long contexts with q4_0.
-Environment="OLLAMA_KV_CACHE_TYPE=q8_0"
-
 # Threads: 14 physical cores on NUMA node 1 only (no hyperthreads).
 # LLM inference is memory-bandwidth-bound; HT siblings share the same memory
 # pipeline and add scheduling overhead without adding bandwidth.
@@ -24,19 +18,26 @@ Environment="OLLAMA_NUM_THREADS={{ ollama_num_threads }}"
 # Keeps per-request throughput high for interactive/single-user workloads.
 Environment="OLLAMA_NUM_PARALLEL={{ ollama_num_parallel }}"
 
-# Keep 4 models warm in RAM (KEEP_ALIVE=-1 means never unload)
+# Keep 3 models warm in RAM per instance (KEEP_ALIVE=-1 means never unload; 6 total across both sockets)
 Environment="OLLAMA_MAX_LOADED_MODELS={{ ollama_max_loaded_models }}"
 
 # ── NUMA / CPU binding ────────────────────────────────────────────────────
-# ExecStart override: numactl --membind=1 guarantees model weights and KV
-# cache are allocated from NUMA node 1 RAM (120 GB free). CPUAffinity alone
-# does not set the memory policy; numactl makes it explicit.
+# numactl --cpunodebind pins the scheduler to all logical CPUs on node 1
+# (14 physical + 14 HT siblings = 28 CPUs). This avoids two failure modes:
+#
+#  1. numactl --membind=1 (MPOL_BIND) suppresses khugepaged THP promotion
+#     for the model's ~2.75 GB anonymous allocation, causing ~700k 4 KB TLB
+#     entries and near-100% L2-STLB miss rate → 128x throughput loss.
+#
+#  2. CPUAffinity restricted to 14 physical cores only forces ~56 Go runtime
+#     OS threads to compete with 14 GGML compute threads on 14 CPUs (5:1
+#     oversubscription). GGML busy-wait barriers then block waiting threads
+#     from checking in → cascading stall across ~400 ops/token → 128x loss.
+#
+# --cpunodebind (sched_setaffinity only, no set_mempolicy) gives 28 CPUs and
+# MPOL_DEFAULT, so allocations go to node 1 naturally and THP works freely.
 ExecStart=
-ExecStart=/usr/bin/numactl --membind=1 {{ ollama_binary_path }} serve
-
-# Restrict scheduler to physical cores on node 1 only (odd CPUs 1–27).
-# Omitting HT siblings (29–55) prevents cross-HT contention on the memory bus.
-CPUAffinity={{ ollama_cpu_affinity }}
+ExecStart=/usr/bin/numactl --cpunodebind={{ ollama_numa_node }} {{ ollama_binary_path }} serve
 
 # ── Memory hardening ───────────────────────────────────────────────────────
 # Prevent model weights from being paged out under memory pressure

+ 28 - 0
templates/ollama/warmup-node0.sh.j2

@@ -0,0 +1,28 @@
+#!/bin/bash
+# Ollama Node 0 model warm-up script (coding models, port {{ ollama_node0_port }})
+# Sends a 1-token generation to each slot model to pin them in RAM
+
+set -e
+
+OLLAMA_URL="http://localhost:{{ ollama_node0_port }}"
+API_KEY="{{ ollama_api_key }}"
+
+# Warm up one model: a 1-token /api/generate forces the server to load it.
+# Best-effort by design — the `|| echo` fallback keeps a single failed model
+# from aborting the whole run despite `set -e`.
+warmup_model() {
+    local model="$1"
+    echo "[warmup-node0] Loading: $model"
+    curl -sf -X POST "${OLLAMA_URL}/api/generate" \
+        -H "Authorization: Bearer ${API_KEY}" \
+        -H "Content-Type: application/json" \
+        -d "{\"model\":\"${model}\",\"prompt\":\"Hi\",\"stream\":false,\"options\":{\"num_predict\":1}}" \
+        > /dev/null || echo "[warmup-node0] Warning: failed to warm up ${model}"
+    echo "[warmup-node0] Done: $model"
+}
+
+warmup_model "{{ model_selection.slot3_coding }}"
+warmup_model "{{ model_selection.slot4_coding }}"
+{# Slot 6 is an optional rotation slot: skip when unset, empty, or 'none'. #}
+{% if model_selection.slot6_coding_rotate | default('') | length > 0
+      and model_selection.slot6_coding_rotate | default('none') != 'none' %}
+warmup_model "{{ model_selection.slot6_coding_rotate }}"
+{% endif %}
+
+echo "[warmup-node0] All Node 0 coding models warmed up."

+ 4 - 4
templates/ollama/warmup.sh.j2

@@ -20,9 +20,9 @@ warmup_model() {
 
 warmup_model "{{ model_selection.slot1_general }}"
 warmup_model "{{ model_selection.slot2_general }}"
-warmup_model "{{ model_selection.slot3_coding }}"
-{% if model_selection.slot4_coding | length > 0 and model_selection.slot4_coding != 'none' %}
-warmup_model "{{ model_selection.slot4_coding }}"
+{% if model_selection.slot5_general_rotate | default('') | length > 0
+      and model_selection.slot5_general_rotate | default('none') != 'none' %}
+warmup_model "{{ model_selection.slot5_general_rotate }}"
 {% endif %}
 
-echo "[warmup] All models warmed up."
+echo "[warmup] All Node 1 general models warmed up."

+ 14 - 0
templates/systemd/ollama-warmup-node0.service.j2

@@ -0,0 +1,14 @@
+[Unit]
+Description=Ollama Model Warm-Up — Node 0 (Coding)
+# Run once after the node-0 Ollama instance; will not start without it.
+After=ollama-node0.service
+Requires=ollama-node0.service
+
+[Service]
+# oneshot + RemainAfterExit=yes: the unit stays "active" after the script
+# exits, so the warm-up runs once per boot rather than being re-triggered.
+Type=oneshot
+RemainAfterExit=yes
+# NOTE(review): After= orders start-up but does not wait for the HTTP API to
+# accept requests; failed warm-ups are only logged by the script, not retried.
+ExecStart=/usr/local/bin/ollama-warmup-node0.sh
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target

+ 15 - 0
templates/vault/vault-unseal.service.j2

@@ -0,0 +1,15 @@
+[Unit]
+Description=HashiCorp Vault Auto-Unseal
+Documentation=https://developer.hashicorp.com/vault/docs/concepts/seal
+# Start after Vault itself; hard dependency so unseal never runs without it.
+After=vault.service network.target
+Requires=vault.service
+
+[Service]
+# oneshot with RemainAfterExit=no: the unit returns to "inactive" once the
+# script exits, so `systemctl start vault-unseal` can re-run it at any time
+# (e.g. after a manual Vault restart re-seals the server).
+Type=oneshot
+ExecStart=/usr/local/bin/vault-unseal.sh
+RemainAfterExit=no
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target

+ 11 - 11
templates/vault/vault-unseal.sh.j2

@@ -1,25 +1,25 @@
 #!/bin/bash
-# Vault auto-unseal script
-# Reads unseal key from vault-init.json and unseals Vault
+# Vault auto-unseal script — managed by Ansible, do not edit manually
+# Reads unseal key from /etc/vault.d/unseal.key and unseals Vault
 
 set -e
 
-VAULT_ADDR="http://127.0.0.1:8200"
-INIT_FILE="/docker_mounts/vault/vault-init.json"
+VAULT_ADDR="http://127.0.0.1:{{ vault_port }}"
+UNSEAL_KEY_FILE="/etc/vault.d/unseal.key"
 
-if [ ! -f "$INIT_FILE" ]; then
-    echo "ERROR: vault-init.json not found at $INIT_FILE"
+if [ ! -f "$UNSEAL_KEY_FILE" ]; then
+    echo "ERROR: unseal key not found at $UNSEAL_KEY_FILE"
     exit 1
 fi
 
-UNSEAL_KEY=$(jq -r '.unseal_keys_b64[0]' "$INIT_FILE")
+UNSEAL_KEY=$(cat "$UNSEAL_KEY_FILE")
 
 if [ -z "$UNSEAL_KEY" ]; then
-    echo "ERROR: Could not extract unseal key from $INIT_FILE"
+    echo "ERROR: unseal key file is empty"
     exit 1
 fi
 
-# Wait for Vault to be ready
+# Wait for Vault API to become ready (up to 60 s)
 for i in $(seq 1 30); do
     STATUS=$(curl -sf "${VAULT_ADDR}/v1/sys/health" 2>/dev/null || true)
     if [ -n "$STATUS" ]; then
@@ -30,7 +30,7 @@ for i in $(seq 1 30); do
         fi
         break
     fi
-    echo "Waiting for Vault... ($i/30)"
+    echo "Waiting for Vault API... ($i/30)"
     sleep 2
 done
 
@@ -38,5 +38,5 @@ echo "Unsealing Vault..."
 curl -sf -X PUT "${VAULT_ADDR}/v1/sys/unseal" \
     -H "Content-Type: application/json" \
     -d "{\"key\": \"${UNSEAL_KEY}\"}"
-
+echo ""
 echo "Vault unsealed successfully."

+ 74 - 0
tftsr_nginx-hardening/CLAUDE.md

@@ -0,0 +1,74 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Target Environment
+
+- **OS:** RHEL 9.6, **NGINX:** 1.20.1 at `/etc/nginx/`, **Ansible:** `ansible_connection: local`
+- **TLS certs:** `/etc/letsencrypt/live/tftsr.com-0001/{fullchain,privkey}.pem`
+- **Services proxied:** 15 internal services on `*.tftsr.com` / `tftsr.com`
+- `sudo dnf install -y ansible-core` is required before first run (not managed by this project)
+
+## Run Commands
+
+```bash
+# Full hardening (all three roles)
+ansible-playbook -K site.yml
+
+# Individual roles
+ansible-playbook -K playbooks/nginx_hardening.yml
+ansible-playbook -K playbooks/fail2ban.yml
+ansible-playbook -K playbooks/geo_blocking.yml
+
+# Refresh country IP ranges from ipdeny.com (run periodically)
+ansible-playbook -K playbooks/update_geo_blocks.yml
+
+# Dry run — no changes applied
+ansible-playbook -K --check site.yml
+```
+
+## Architecture
+
+Three independent roles, each runnable standalone via `playbooks/`:
+
+### `nginx_hardening`
+Deploys four files to `/etc/nginx/conf.d/` prefixed `00-` so they sort before all service configs:
+- `00-security-headers.conf` — `server_tokens off`, HSTS, X-Frame-Options, rate-limit zone, client body size
+- `00-ssl-params.conf` — TLS 1.2/1.3 only, cipher suite, OCSP stapling, resolver
+- `00-proxy-params.conf` — strips `X-Powered-By`/`Server`, sets `X-Real-IP`/`X-Forwarded-*` headers
+- `00-http-redirects.conf` — port-80 301 redirect server blocks for the 11 services that lack them
+
+**Critical constraint:** Existing service configs in `/etc/nginx/conf.d/` are never modified. The 4 services that already have HTTP→HTTPS redirects (keycloak-proxy, vault, ollama-api, vaultwarden) are not in `nginx_redirect_services`. Do not add `ssl_session_cache` to `00-ssl-params.conf` — all service configs already declare `shared:SSL:1m` in their server blocks and a conflicting http-level declaration will break `nginx -t`.
+
+### `fail2ban`
+Installs fail2ban from EPEL, deploys filter definitions and `jail.local`. Three jails:
+- `sshd` → `/var/log/secure`
+- `nginx-4xx` → `/var/log/nginx/access.log` (regex: any 4xx)
+- `nginx-auth` → `/var/log/nginx/access.log` (regex: 401/403 only)
+
+### `geo_blocking`
+Downloads per-country CIDR files from `ipdeny.com/ipblocks/data/aggregated/{cc}-aggregated.zone` at runtime, assembles them into a single nftables set, and loads a standalone `table inet geo_block` (does not touch any existing nftables rules). The include line is appended to `/etc/sysconfig/nftables.conf`. Downloads use `ignore_errors: yes` — missing zone files are silently skipped.
+
+**To unblock a country:** set `blocked: false` for its entry in `roles/geo_blocking/defaults/main.yml` and re-run `update_geo_blocks.yml`.
+
+**ipdeny-absent territories** (no zone file exists — permanently `blocked: false`, no IPs to block): BV, CX, EH, GS, HM, PN, SH, SJ, TF, XK.
+
+**DMZ host has no outbound internet** — zone files must be pre-downloaded elsewhere and copied over:
+```bash
+# On a machine WITH internet access:
+./scripts/download-geo-zones.sh /tmp/geo_zones
+rsync -av /tmp/geo_zones/ sarman@dmz-host:/opt/geo_zones/
+
+# Then run with the local cache:
+ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+```
+The role does a fast 8-second HEAD check to ipdeny.com first; if it fails and `geo_zone_files_dir` is unset, the play fails immediately rather than timing out on all 238 countries.
+
+**YAML boolean trap:** `code: NO` (Norway) is parsed as boolean `false` by PyYAML (YAML 1.1). It must stay quoted as `code: "NO"`. Watch for this if adding new entries.
+
+## Key Design Decisions
+
+- All `template`/`copy`/`lineinfile` tasks use `backup: yes` — timestamped backups are created automatically on every run alongside the modified file.
+- The nft template opens with `add table inet geo_block` + `flush table inet geo_block` for idempotency (safe to re-run).
+- The `geo_blocking` role downloads zone files to a `tempfile` directory and cleans it up at the end of every run.
+- Handlers fire only when a task reports `changed` — NGINX reload and fail2ban restart are not triggered on idempotent re-runs.

+ 4 - 0
tftsr_nginx-hardening/ansible.cfg

@@ -0,0 +1,4 @@
+[defaults]
+inventory = inventory/hosts.yml
+roles_path = roles
+# NOTE(review): inventory is localhost with ansible_connection: local, so
+# disabling host key checking has no practical effect here; re-evaluate if
+# remote hosts are ever added to this inventory.
+host_key_checking = False

+ 4 - 0
tftsr_nginx-hardening/inventory/hosts.yml

@@ -0,0 +1,4 @@
+# Single-host inventory: everything runs on this machine via the local connection.
+all:
+  hosts:
+    localhost:
+      ansible_connection: local

+ 73 - 0
tftsr_nginx-hardening/nginx-hardening/CLAUDE.md

@@ -0,0 +1,73 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Target Environment
+
+- **OS:** RHEL 9, **NGINX:** 1.20+ at `/etc/nginx/`
+- Playbooks target `hosts: all` — configure the target in `inventory/hosts.yml`
+- `sudo dnf install -y ansible-core` is required on the control node before first run
+
+## Run Commands
+
+```bash
+# Full hardening (all three roles)
+ansible-playbook -K site.yml
+
+# Individual roles
+ansible-playbook -K playbooks/nginx_hardening.yml
+ansible-playbook -K playbooks/fail2ban.yml
+ansible-playbook -K playbooks/geo_blocking.yml
+
+# Refresh country IP ranges from ipdeny.com (run periodically)
+ansible-playbook -K playbooks/update_geo_blocks.yml
+
+# Dry run — no changes applied
+ansible-playbook -K --check site.yml
+```
+
+## Architecture
+
+Three independent roles, each runnable standalone via `playbooks/`:
+
+### `nginx_hardening`
+Deploys four files to `/etc/nginx/conf.d/` prefixed `00-` so they sort before all service configs:
+- `00-security-headers.conf` — `server_tokens off`, HSTS, X-Frame-Options, rate-limit zone, client body size
+- `00-ssl-params.conf` — TLS 1.2/1.3 only, cipher suite, OCSP stapling, resolver
+- `00-proxy-params.conf` — strips `X-Powered-By`/`Server`, sets `X-Real-IP`/`X-Forwarded-*` headers
+- `00-http-redirects.conf` — port-80 301 redirect server blocks for the 11 services that lack them
+
+**Critical constraint:** Existing service configs in `/etc/nginx/conf.d/` are never modified. Only list services in `nginx_redirect_services` that are **missing** a port-80 redirect — services that already have one must be excluded or NGINX will have duplicate `server_name` entries. Do not add `ssl_session_cache` to `00-ssl-params.conf` — if any existing service configs already declare `shared:SSL:Xm` in their server blocks, a conflicting http-level declaration with a different size will break `nginx -t`.
+
+### `fail2ban`
+Installs fail2ban from EPEL, deploys filter definitions and `jail.local`. Three jails:
+- `sshd` → `/var/log/secure`
+- `nginx-4xx` → `/var/log/nginx/access.log` (regex: any 4xx)
+- `nginx-auth` → `/var/log/nginx/access.log` (regex: 401/403 only)
+
+### `geo_blocking`
+Downloads per-country CIDR files from `ipdeny.com/ipblocks/data/aggregated/{cc}-aggregated.zone` at runtime, assembles them into a single nftables set, and loads a standalone `table inet geo_block` (does not touch any existing nftables rules). The include line is appended to `/etc/sysconfig/nftables.conf`. Downloads use `ignore_errors: yes` — missing zone files are silently skipped.
+
+**To unblock a country:** set `blocked: false` for its entry in `roles/geo_blocking/defaults/main.yml` and re-run `update_geo_blocks.yml`.
+
+**ipdeny-absent territories** (no zone file exists — permanently `blocked: false`, no IPs to block): BV, CX, EH, GS, HM, PN, SH, SJ, TF, XK.
+
+**DMZ host has no outbound internet** — zone files must be pre-downloaded elsewhere and copied over:
+```bash
+# On a machine WITH internet access:
+./scripts/download-geo-zones.sh /tmp/geo_zones
+rsync -av --no-group /tmp/geo_zones/ user@your-host:/opt/geo_zones/
+
+# Then run with the local cache:
+ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+```
+The role does a fast 8-second HEAD check to ipdeny.com first; if it fails and `geo_zone_files_dir` is unset, the play fails immediately rather than timing out on all 238 countries.
+
+**YAML boolean trap:** `code: NO` (Norway) is parsed as boolean `false` by PyYAML (YAML 1.1). It must stay quoted as `code: "NO"`. Watch for this if adding new entries.
+
+## Key Design Decisions
+
+- All `template`/`copy`/`lineinfile` tasks use `backup: yes` — timestamped backups are created automatically on every run alongside the modified file.
+- The nft template opens with `add table inet geo_block` + `flush table inet geo_block` for idempotency (safe to re-run).
+- The `geo_blocking` role downloads zone files to a `tempfile` directory and cleans it up at the end of every run.
+- Handlers fire only when a task reports `changed` — NGINX reload and fail2ban restart are not triggered on idempotent re-runs.

+ 179 - 0
tftsr_nginx-hardening/nginx-hardening/README.md

@@ -0,0 +1,179 @@
+# nginx-hardening
+
+Ansible project to harden an NGINX reverse proxy to a production security posture. Applies security headers, TLS hardening, HTTP→HTTPS redirects, fail2ban jails, and nftables-based country geo-blocking — without modifying any existing service configurations.
+
+## Target environment
+
+- **OS:** RHEL 9 / Rocky Linux 9 / AlmaLinux 9
+- **NGINX:** 1.20+ with existing service configs in `/etc/nginx/conf.d/`
+- **EPEL:** Must be installed before running (`dnf install -y epel-release`)
+- **nftables:** Installed but not required to be running (managed by this project)
+- **firewalld:** Should be inactive to avoid nftables coexistence issues
+
+## What it does
+
+### Role: `nginx_hardening`
+Deploys four files to `/etc/nginx/conf.d/` prefixed `00-` so they load before all service configs:
+
+| File | Purpose |
+|------|---------|
+| `00-security-headers.conf` | `server_tokens off`, HSTS, X-Frame-Options, X-Content-Type-Options, CSP, rate-limit zone |
+| `00-ssl-params.conf` | TLS 1.2/1.3 only, hardened cipher suite, OCSP stapling, session timeout |
+| `00-proxy-params.conf` | Strips `X-Powered-By`/`Server`, sets `X-Real-IP` and `X-Forwarded-*` headers |
+| `00-http-redirects.conf` | Port-80 → HTTPS 301 redirects for services listed in `nginx_redirect_services` |
+
+**No existing service configs are modified.**
+
+### Role: `fail2ban`
+Installs fail2ban from EPEL and configures three jails:
+
+| Jail | Log | Trigger |
+|------|-----|---------|
+| `sshd` | `/var/log/secure` | Failed SSH logins |
+| `nginx-4xx` | `/var/log/nginx/access.log` | Repeated 4xx responses |
+| `nginx-auth` | `/var/log/nginx/access.log` | Repeated 401/403 responses |
+
+### Role: `geo_blocking`
+Builds a standalone `table inet geo_block` nftables ruleset populated with CIDRs for every country except the US, downloaded from [ipdeny.com](https://www.ipdeny.com). The table is loaded at boot via `/etc/sysconfig/nftables.conf`.
+
+## Prerequisites
+
+On the **Ansible control node** (the machine you run `ansible-playbook` from):
+```bash
+# Ansible itself
+pip install ansible-core
+# or
+dnf install -y ansible-core
+```
+
+On the **target host** (applied automatically by the playbooks):
+- EPEL repo must already be installed
+- SSH access with a user that can `sudo`
+
+## Setup
+
+### 1. Configure your inventory
+
+Edit `inventory/hosts.yml`:
+```yaml
+all:
+  hosts:
+    nginx-proxy:
+      ansible_host: 192.168.1.10          # your server's IP or hostname
+      ansible_user: your_ssh_user
+      # ansible_ssh_private_key_file: ~/.ssh/id_rsa
+```
+
+### 2. Configure HTTP→HTTPS redirects
+
+Edit `roles/nginx_hardening/defaults/main.yml` and populate `nginx_redirect_services` with any services that are **missing** a port-80 redirect in their existing NGINX config:
+
+```yaml
+nginx_redirect_services:
+  - name: myapp
+    server_name: myapp.example.com
+  - name: dashboard
+    server_name: dashboard.example.com
+```
+
+Services that already have a redirect in their existing `conf.d/` file should **not** be listed here.
+
+### 3. (Optional) Tune defaults
+
+All tunable variables live in each role's `defaults/main.yml`:
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `nginx_hsts_max_age` | `31536000` | HSTS max-age in seconds |
+| `nginx_rate_limit_req_zone` | `30r/m` | Rate limit zone definition |
+| `nginx_client_max_body_size` | `10m` | Max upload body size |
+| `fail2ban_bantime` | `3600` | Ban duration (seconds) |
+| `fail2ban_maxretry_ssh` | `5` | SSH failures before ban |
+| `fail2ban_maxretry_nginx_auth` | `5` | 401/403 failures before ban |
+
+## Running
+
+```bash
+# Full hardening (all roles)
+ansible-playbook -K site.yml
+
+# Individual roles
+ansible-playbook -K playbooks/nginx_hardening.yml
+ansible-playbook -K playbooks/fail2ban.yml
+ansible-playbook -K playbooks/geo_blocking.yml
+
+# Refresh country IP ranges (run periodically — ipdeny.com updates regularly)
+ansible-playbook -K playbooks/update_geo_blocks.yml
+
+# Dry run — no changes applied
+ansible-playbook -K --check site.yml
+```
+
+`-K` prompts for the sudo password. Omit it if your user has passwordless sudo.
+
+## Geo-blocking: servers without direct internet access
+
+If your target server cannot reach `ipdeny.com`, pre-download the zone files on a machine that can and copy them over:
+
+```bash
+# On a machine WITH unrestricted internet access:
+./scripts/download-geo-zones.sh /tmp/geo_zones
+
+# Copy to the target server:
+rsync -av --no-group /tmp/geo_zones/ user@your-server:/opt/geo_zones/
+
+# Run the playbook pointing at the local cache:
+ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+```
+
+To make the cache path permanent, add it to your inventory:
+```yaml
+all:
+  hosts:
+    nginx-proxy:
+      ansible_host: 192.168.1.10
+      ansible_user: your_ssh_user
+      geo_zone_files_dir: /opt/geo_zones
+```
+
+### Unblocking a country
+
+Set `blocked: false` for the desired country code in `roles/geo_blocking/defaults/main.yml`, then re-run `update_geo_blocks.yml`.
+
+## Verification
+
+After a successful run:
+
+```bash
+# NGINX config is valid
+sudo nginx -t
+
+# Security headers are present
+curl -sI https://your-domain.com | grep -i 'strict\|x-frame\|x-content'
+
+# HTTP redirects to HTTPS
+curl -I http://your-domain.com   # expect: 301 Moved Permanently
+
+# fail2ban jails are active
+sudo fail2ban-client status
+sudo fail2ban-client status nginx-4xx
+
+# nftables geo-block table is loaded
+sudo nft list table inet geo_block
+```
+
+## Files written to the target host
+
+| Path | Action |
+|------|--------|
+| `/etc/nginx/conf.d/00-security-headers.conf` | Created |
+| `/etc/nginx/conf.d/00-ssl-params.conf` | Created |
+| `/etc/nginx/conf.d/00-proxy-params.conf` | Created |
+| `/etc/nginx/conf.d/00-http-redirects.conf` | Created |
+| `/etc/fail2ban/jail.local` | Created |
+| `/etc/fail2ban/filter.d/nginx-4xx.conf` | Created |
+| `/etc/fail2ban/filter.d/nginx-auth.conf` | Created |
+| `/etc/nftables.d/geo-block.nft` | Created |
+| `/etc/sysconfig/nftables.conf` | Appended (include line) |
+
+All tasks that write files use `backup: yes` — a timestamped copy is created automatically before each overwrite.

+ 4 - 0
tftsr_nginx-hardening/nginx-hardening/ansible.cfg

@@ -0,0 +1,4 @@
+[defaults]
+inventory = inventory/hosts.yml
+roles_path = roles
+# NOTE(review): disabling host key checking trades MITM protection for
+# convenience — prefer a known_hosts entry for the proxy host instead.
+host_key_checking = False

+ 7 - 0
tftsr_nginx-hardening/nginx-hardening/inventory/hosts.yml

@@ -0,0 +1,7 @@
+# Template inventory — replace the placeholders before first run.
+all:
+  hosts:
+    nginx-proxy:
+      ansible_host: YOUR_SERVER_IP
+      ansible_user: YOUR_SSH_USER
+      # ansible_ssh_private_key_file: ~/.ssh/id_rsa
+      # geo_zone_files_dir: /opt/geo_zones   # set if server cannot reach ipdeny.com

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/fail2ban.yml

@@ -0,0 +1,5 @@
+---
+# Standalone entry point: run only the fail2ban role (site.yml runs all roles).
+- hosts: all
+  become: true
+  roles:
+    - fail2ban

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/geo_blocking.yml

@@ -0,0 +1,5 @@
+---
+# Standalone entry point: run only the geo_blocking role (site.yml runs all roles).
+- hosts: all
+  become: true
+  roles:
+    - geo_blocking

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/nginx_hardening.yml

@@ -0,0 +1,5 @@
+---
+# Standalone entry point: run only the nginx_hardening role (site.yml runs all roles).
+- hosts: all
+  become: true
+  roles:
+    - nginx_hardening

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/update_geo_blocks.yml

@@ -0,0 +1,5 @@
+---
+# Periodic refresh of country IP ranges. Intentionally identical to
+# geo_blocking.yml — the role re-downloads zone files on every run; this file
+# exists as a separately named entry point for scheduled refreshes.
+- hosts: all
+  become: true
+  roles:
+    - geo_blocking

+ 7 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/defaults/main.yml

@@ -0,0 +1,7 @@
+---
+# fail2ban tunables (all times in seconds).
+fail2ban_bantime: 3600        # ban duration: 1 hour
+fail2ban_findtime: 600        # sliding window in which retries are counted
+fail2ban_maxretry_ssh: 5
+fail2ban_maxretry_nginx_4xx: 20
+fail2ban_maxretry_nginx_auth: 5
+fail2ban_ignoreip: "127.0.0.1/8 ::1"   # never ban loopback

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: restart fail2ban
+  ansible.builtin.service:
+    name: fail2ban
+    state: restarted

+ 41 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/tasks/main.yml

@@ -0,0 +1,41 @@
+---
+- name: Install fail2ban
+  ansible.builtin.dnf:
+    name: fail2ban
+    state: present
+
+- name: Deploy nginx-4xx filter
+  ansible.builtin.template:
+    src: nginx-4xx.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-4xx.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy nginx-auth filter
+  ansible.builtin.template:
+    src: nginx-auth.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-auth.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy jail.local configuration
+  ansible.builtin.template:
+    src: jail.local.j2
+    dest: /etc/fail2ban/jail.local
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Enable and start fail2ban service
+  ansible.builtin.service:
+    name: fail2ban
+    state: started
+    enabled: yes

+ 22 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/jail.local.j2

@@ -0,0 +1,22 @@
+[DEFAULT]
+ignoreip = {{ fail2ban_ignoreip }}
+bantime  = {{ fail2ban_bantime }}
+findtime = {{ fail2ban_findtime }}
+
+[sshd]
+enabled  = true
+port     = ssh
+logpath  = /var/log/secure
+maxretry = {{ fail2ban_maxretry_ssh }}
+
+[nginx-4xx]
+enabled  = true
+filter   = nginx-4xx
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_4xx }}
+
+[nginx-auth]
+enabled  = true
+filter   = nginx-auth
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_auth }}

+ 3 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (4[0-9]{2}) \d+
+ignoreregex =

+ 3 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (401|403) \d+
+ignoreregex =

+ 509 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/defaults/main.yml

@@ -0,0 +1,509 @@
+---
+geo_ipdeny_base_url: "https://www.ipdeny.com/ipblocks/data/aggregated"
+geo_nft_table_dir: "/etc/nftables.d"
+geo_nft_file: "/etc/nftables.d/geo-block.nft"
+# Set this to a directory containing pre-downloaded {cc}.zone files when the
+# target host has no outbound internet access. Leave empty to download live.
+geo_zone_files_dir: ""
+
+geo_countries:
+  - code: AD   # Andorra
+    blocked: true
+  - code: AE   # United Arab Emirates
+    blocked: true
+  - code: AF   # Afghanistan
+    blocked: true
+  - code: AG   # Antigua and Barbuda
+    blocked: true
+  - code: AI   # Anguilla
+    blocked: true
+  - code: AL   # Albania
+    blocked: true
+  - code: AM   # Armenia
+    blocked: true
+  - code: AO   # Angola
+    blocked: true
+  - code: AQ   # Antarctica
+    blocked: true
+  - code: AR   # Argentina
+    blocked: true
+  - code: AS   # American Samoa
+    blocked: true
+  - code: AT   # Austria
+    blocked: true
+  - code: AU   # Australia
+    blocked: true
+  - code: AW   # Aruba
+    blocked: true
+  - code: AX   # Aland Islands
+    blocked: true
+  - code: AZ   # Azerbaijan
+    blocked: true
+  - code: BA   # Bosnia and Herzegovina
+    blocked: true
+  - code: BB   # Barbados
+    blocked: true
+  - code: BD   # Bangladesh
+    blocked: true
+  - code: BE   # Belgium
+    blocked: true
+  - code: BF   # Burkina Faso
+    blocked: true
+  - code: BG   # Bulgaria
+    blocked: true
+  - code: BH   # Bahrain
+    blocked: true
+  - code: BI   # Burundi
+    blocked: true
+  - code: BJ   # Benin
+    blocked: true
+  - code: BL   # Saint Barthelemy
+    blocked: true
+  - code: BM   # Bermuda
+    blocked: true
+  - code: BN   # Brunei Darussalam
+    blocked: true
+  - code: BO   # Bolivia
+    blocked: true
+  - code: BQ   # Bonaire
+    blocked: true
+  - code: BR   # Brazil
+    blocked: true
+  - code: BS   # Bahamas
+    blocked: true
+  - code: BT   # Bhutan
+    blocked: true
+  - code: BV   # Bouvet Island — no ipdeny zone file
+    blocked: false
+  - code: BW   # Botswana
+    blocked: true
+  - code: BY   # Belarus
+    blocked: true
+  - code: BZ   # Belize
+    blocked: true
+  - code: CA   # Canada
+    blocked: true
+  - code: CC   # Cocos Islands
+    blocked: true
+  - code: CD   # Dem. Rep. Congo
+    blocked: true
+  - code: CF   # Central African Republic
+    blocked: true
+  - code: CG   # Congo
+    blocked: true
+  - code: CH   # Switzerland
+    blocked: true
+  - code: CI   # Cote d'Ivoire
+    blocked: true
+  - code: CK   # Cook Islands
+    blocked: true
+  - code: CL   # Chile
+    blocked: true
+  - code: CM   # Cameroon
+    blocked: true
+  - code: CN   # China
+    blocked: true
+  - code: CO   # Colombia
+    blocked: true
+  - code: CR   # Costa Rica
+    blocked: true
+  - code: CU   # Cuba
+    blocked: true
+  - code: CV   # Cabo Verde
+    blocked: true
+  - code: CW   # Curacao
+    blocked: true
+  - code: CX   # Christmas Island — no ipdeny zone file
+    blocked: false
+  - code: CY   # Cyprus
+    blocked: true
+  - code: CZ   # Czechia
+    blocked: true
+  - code: DE   # Germany
+    blocked: true
+  - code: DJ   # Djibouti
+    blocked: true
+  - code: DK   # Denmark
+    blocked: true
+  - code: DM   # Dominica
+    blocked: true
+  - code: DO   # Dominican Republic
+    blocked: true
+  - code: DZ   # Algeria
+    blocked: true
+  - code: EC   # Ecuador
+    blocked: true
+  - code: EE   # Estonia
+    blocked: true
+  - code: EG   # Egypt
+    blocked: true
+  - code: EH   # Western Sahara — no ipdeny zone file
+    blocked: false
+  - code: ER   # Eritrea
+    blocked: true
+  - code: ES   # Spain
+    blocked: true
+  - code: ET   # Ethiopia
+    blocked: true
+  - code: FI   # Finland
+    blocked: true
+  - code: FJ   # Fiji
+    blocked: true
+  - code: FK   # Falkland Islands
+    blocked: true
+  - code: FM   # Micronesia
+    blocked: true
+  - code: FO   # Faroe Islands
+    blocked: true
+  - code: FR   # France
+    blocked: true
+  - code: GA   # Gabon
+    blocked: true
+  - code: GB   # United Kingdom
+    blocked: true
+  - code: GD   # Grenada
+    blocked: true
+  - code: GE   # Georgia
+    blocked: true
+  - code: GF   # French Guiana
+    blocked: true
+  - code: GG   # Guernsey
+    blocked: true
+  - code: GH   # Ghana
+    blocked: true
+  - code: GI   # Gibraltar
+    blocked: true
+  - code: GL   # Greenland
+    blocked: true
+  - code: GM   # Gambia
+    blocked: true
+  - code: GN   # Guinea
+    blocked: true
+  - code: GP   # Guadeloupe
+    blocked: true
+  - code: GQ   # Equatorial Guinea
+    blocked: true
+  - code: GR   # Greece
+    blocked: true
+  - code: GS   # South Georgia — no ipdeny zone file
+    blocked: false
+  - code: GT   # Guatemala
+    blocked: true
+  - code: GU   # Guam
+    blocked: true
+  - code: GW   # Guinea-Bissau
+    blocked: true
+  - code: GY   # Guyana
+    blocked: true
+  - code: HK   # Hong Kong
+    blocked: true
+  - code: HM   # Heard Island — no ipdeny zone file
+    blocked: false
+  - code: HN   # Honduras
+    blocked: true
+  - code: HR   # Croatia
+    blocked: true
+  - code: HT   # Haiti
+    blocked: true
+  - code: HU   # Hungary
+    blocked: true
+  - code: ID   # Indonesia
+    blocked: true
+  - code: IE   # Ireland
+    blocked: true
+  - code: IL   # Israel
+    blocked: true
+  - code: IM   # Isle of Man
+    blocked: true
+  - code: IN   # India
+    blocked: true
+  - code: IO   # British Indian Ocean Territory
+    blocked: true
+  - code: IQ   # Iraq
+    blocked: true
+  - code: IR   # Iran
+    blocked: true
+  - code: IS   # Iceland
+    blocked: true
+  - code: IT   # Italy
+    blocked: true
+  - code: JE   # Jersey
+    blocked: true
+  - code: JM   # Jamaica
+    blocked: true
+  - code: JO   # Jordan
+    blocked: true
+  - code: JP   # Japan
+    blocked: true
+  - code: KE   # Kenya
+    blocked: true
+  - code: KG   # Kyrgyzstan
+    blocked: true
+  - code: KH   # Cambodia
+    blocked: true
+  - code: KI   # Kiribati
+    blocked: true
+  - code: KM   # Comoros
+    blocked: true
+  - code: KN   # Saint Kitts and Nevis
+    blocked: true
+  - code: KP   # North Korea
+    blocked: true
+  - code: KR   # South Korea
+    blocked: true
+  - code: KW   # Kuwait
+    blocked: true
+  - code: KY   # Cayman Islands
+    blocked: true
+  - code: KZ   # Kazakhstan
+    blocked: true
+  - code: LA   # Laos
+    blocked: true
+  - code: LB   # Lebanon
+    blocked: true
+  - code: LC   # Saint Lucia
+    blocked: true
+  - code: LI   # Liechtenstein
+    blocked: true
+  - code: LK   # Sri Lanka
+    blocked: true
+  - code: LR   # Liberia
+    blocked: true
+  - code: LS   # Lesotho
+    blocked: true
+  - code: LT   # Lithuania
+    blocked: true
+  - code: LU   # Luxembourg
+    blocked: true
+  - code: LV   # Latvia
+    blocked: true
+  - code: LY   # Libya
+    blocked: true
+  - code: MA   # Morocco
+    blocked: true
+  - code: MC   # Monaco
+    blocked: true
+  - code: MD   # Moldova
+    blocked: true
+  - code: ME   # Montenegro
+    blocked: true
+  - code: MF   # Saint Martin
+    blocked: true
+  - code: MG   # Madagascar
+    blocked: true
+  - code: MH   # Marshall Islands
+    blocked: true
+  - code: MK   # North Macedonia
+    blocked: true
+  - code: ML   # Mali
+    blocked: true
+  - code: MM   # Myanmar
+    blocked: true
+  - code: MN   # Mongolia
+    blocked: true
+  - code: MO   # Macao
+    blocked: true
+  - code: MP   # Northern Mariana Islands
+    blocked: true
+  - code: MQ   # Martinique
+    blocked: true
+  - code: MR   # Mauritania
+    blocked: true
+  - code: MS   # Montserrat
+    blocked: true
+  - code: MT   # Malta
+    blocked: true
+  - code: MU   # Mauritius
+    blocked: true
+  - code: MV   # Maldives
+    blocked: true
+  - code: MW   # Malawi
+    blocked: true
+  - code: MX   # Mexico
+    blocked: true
+  - code: MY   # Malaysia
+    blocked: true
+  - code: MZ   # Mozambique
+    blocked: true
+  - code: NA   # Namibia
+    blocked: true
+  - code: NC   # New Caledonia
+    blocked: true
+  - code: NE   # Niger
+    blocked: true
+  - code: NF   # Norfolk Island
+    blocked: true
+  - code: NG   # Nigeria
+    blocked: true
+  - code: NI   # Nicaragua
+    blocked: true
+  - code: NL   # Netherlands
+    blocked: true
+  - code: "NO"  # Norway
+    blocked: true
+  - code: NP   # Nepal
+    blocked: true
+  - code: NR   # Nauru
+    blocked: true
+  - code: NU   # Niue
+    blocked: true
+  - code: NZ   # New Zealand
+    blocked: true
+  - code: OM   # Oman
+    blocked: true
+  - code: PA   # Panama
+    blocked: true
+  - code: PE   # Peru
+    blocked: true
+  - code: PF   # French Polynesia
+    blocked: true
+  - code: PG   # Papua New Guinea
+    blocked: true
+  - code: PH   # Philippines
+    blocked: true
+  - code: PK   # Pakistan
+    blocked: true
+  - code: PL   # Poland
+    blocked: true
+  - code: PM   # Saint Pierre and Miquelon
+    blocked: true
+  - code: PN   # Pitcairn — no ipdeny zone file
+    blocked: false
+  - code: PR   # Puerto Rico
+    blocked: true
+  - code: PS   # Palestine
+    blocked: true
+  - code: PT   # Portugal
+    blocked: true
+  - code: PW   # Palau
+    blocked: true
+  - code: PY   # Paraguay
+    blocked: true
+  - code: QA   # Qatar
+    blocked: true
+  - code: RE   # Reunion
+    blocked: true
+  - code: RO   # Romania
+    blocked: true
+  - code: RS   # Serbia
+    blocked: true
+  - code: RU   # Russia
+    blocked: true
+  - code: RW   # Rwanda
+    blocked: true
+  - code: SA   # Saudi Arabia
+    blocked: true
+  - code: SB   # Solomon Islands
+    blocked: true
+  - code: SC   # Seychelles
+    blocked: true
+  - code: SD   # Sudan
+    blocked: true
+  - code: SE   # Sweden
+    blocked: true
+  - code: SG   # Singapore
+    blocked: true
+  - code: SH   # Saint Helena — no ipdeny zone file
+    blocked: false
+  - code: SI   # Slovenia
+    blocked: true
+  - code: SJ   # Svalbard and Jan Mayen — no ipdeny zone file
+    blocked: false
+  - code: SK   # Slovakia
+    blocked: true
+  - code: SL   # Sierra Leone
+    blocked: true
+  - code: SM   # San Marino
+    blocked: true
+  - code: SN   # Senegal
+    blocked: true
+  - code: SO   # Somalia
+    blocked: true
+  - code: SR   # Suriname
+    blocked: true
+  - code: SS   # South Sudan
+    blocked: true
+  - code: ST   # Sao Tome and Principe
+    blocked: true
+  - code: SV   # El Salvador
+    blocked: true
+  - code: SX   # Sint Maarten
+    blocked: true
+  - code: SY   # Syria
+    blocked: true
+  - code: SZ   # Eswatini
+    blocked: true
+  - code: TC   # Turks and Caicos Islands
+    blocked: true
+  - code: TD   # Chad
+    blocked: true
+  - code: TF   # French Southern Territories — no ipdeny zone file
+    blocked: false
+  - code: TG   # Togo
+    blocked: true
+  - code: TH   # Thailand
+    blocked: true
+  - code: TJ   # Tajikistan
+    blocked: true
+  - code: TK   # Tokelau
+    blocked: true
+  - code: TL   # Timor-Leste
+    blocked: true
+  - code: TM   # Turkmenistan
+    blocked: true
+  - code: TN   # Tunisia
+    blocked: true
+  - code: TO   # Tonga
+    blocked: true
+  - code: TR   # Turkey
+    blocked: true
+  - code: TT   # Trinidad and Tobago
+    blocked: true
+  - code: TV   # Tuvalu
+    blocked: true
+  - code: TW   # Taiwan
+    blocked: true
+  - code: TZ   # Tanzania
+    blocked: true
+  - code: UA   # Ukraine
+    blocked: true
+  - code: UG   # Uganda
+    blocked: true
+  - code: UM   # US Minor Outlying Islands
+    blocked: true
+  - code: US   # United States
+    blocked: false
+  - code: UY   # Uruguay
+    blocked: true
+  - code: UZ   # Uzbekistan
+    blocked: true
+  - code: VA   # Vatican City
+    blocked: true
+  - code: VC   # Saint Vincent and the Grenadines
+    blocked: true
+  - code: VE   # Venezuela
+    blocked: true
+  - code: VG   # British Virgin Islands
+    blocked: true
+  - code: VI   # US Virgin Islands
+    blocked: true
+  - code: VN   # Vietnam
+    blocked: true
+  - code: VU   # Vanuatu
+    blocked: true
+  - code: WF   # Wallis and Futuna
+    blocked: true
+  - code: WS   # Samoa
+    blocked: true
+  - code: XK   # Kosovo — no ipdeny zone file
+    blocked: false
+  - code: YE   # Yemen
+    blocked: true
+  - code: YT   # Mayotte
+    blocked: true
+  - code: ZA   # South Africa
+    blocked: true
+  - code: ZM   # Zambia
+    blocked: true
+  - code: ZW   # Zimbabwe
+    blocked: true

+ 4 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/handlers/main.yml

@@ -0,0 +1,4 @@
+---
+- name: reload nftables
+  ansible.builtin.command: nft -f {{ geo_nft_file }}
+  changed_when: true

+ 103 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/tasks/main.yml

@@ -0,0 +1,103 @@
+---
+- name: Ensure nftables.d directory exists
+  ansible.builtin.file:
+    path: "{{ geo_nft_table_dir }}"
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+
+- name: Create temp directory for zone files
+  ansible.builtin.tempfile:
+    state: directory
+    suffix: geo_zones
+  register: geo_temp_dir
+
+# --- Source: live download ---
+
+- name: Test connectivity to ipdeny.com (fast pre-check)
+  ansible.builtin.uri:
+    url: "{{ geo_ipdeny_base_url }}/us-aggregated.zone"
+    method: HEAD
+    timeout: 8
+  register: geo_connectivity_check
+  ignore_errors: yes
+  when: geo_zone_files_dir | length == 0
+
+- name: Fail fast if ipdeny.com is unreachable and no local cache configured
+  ansible.builtin.fail:
+    msg: >-
+      Cannot reach ipdeny.com (connection timed out or refused) and
+      geo_zone_files_dir is not set. Pre-download zone files on a machine
+      with internet access using scripts/download-geo-zones.sh, copy them
+      to this host, then set geo_zone_files_dir in inventory or with -e.
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is failed
+
+- name: Download zone files for blocked countries
+  ansible.builtin.get_url:
+    url: "{{ geo_ipdeny_base_url }}/{{ item.code | lower }}-aggregated.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    timeout: 30
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is succeeded
+
+# --- Source: local pre-downloaded cache ---
+
+- name: Copy zone files from local cache directory
+  ansible.builtin.copy:
+    src: "{{ geo_zone_files_dir }}/{{ item.code | lower }}.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    remote_src: yes
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when: geo_zone_files_dir | length > 0
+
+# --- Assemble and deploy ---
+
+- name: Assemble all CIDRs from downloaded zone files
+  ansible.builtin.shell: >
+    cat {{ geo_temp_dir.path }}/*.zone 2>/dev/null |
+    grep -v '^#' | grep -v '^$' | sort -u
+  register: geo_cidrs_raw
+  changed_when: false
+
+- name: Set geo_blocked_cidrs fact
+  ansible.builtin.set_fact:
+    geo_blocked_cidrs: "{{ geo_cidrs_raw.stdout_lines }}"
+
+- name: Deploy geo-block nftables ruleset
+  ansible.builtin.template:
+    src: geo-block.nft.j2
+    dest: "{{ geo_nft_file }}"
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nftables
+
+- name: Ensure nftables.conf includes geo-block.nft
+  ansible.builtin.lineinfile:
+    path: /etc/sysconfig/nftables.conf
+    line: 'include "{{ geo_nft_file }}"'
+    state: present
+    backup: yes
+
+- name: Enable and start nftables service
+  ansible.builtin.service:
+    name: nftables
+    state: started
+    enabled: yes
+
+- name: Clean up temp directory
+  ansible.builtin.file:
+    path: "{{ geo_temp_dir.path }}"
+    state: absent

+ 26 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2

@@ -0,0 +1,26 @@
+#!/usr/sbin/nft -f
+# Managed by Ansible — do not edit manually
+
+# Ensure table exists, then flush for idempotency
+add table inet geo_block
+flush table inet geo_block
+
+table inet geo_block {
+    set blocked_countries {
+        type ipv4_addr
+        flags interval
+{% if geo_blocked_cidrs | length > 0 %}
+        elements = {
+{% for cidr in geo_blocked_cidrs %}
+            {{ cidr }}{% if not loop.last %},{% endif %}
+
+{% endfor %}
+        }
+{% endif %}
+    }
+
+    chain prerouting {
+        type filter hook prerouting priority -100; policy accept;
+        ip saddr @blocked_countries drop
+    }
+}

+ 15 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/defaults/main.yml

@@ -0,0 +1,15 @@
+---
+nginx_ssl_protocols: "TLSv1.2 TLSv1.3"
+nginx_ssl_ciphers: "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256"
+nginx_hsts_max_age: 31536000
+nginx_rate_limit_req_zone: "$binary_remote_addr zone=general:10m rate=30r/m"
+nginx_client_max_body_size: "10m"
+nginx_proxy_read_timeout: 60
+
+# Services that need a port-80 → HTTPS redirect added.
+# List only services that do NOT already have a redirect in their existing config.
+# Empty by default so no placeholder vhosts are deployed; override in inventory.
+nginx_redirect_services: []
+# Example:
+#   - name: service1
+#     server_name: service1.example.com

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: reload nginx
+  ansible.builtin.service:
+    name: nginx
+    state: reloaded

+ 44 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/tasks/main.yml

@@ -0,0 +1,44 @@
+---
+- name: Deploy security headers configuration
+  ansible.builtin.template:
+    src: security_headers.conf.j2
+    dest: /etc/nginx/conf.d/00-security-headers.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy SSL parameters configuration
+  ansible.builtin.template:
+    src: ssl_params.conf.j2
+    dest: /etc/nginx/conf.d/00-ssl-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy proxy parameters configuration
+  ansible.builtin.template:
+    src: proxy_params.conf.j2
+    dest: /etc/nginx/conf.d/00-proxy-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy HTTP to HTTPS redirect configuration
+  ansible.builtin.template:
+    src: http_redirect.conf.j2
+    dest: /etc/nginx/conf.d/00-http-redirects.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Validate NGINX configuration
+  ansible.builtin.command: nginx -t
+  changed_when: false

+ 8 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+{% for svc in nginx_redirect_services %}
+server {
+    listen 80;
+    server_name {{ svc.server_name }};
+    return 301 https://$host$request_uri;
+}
+{% endfor %}

+ 8 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+
+proxy_hide_header X-Powered-By;
+proxy_hide_header Server;
+proxy_set_header X-Real-IP $remote_addr;
+proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+proxy_set_header X-Forwarded-Proto $scheme;
+proxy_read_timeout {{ nginx_proxy_read_timeout }};

+ 17 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2

@@ -0,0 +1,17 @@
+# Managed by Ansible — do not edit manually
+
+server_tokens off;
+
+# Rate limiting zone definition
+limit_req_zone {{ nginx_rate_limit_req_zone }};
+
+# Client body size limit
+client_max_body_size {{ nginx_client_max_body_size }};
+
+# Security headers
+add_header Strict-Transport-Security "max-age={{ nginx_hsts_max_age }}; includeSubDomains; preload" always;
+add_header X-Frame-Options SAMEORIGIN always;
+add_header X-Content-Type-Options nosniff always;
+add_header Referrer-Policy strict-origin-when-cross-origin always;
+add_header Permissions-Policy "geolocation=(), microphone=(), camera=()" always;
+add_header X-XSS-Protection "0" always;

+ 10 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2

@@ -0,0 +1,10 @@
+# Managed by Ansible — do not edit manually
+
+ssl_protocols {{ nginx_ssl_protocols }};
+ssl_ciphers {{ nginx_ssl_ciphers }};
+ssl_prefer_server_ciphers off;
+ssl_session_timeout 1d;
+ssl_stapling on;
+ssl_stapling_verify on;
+resolver 8.8.8.8 8.8.4.4 valid=300s;
+resolver_timeout 5s;

+ 71 - 0
tftsr_nginx-hardening/nginx-hardening/scripts/download-geo-zones.sh

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Download ipdeny.com aggregated zone files for all blocked countries.
+# Run this on a machine WITH internet access, then rsync the output
+# directory to the DMZ host and set geo_zone_files_dir in your inventory.
+#
+# Usage:
+#   ./scripts/download-geo-zones.sh [output-dir]
+#
+# Example workflow:
+#   # On your workstation:
+#   ./scripts/download-geo-zones.sh /tmp/geo_zones
+#   rsync -av /tmp/geo_zones/ sarman@dmz-host:/opt/geo_zones/
+#
+#   # Then run the playbook pointing at the cache:
+#   ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+
+set -euo pipefail
+
+BASE_URL="https://www.ipdeny.com/ipblocks/data/aggregated"
+OUT_DIR="${1:-/tmp/geo_zones}"
+
+# All blocked country codes (excludes US and ipdeny-absent territories)
+COUNTRIES=(
+  AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ
+  BA BB BD BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BW BY BZ
+  CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CY CZ
+  DE DJ DK DM DO DZ
+  EC EE EG ER ES ET
+  FI FJ FK FM FO FR
+  GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GT GU GW GY
+  HK HN HR HT HU
+  ID IE IL IM IN IO IQ IR IS IT
+  JE JM JO JP
+  KE KG KH KI KM KN KP KR KW KY KZ
+  LA LB LC LI LK LR LS LT LU LV LY
+  MA MC MD ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ
+  NA NC NE NF NG NI NL NO NP NR NU NZ
+  OM
+  PA PE PF PG PH PK PL PM PR PS PT PW PY
+  QA
+  RE RO RS RU RW
+  SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ
+  TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ
+  UA UG UM UY UZ
+  VA VC VE VG VI VN VU
+  WF WS
+  YE YT
+  ZA ZM ZW
+)
+
+mkdir -p "$OUT_DIR"
+echo "Downloading ${#COUNTRIES[@]} zone files to $OUT_DIR ..."
+
+ok=0; fail=0
+for cc in "${COUNTRIES[@]}"; do
+  url="${BASE_URL}/${cc,,}-aggregated.zone"
+  dest="${OUT_DIR}/${cc,,}.zone"
+  if curl -fsSL --connect-timeout 10 --max-time 30 -o "$dest" "$url"; then
+    (( ++ok ))
+  else
+    echo "  SKIP $cc (no zone file at ipdeny.com)"
+    rm -f "$dest"
+    (( ++fail ))
+  fi
+done
+
+echo "Done: $ok downloaded, $fail skipped."
+echo ""
+echo "Next steps:"
+echo "  rsync -av ${OUT_DIR}/ USER@DMZ_HOST:/opt/geo_zones/"
+echo "  ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones"

+ 7 - 0
tftsr_nginx-hardening/nginx-hardening/site.yml

@@ -0,0 +1,7 @@
+---
+- hosts: all
+  become: true
+  roles:
+    - nginx_hardening
+    - fail2ban
+    - geo_blocking

+ 6 - 0
tftsr_nginx-hardening/playbooks/fail2ban.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - fail2ban

+ 6 - 0
tftsr_nginx-hardening/playbooks/geo_blocking.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - geo_blocking

+ 6 - 0
tftsr_nginx-hardening/playbooks/nginx_hardening.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - nginx_hardening

+ 6 - 0
tftsr_nginx-hardening/playbooks/update_geo_blocks.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - geo_blocking

+ 7 - 0
tftsr_nginx-hardening/roles/fail2ban/defaults/main.yml

@@ -0,0 +1,7 @@
+---
+fail2ban_bantime: 3600
+fail2ban_findtime: 600
+fail2ban_maxretry_ssh: 5
+fail2ban_maxretry_nginx_4xx: 20
+fail2ban_maxretry_nginx_auth: 5
+fail2ban_ignoreip: "127.0.0.1/8 ::1"

+ 5 - 0
tftsr_nginx-hardening/roles/fail2ban/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: restart fail2ban
+  ansible.builtin.service:
+    name: fail2ban
+    state: restarted

+ 41 - 0
tftsr_nginx-hardening/roles/fail2ban/tasks/main.yml

@@ -0,0 +1,41 @@
+---
+- name: Install fail2ban
+  ansible.builtin.dnf:
+    name: fail2ban
+    state: present
+
+- name: Deploy nginx-4xx filter
+  ansible.builtin.template:
+    src: nginx-4xx.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-4xx.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy nginx-auth filter
+  ansible.builtin.template:
+    src: nginx-auth.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-auth.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy jail.local configuration
+  ansible.builtin.template:
+    src: jail.local.j2
+    dest: /etc/fail2ban/jail.local
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Enable and start fail2ban service
+  ansible.builtin.service:
+    name: fail2ban
+    state: started
+    enabled: yes

+ 22 - 0
tftsr_nginx-hardening/roles/fail2ban/templates/jail.local.j2

@@ -0,0 +1,22 @@
+[DEFAULT]
+ignoreip = {{ fail2ban_ignoreip }}
+bantime  = {{ fail2ban_bantime }}
+findtime = {{ fail2ban_findtime }}
+
+[sshd]
+enabled  = true
+port     = ssh
+logpath  = /var/log/secure
+maxretry = {{ fail2ban_maxretry_ssh }}
+
+[nginx-4xx]
+enabled  = true
+filter   = nginx-4xx
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_4xx }}
+
+[nginx-auth]
+enabled  = true
+filter   = nginx-auth
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_auth }}

+ 3 - 0
tftsr_nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (4[0-9]{2}) \d+
+ignoreregex =

+ 3 - 0
tftsr_nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (401|403) \d+
+ignoreregex =

+ 509 - 0
tftsr_nginx-hardening/roles/geo_blocking/defaults/main.yml

@@ -0,0 +1,509 @@
+---
+geo_ipdeny_base_url: "https://www.ipdeny.com/ipblocks/data/aggregated"
+geo_nft_table_dir: "/etc/nftables.d"
+geo_nft_file: "/etc/nftables.d/geo-block.nft"
+# Set this to a directory containing pre-downloaded {cc}.zone files when the
+# target host has no outbound internet access. Leave empty to download live.
+geo_zone_files_dir: ""
+
+geo_countries:
+  - code: AD   # Andorra
+    blocked: true
+  - code: AE   # United Arab Emirates
+    blocked: true
+  - code: AF   # Afghanistan
+    blocked: true
+  - code: AG   # Antigua and Barbuda
+    blocked: true
+  - code: AI   # Anguilla
+    blocked: true
+  - code: AL   # Albania
+    blocked: true
+  - code: AM   # Armenia
+    blocked: true
+  - code: AO   # Angola
+    blocked: true
+  - code: AQ   # Antarctica
+    blocked: true
+  - code: AR   # Argentina
+    blocked: true
+  - code: AS   # American Samoa
+    blocked: true
+  - code: AT   # Austria
+    blocked: true
+  - code: AU   # Australia
+    blocked: true
+  - code: AW   # Aruba
+    blocked: true
+  - code: AX   # Aland Islands
+    blocked: true
+  - code: AZ   # Azerbaijan
+    blocked: true
+  - code: BA   # Bosnia and Herzegovina
+    blocked: true
+  - code: BB   # Barbados
+    blocked: true
+  - code: BD   # Bangladesh
+    blocked: true
+  - code: BE   # Belgium
+    blocked: true
+  - code: BF   # Burkina Faso
+    blocked: true
+  - code: BG   # Bulgaria
+    blocked: true
+  - code: BH   # Bahrain
+    blocked: true
+  - code: BI   # Burundi
+    blocked: true
+  - code: BJ   # Benin
+    blocked: true
+  - code: BL   # Saint Barthelemy
+    blocked: true
+  - code: BM   # Bermuda
+    blocked: true
+  - code: BN   # Brunei Darussalam
+    blocked: true
+  - code: BO   # Bolivia
+    blocked: true
+  - code: BQ   # Bonaire
+    blocked: true
+  - code: BR   # Brazil
+    blocked: true
+  - code: BS   # Bahamas
+    blocked: true
+  - code: BT   # Bhutan
+    blocked: true
+  - code: BV   # Bouvet Island — no ipdeny zone file
+    blocked: false
+  - code: BW   # Botswana
+    blocked: true
+  - code: BY   # Belarus
+    blocked: true
+  - code: BZ   # Belize
+    blocked: true
+  - code: CA   # Canada
+    blocked: true
+  - code: CC   # Cocos Islands
+    blocked: true
+  - code: CD   # Dem. Rep. Congo
+    blocked: true
+  - code: CF   # Central African Republic
+    blocked: true
+  - code: CG   # Congo
+    blocked: true
+  - code: CH   # Switzerland
+    blocked: true
+  - code: CI   # Cote d'Ivoire
+    blocked: true
+  - code: CK   # Cook Islands
+    blocked: true
+  - code: CL   # Chile
+    blocked: true
+  - code: CM   # Cameroon
+    blocked: true
+  - code: CN   # China
+    blocked: true
+  - code: CO   # Colombia
+    blocked: true
+  - code: CR   # Costa Rica
+    blocked: true
+  - code: CU   # Cuba
+    blocked: true
+  - code: CV   # Cabo Verde
+    blocked: true
+  - code: CW   # Curacao
+    blocked: true
+  - code: CX   # Christmas Island — no ipdeny zone file
+    blocked: false
+  - code: CY   # Cyprus
+    blocked: true
+  - code: CZ   # Czechia
+    blocked: true
+  - code: DE   # Germany
+    blocked: true
+  - code: DJ   # Djibouti
+    blocked: true
+  - code: DK   # Denmark
+    blocked: true
+  - code: DM   # Dominica
+    blocked: true
+  - code: DO   # Dominican Republic
+    blocked: true
+  - code: DZ   # Algeria
+    blocked: true
+  - code: EC   # Ecuador
+    blocked: true
+  - code: EE   # Estonia
+    blocked: true
+  - code: EG   # Egypt
+    blocked: true
+  - code: EH   # Western Sahara — no ipdeny zone file
+    blocked: false
+  - code: ER   # Eritrea
+    blocked: true
+  - code: ES   # Spain
+    blocked: true
+  - code: ET   # Ethiopia
+    blocked: true
+  - code: FI   # Finland
+    blocked: true
+  - code: FJ   # Fiji
+    blocked: true
+  - code: FK   # Falkland Islands
+    blocked: true
+  - code: FM   # Micronesia
+    blocked: true
+  - code: FO   # Faroe Islands
+    blocked: true
+  - code: FR   # France
+    blocked: true
+  - code: GA   # Gabon
+    blocked: true
+  - code: GB   # United Kingdom
+    blocked: true
+  - code: GD   # Grenada
+    blocked: true
+  - code: GE   # Georgia
+    blocked: true
+  - code: GF   # French Guiana
+    blocked: true
+  - code: GG   # Guernsey
+    blocked: true
+  - code: GH   # Ghana
+    blocked: true
+  - code: GI   # Gibraltar
+    blocked: true
+  - code: GL   # Greenland
+    blocked: true
+  - code: GM   # Gambia
+    blocked: true
+  - code: GN   # Guinea
+    blocked: true
+  - code: GP   # Guadeloupe
+    blocked: true
+  - code: GQ   # Equatorial Guinea
+    blocked: true
+  - code: GR   # Greece
+    blocked: true
+  - code: GS   # South Georgia — no ipdeny zone file
+    blocked: false
+  - code: GT   # Guatemala
+    blocked: true
+  - code: GU   # Guam
+    blocked: true
+  - code: GW   # Guinea-Bissau
+    blocked: true
+  - code: GY   # Guyana
+    blocked: true
+  - code: HK   # Hong Kong
+    blocked: true
+  - code: HM   # Heard Island — no ipdeny zone file
+    blocked: false
+  - code: HN   # Honduras
+    blocked: true
+  - code: HR   # Croatia
+    blocked: true
+  - code: HT   # Haiti
+    blocked: true
+  - code: HU   # Hungary
+    blocked: true
+  - code: ID   # Indonesia
+    blocked: true
+  - code: IE   # Ireland
+    blocked: true
+  - code: IL   # Israel
+    blocked: true
+  - code: IM   # Isle of Man
+    blocked: true
+  - code: IN   # India
+    blocked: true
+  - code: IO   # British Indian Ocean Territory
+    blocked: true
+  - code: IQ   # Iraq
+    blocked: true
+  - code: IR   # Iran
+    blocked: true
+  - code: IS   # Iceland
+    blocked: true
+  - code: IT   # Italy
+    blocked: true
+  - code: JE   # Jersey
+    blocked: true
+  - code: JM   # Jamaica
+    blocked: true
+  - code: JO   # Jordan
+    blocked: true
+  - code: JP   # Japan
+    blocked: true
+  - code: KE   # Kenya
+    blocked: true
+  - code: KG   # Kyrgyzstan
+    blocked: true
+  - code: KH   # Cambodia
+    blocked: true
+  - code: KI   # Kiribati
+    blocked: true
+  - code: KM   # Comoros
+    blocked: true
+  - code: KN   # Saint Kitts and Nevis
+    blocked: true
+  - code: KP   # North Korea
+    blocked: true
+  - code: KR   # South Korea
+    blocked: true
+  - code: KW   # Kuwait
+    blocked: true
+  - code: KY   # Cayman Islands
+    blocked: true
+  - code: KZ   # Kazakhstan
+    blocked: true
+  - code: LA   # Laos
+    blocked: true
+  - code: LB   # Lebanon
+    blocked: true
+  - code: LC   # Saint Lucia
+    blocked: true
+  - code: LI   # Liechtenstein
+    blocked: true
+  - code: LK   # Sri Lanka
+    blocked: true
+  - code: LR   # Liberia
+    blocked: true
+  - code: LS   # Lesotho
+    blocked: true
+  - code: LT   # Lithuania
+    blocked: true
+  - code: LU   # Luxembourg
+    blocked: true
+  - code: LV   # Latvia
+    blocked: true
+  - code: LY   # Libya
+    blocked: true
+  - code: MA   # Morocco
+    blocked: true
+  - code: MC   # Monaco
+    blocked: true
+  - code: MD   # Moldova
+    blocked: true
+  - code: ME   # Montenegro
+    blocked: true
+  - code: MF   # Saint Martin
+    blocked: true
+  - code: MG   # Madagascar
+    blocked: true
+  - code: MH   # Marshall Islands
+    blocked: true
+  - code: MK   # North Macedonia
+    blocked: true
+  - code: ML   # Mali
+    blocked: true
+  - code: MM   # Myanmar
+    blocked: true
+  - code: MN   # Mongolia
+    blocked: true
+  - code: MO   # Macao
+    blocked: true
+  - code: MP   # Northern Mariana Islands
+    blocked: true
+  - code: MQ   # Martinique
+    blocked: true
+  - code: MR   # Mauritania
+    blocked: true
+  - code: MS   # Montserrat
+    blocked: true
+  - code: MT   # Malta
+    blocked: true
+  - code: MU   # Mauritius
+    blocked: true
+  - code: MV   # Maldives
+    blocked: true
+  - code: MW   # Malawi
+    blocked: true
+  - code: MX   # Mexico
+    blocked: true
+  - code: MY   # Malaysia
+    blocked: true
+  - code: MZ   # Mozambique
+    blocked: true
+  - code: NA   # Namibia
+    blocked: true
+  - code: NC   # New Caledonia
+    blocked: true
+  - code: NE   # Niger
+    blocked: true
+  - code: NF   # Norfolk Island
+    blocked: true
+  - code: NG   # Nigeria
+    blocked: true
+  - code: NI   # Nicaragua
+    blocked: true
+  - code: NL   # Netherlands
+    blocked: true
+  - code: "NO"  # Norway
+    blocked: true
+  - code: NP   # Nepal
+    blocked: true
+  - code: NR   # Nauru
+    blocked: true
+  - code: NU   # Niue
+    blocked: true
+  - code: NZ   # New Zealand
+    blocked: true
+  - code: OM   # Oman
+    blocked: true
+  - code: PA   # Panama
+    blocked: true
+  - code: PE   # Peru
+    blocked: true
+  - code: PF   # French Polynesia
+    blocked: true
+  - code: PG   # Papua New Guinea
+    blocked: true
+  - code: PH   # Philippines
+    blocked: true
+  - code: PK   # Pakistan
+    blocked: true
+  - code: PL   # Poland
+    blocked: true
+  - code: PM   # Saint Pierre and Miquelon
+    blocked: true
+  - code: PN   # Pitcairn — no ipdeny zone file
+    blocked: false
+  - code: PR   # Puerto Rico
+    blocked: true
+  - code: PS   # Palestine
+    blocked: true
+  - code: PT   # Portugal
+    blocked: true
+  - code: PW   # Palau
+    blocked: true
+  - code: PY   # Paraguay
+    blocked: true
+  - code: QA   # Qatar
+    blocked: true
+  - code: RE   # Reunion
+    blocked: true
+  - code: RO   # Romania
+    blocked: true
+  - code: RS   # Serbia
+    blocked: true
+  - code: RU   # Russia
+    blocked: true
+  - code: RW   # Rwanda
+    blocked: true
+  - code: SA   # Saudi Arabia
+    blocked: true
+  - code: SB   # Solomon Islands
+    blocked: true
+  - code: SC   # Seychelles
+    blocked: true
+  - code: SD   # Sudan
+    blocked: true
+  - code: SE   # Sweden
+    blocked: true
+  - code: SG   # Singapore
+    blocked: true
+  - code: SH   # Saint Helena — no ipdeny zone file
+    blocked: false
+  - code: SI   # Slovenia
+    blocked: true
+  - code: SJ   # Svalbard and Jan Mayen — no ipdeny zone file
+    blocked: false
+  - code: SK   # Slovakia
+    blocked: true
+  - code: SL   # Sierra Leone
+    blocked: true
+  - code: SM   # San Marino
+    blocked: true
+  - code: SN   # Senegal
+    blocked: true
+  - code: SO   # Somalia
+    blocked: true
+  - code: SR   # Suriname
+    blocked: true
+  - code: SS   # South Sudan
+    blocked: true
+  - code: ST   # Sao Tome and Principe
+    blocked: true
+  - code: SV   # El Salvador
+    blocked: true
+  - code: SX   # Sint Maarten
+    blocked: true
+  - code: SY   # Syria
+    blocked: true
+  - code: SZ   # Eswatini
+    blocked: true
+  - code: TC   # Turks and Caicos Islands
+    blocked: true
+  - code: TD   # Chad
+    blocked: true
+  - code: TF   # French Southern Territories — no ipdeny zone file
+    blocked: false
+  - code: TG   # Togo
+    blocked: true
+  - code: TH   # Thailand
+    blocked: true
+  - code: TJ   # Tajikistan
+    blocked: true
+  - code: TK   # Tokelau
+    blocked: true
+  - code: TL   # Timor-Leste
+    blocked: true
+  - code: TM   # Turkmenistan
+    blocked: true
+  - code: TN   # Tunisia
+    blocked: true
+  - code: TO   # Tonga
+    blocked: true
+  - code: TR   # Turkey
+    blocked: true
+  - code: TT   # Trinidad and Tobago
+    blocked: true
+  - code: TV   # Tuvalu
+    blocked: true
+  - code: TW   # Taiwan
+    blocked: true
+  - code: TZ   # Tanzania
+    blocked: true
+  - code: UA   # Ukraine
+    blocked: true
+  - code: UG   # Uganda
+    blocked: true
+  - code: UM   # US Minor Outlying Islands
+    blocked: true
+  - code: US   # United States
+    blocked: false
+  - code: UY   # Uruguay
+    blocked: true
+  - code: UZ   # Uzbekistan
+    blocked: true
+  - code: VA   # Vatican City
+    blocked: true
+  - code: VC   # Saint Vincent and the Grenadines
+    blocked: true
+  - code: VE   # Venezuela
+    blocked: true
+  - code: VG   # British Virgin Islands
+    blocked: true
+  - code: VI   # US Virgin Islands
+    blocked: true
+  - code: VN   # Vietnam
+    blocked: true
+  - code: VU   # Vanuatu
+    blocked: true
+  - code: WF   # Wallis and Futuna
+    blocked: true
+  - code: WS   # Samoa
+    blocked: true
+  - code: XK   # Kosovo — no ipdeny zone file
+    blocked: false
+  - code: YE   # Yemen
+    blocked: true
+  - code: YT   # Mayotte
+    blocked: true
+  - code: ZA   # South Africa
+    blocked: true
+  - code: ZM   # Zambia
+    blocked: true
+  - code: ZW   # Zimbabwe
+    blocked: true

+ 4 - 0
tftsr_nginx-hardening/roles/geo_blocking/handlers/main.yml

@@ -0,0 +1,4 @@
+---
+- name: reload nftables
+  ansible.builtin.command: nft -f {{ geo_nft_file }}
+  changed_when: true

+ 103 - 0
tftsr_nginx-hardening/roles/geo_blocking/tasks/main.yml

@@ -0,0 +1,103 @@
+---
+- name: Ensure nftables.d directory exists
+  ansible.builtin.file:
+    path: "{{ geo_nft_table_dir }}"
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+
+- name: Create temp directory for zone files
+  ansible.builtin.tempfile:
+    state: directory
+    suffix: geo_zones
+  register: geo_temp_dir
+
+# --- Source: live download ---
+
+- name: Test connectivity to ipdeny.com (fast pre-check)
+  ansible.builtin.uri:
+    url: "{{ geo_ipdeny_base_url }}/us-aggregated.zone"
+    method: HEAD
+    timeout: 8
+  register: geo_connectivity_check
+  ignore_errors: yes
+  when: geo_zone_files_dir | length == 0
+
+- name: Fail fast if ipdeny.com is unreachable and no local cache configured
+  ansible.builtin.fail:
+    msg: >-
+      Cannot reach ipdeny.com (connection timed out or refused) and
+      geo_zone_files_dir is not set. Pre-download zone files on a machine
+      with internet access using scripts/download-geo-zones.sh, copy them
+      to this host, then set geo_zone_files_dir in inventory or with -e.
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is failed
+
+- name: Download zone files for blocked countries
+  ansible.builtin.get_url:
+    url: "{{ geo_ipdeny_base_url }}/{{ item.code | lower }}-aggregated.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    timeout: 30
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is succeeded
+
+# --- Source: local pre-downloaded cache ---
+
+- name: Copy zone files from local cache directory
+  ansible.builtin.copy:
+    src: "{{ geo_zone_files_dir }}/{{ item.code | lower }}.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    remote_src: yes
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when: geo_zone_files_dir | length > 0
+
+# --- Assemble and deploy ---
+
+- name: Assemble all CIDRs from downloaded zone files
+  ansible.builtin.shell: >
+    cat {{ geo_temp_dir.path }}/*.zone 2>/dev/null |
+    grep -v '^#' | grep -v '^$' | sort -u || true
+  register: geo_cidrs_raw
+  changed_when: false
+
+- name: Set geo_blocked_cidrs fact
+  ansible.builtin.set_fact:
+    geo_blocked_cidrs: "{{ geo_cidrs_raw.stdout_lines }}"
+
+- name: Deploy geo-block nftables ruleset
+  ansible.builtin.template:
+    src: geo-block.nft.j2
+    dest: "{{ geo_nft_file }}"
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nftables
+
+- name: Ensure nftables.conf includes geo-block.nft
+  ansible.builtin.lineinfile:
+    path: /etc/sysconfig/nftables.conf
+    line: 'include "{{ geo_nft_file }}"'
+    state: present
+    backup: yes
+
+- name: Enable and start nftables service
+  ansible.builtin.service:
+    name: nftables
+    state: started
+    enabled: yes
+
+- name: Clean up temp directory
+  ansible.builtin.file:
+    path: "{{ geo_temp_dir.path }}"
+    state: absent

+ 26 - 0
tftsr_nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2

@@ -0,0 +1,26 @@
+#!/usr/sbin/nft -f
+# Managed by Ansible — do not edit manually
+
+# Ensure table exists, then flush for idempotency
+add table inet geo_block
+flush table inet geo_block
+
+table inet geo_block {
+    set blocked_countries {
+        type ipv4_addr
+        flags interval
+{% if geo_blocked_cidrs | length > 0 %}
+        elements = {
+{% for cidr in geo_blocked_cidrs %}
+            {{ cidr }}{% if not loop.last %},{% endif %}
+
+{% endfor %}
+        }
+{% endif %}
+    }
+
+    chain prerouting {
+        type filter hook prerouting priority -100; policy accept;
+        ip saddr @blocked_countries drop
+    }
+}

+ 31 - 0
tftsr_nginx-hardening/roles/nginx_hardening/defaults/main.yml

@@ -0,0 +1,31 @@
+---
+nginx_ssl_protocols: "TLSv1.2 TLSv1.3"
+nginx_ssl_ciphers: "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256"
+nginx_hsts_max_age: 31536000
+nginx_rate_limit_req_zone: "$binary_remote_addr zone=general:10m rate=30r/m"
+nginx_client_max_body_size: "10m"
+nginx_proxy_read_timeout: 60
+
+nginx_redirect_services:
+  - name: gogs
+    server_name: gogs.tftsr.com
+  - name: homeassist
+    server_name: homeassist.tftsr.com
+  - name: kimai
+    server_name: kimai.tftsr.com
+  - name: ollama-ui
+    server_name: ollama-ui.tftsr.com
+  - name: overseerr
+    server_name: overseerr.tftsr.com
+  - name: plex
+    server_name: plex.tftsr.com
+  - name: portainer
+    server_name: portainer.tftsr.com
+  - name: radarr
+    server_name: radarr.tftsr.com
+  - name: retro
+    server_name: retro.tftsr.com
+  - name: sonarr
+    server_name: sonarr.tftsr.com
+  - name: trilium
+    server_name: trilium.tftsr.com

+ 5 - 0
tftsr_nginx-hardening/roles/nginx_hardening/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: reload nginx
+  ansible.builtin.service:
+    name: nginx
+    state: reloaded

+ 44 - 0
tftsr_nginx-hardening/roles/nginx_hardening/tasks/main.yml

@@ -0,0 +1,44 @@
+---
+- name: Deploy security headers configuration
+  ansible.builtin.template:
+    src: security_headers.conf.j2
+    dest: /etc/nginx/conf.d/00-security-headers.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy SSL parameters configuration
+  ansible.builtin.template:
+    src: ssl_params.conf.j2
+    dest: /etc/nginx/conf.d/00-ssl-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy proxy parameters configuration
+  ansible.builtin.template:
+    src: proxy_params.conf.j2
+    dest: /etc/nginx/conf.d/00-proxy-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy HTTP to HTTPS redirect configuration
+  ansible.builtin.template:
+    src: http_redirect.conf.j2
+    dest: /etc/nginx/conf.d/00-http-redirects.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Validate NGINX configuration
+  ansible.builtin.command: nginx -t
+  changed_when: false

+ 8 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+{% for svc in nginx_redirect_services %}
+server {
+    listen 80;
+    server_name {{ svc.server_name }};
+    return 301 https://$host$request_uri;
+}
+{% endfor %}

+ 8 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+
+proxy_hide_header X-Powered-By;
+proxy_hide_header Server;
+proxy_set_header X-Real-IP $remote_addr;
+proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+proxy_set_header X-Forwarded-Proto $scheme;
+proxy_read_timeout {{ nginx_proxy_read_timeout }};

+ 17 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2

@@ -0,0 +1,17 @@
+# Managed by Ansible — do not edit manually
+
+server_tokens off;
+
+# Rate limiting zone definition
+limit_req_zone {{ nginx_rate_limit_req_zone }};
+
+# Client body size limit
+client_max_body_size {{ nginx_client_max_body_size }};
+
+# Security headers
+add_header Strict-Transport-Security "max-age={{ nginx_hsts_max_age }}; includeSubDomains; preload" always;
+add_header X-Frame-Options SAMEORIGIN always;
+add_header X-Content-Type-Options nosniff always;
+add_header Referrer-Policy strict-origin-when-cross-origin always;
+add_header Permissions-Policy "geolocation=(), microphone=(), camera=()" always;
+add_header X-XSS-Protection "1; mode=block" always;

+ 10 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2

@@ -0,0 +1,10 @@
+# Managed by Ansible — do not edit manually
+
+ssl_protocols {{ nginx_ssl_protocols }};
+ssl_ciphers {{ nginx_ssl_ciphers }};
+ssl_prefer_server_ciphers off;
+ssl_session_timeout 1d;
+ssl_stapling on;
+ssl_stapling_verify on;
+resolver 8.8.8.8 8.8.4.4 valid=300s;
+resolver_timeout 5s;

+ 71 - 0
tftsr_nginx-hardening/scripts/download-geo-zones.sh

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Download ipdeny.com aggregated zone files for all blocked countries.
+# Run this on a machine WITH internet access, then rsync the output
+# directory to the DMZ host and set geo_zone_files_dir in your inventory.
+#
+# Usage:
+#   ./scripts/download-geo-zones.sh [output-dir]
+#
+# Example workflow:
+#   # On your workstation:
+#   ./scripts/download-geo-zones.sh /tmp/geo_zones
+#   rsync -av /tmp/geo_zones/ sarman@dmz-host:/opt/geo_zones/
+#
+#   # Then run the playbook pointing at the cache:
+#   ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+
+set -euo pipefail
+
+BASE_URL="https://www.ipdeny.com/ipblocks/data/aggregated"
+OUT_DIR="${1:-/tmp/geo_zones}"
+
+# All blocked country codes (excludes US and ipdeny-absent territories)
+COUNTRIES=(
+  AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ
+  BA BB BD BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BW BY BZ
+  CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CY CZ
+  DE DJ DK DM DO DZ
+  EC EE EG ER ES ET
+  FI FJ FK FM FO FR
+  GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GT GU GW GY
+  HK HN HR HT HU
+  ID IE IL IM IN IO IQ IR IS IT
+  JE JM JO JP
+  KE KG KH KI KM KN KP KR KW KY KZ
+  LA LB LC LI LK LR LS LT LU LV LY
+  MA MC MD ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ
+  NA NC NE NF NG NI NL NO NP NR NU NZ
+  OM
+  PA PE PF PG PH PK PL PM PR PS PT PW PY
+  QA
+  RE RO RS RU RW
+  SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ
+  TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ
+  UA UG UM UY UZ
+  VA VC VE VG VI VN VU
+  WF WS
+  YE YT
+  ZA ZM ZW
+)
+
+mkdir -p "$OUT_DIR"
+echo "Downloading ${#COUNTRIES[@]} zone files to $OUT_DIR ..."
+
+ok=0; fail=0
+for cc in "${COUNTRIES[@]}"; do
+  url="${BASE_URL}/${cc,,}-aggregated.zone"
+  dest="${OUT_DIR}/${cc,,}.zone"
+  if curl -fsSL --connect-timeout 10 --max-time 30 -o "$dest" "$url"; then
+    (( ++ok ))
+  else
+    echo "  SKIP $cc (no zone file at ipdeny.com)"
+    rm -f "$dest"
+    (( ++fail ))
+  fi
+done
+
+echo "Done: $ok downloaded, $fail skipped."
+echo ""
+echo "Next steps:"
+echo "  rsync -av ${OUT_DIR}/ USER@DMZ_HOST:/opt/geo_zones/"
+echo "  ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones"

+ 8 - 0
tftsr_nginx-hardening/site.yml

@@ -0,0 +1,8 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - nginx_hardening
+    - fail2ban
+    - geo_blocking