Browse Source

Merge branch 'feature/three-pass-benchmark' of sarman/tftsr_ai into master

Shaun Arman 1 ngày trước cách đây
mục cha
commit
342cbd123d
90 tập tin đã thay đổi với 4903 bổ sung410 xóa
  1. 22 15
      CLAUDE.md
  2. 45 15
      README.md
  3. 117 66
      benchmarks/README.md
  4. 17 6
      benchmarks/results/benchmark_20260307T170059.md
  5. 92 0
      benchmarks/results/benchmark_20260307T184212.md
  6. 147 0
      benchmarks/results/benchmark_20260308T003605.md
  7. 70 0
      benchmarks/results/benchmark_20260308T145246.md
  8. 57 0
      benchmarks/results/benchmark_20260308T215747.md
  9. 54 0
      benchmarks/results/benchmark_20260309T080551.md
  10. 47 0
      benchmarks/results/benchmark_20260309T174604.md
  11. 67 0
      benchmarks/results/benchmark_20260310T094843.md
  12. 117 0
      benchmarks/results/benchmark_20260310T102149.md
  13. 94 0
      benchmarks/results/benchmark_20260310T110632.md
  14. 107 0
      benchmarks/results/benchmark_20260310T122818.md
  15. 107 0
      benchmarks/results/benchmark_20260310T160815.md
  16. 78 0
      benchmarks/results/benchmark_20260310T170013.md
  17. 433 0
      benchmarks/results/benchmark_review_20260310.md
  18. 151 71
      benchmarks/results/model_selection.json
  19. 26 10
      inventory/group_vars/all.yml
  20. 49 0
      playbooks/01_vault.yml
  21. 44 2
      playbooks/02_infrastructure.yml
  22. 165 37
      playbooks/03_benchmark.yml
  23. 74 7
      playbooks/04_models.yml
  24. 1 1
      playbooks/07_openwebui.yml
  25. 21 0
      playbooks/08_openclaw.yml
  26. 171 0
      playbooks/_bench_tier_batch.yml
  27. 75 45
      roles/models/README.md
  28. 52 41
      roles/ollama/README.md
  29. 31 25
      roles/openclaw/README.md
  30. 36 39
      roles/openwebui/README.md
  31. 26 0
      templates/ollama/ollama-node0.service.j2
  32. 16 15
      templates/ollama/override.conf.j2
  33. 28 0
      templates/ollama/warmup-node0.sh.j2
  34. 4 4
      templates/ollama/warmup.sh.j2
  35. 14 0
      templates/systemd/ollama-warmup-node0.service.j2
  36. 15 0
      templates/vault/vault-unseal.service.j2
  37. 11 11
      templates/vault/vault-unseal.sh.j2
  38. 74 0
      tftsr_nginx-hardening/CLAUDE.md
  39. 4 0
      tftsr_nginx-hardening/ansible.cfg
  40. 4 0
      tftsr_nginx-hardening/inventory/hosts.yml
  41. 73 0
      tftsr_nginx-hardening/nginx-hardening/CLAUDE.md
  42. 179 0
      tftsr_nginx-hardening/nginx-hardening/README.md
  43. 4 0
      tftsr_nginx-hardening/nginx-hardening/ansible.cfg
  44. 7 0
      tftsr_nginx-hardening/nginx-hardening/inventory/hosts.yml
  45. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/fail2ban.yml
  46. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/geo_blocking.yml
  47. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/nginx_hardening.yml
  48. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/update_geo_blocks.yml
  49. 7 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/defaults/main.yml
  50. 5 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/handlers/main.yml
  51. 41 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/tasks/main.yml
  52. 22 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/jail.local.j2
  53. 3 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2
  54. 3 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2
  55. 509 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/defaults/main.yml
  56. 4 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/handlers/main.yml
  57. 103 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/tasks/main.yml
  58. 26 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2
  59. 15 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/defaults/main.yml
  60. 5 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/handlers/main.yml
  61. 44 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/tasks/main.yml
  62. 8 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2
  63. 8 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2
  64. 17 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2
  65. 10 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2
  66. 71 0
      tftsr_nginx-hardening/nginx-hardening/scripts/download-geo-zones.sh
  67. 7 0
      tftsr_nginx-hardening/nginx-hardening/site.yml
  68. 6 0
      tftsr_nginx-hardening/playbooks/fail2ban.yml
  69. 6 0
      tftsr_nginx-hardening/playbooks/geo_blocking.yml
  70. 6 0
      tftsr_nginx-hardening/playbooks/nginx_hardening.yml
  71. 6 0
      tftsr_nginx-hardening/playbooks/update_geo_blocks.yml
  72. 7 0
      tftsr_nginx-hardening/roles/fail2ban/defaults/main.yml
  73. 5 0
      tftsr_nginx-hardening/roles/fail2ban/handlers/main.yml
  74. 41 0
      tftsr_nginx-hardening/roles/fail2ban/tasks/main.yml
  75. 22 0
      tftsr_nginx-hardening/roles/fail2ban/templates/jail.local.j2
  76. 3 0
      tftsr_nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2
  77. 3 0
      tftsr_nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2
  78. 509 0
      tftsr_nginx-hardening/roles/geo_blocking/defaults/main.yml
  79. 4 0
      tftsr_nginx-hardening/roles/geo_blocking/handlers/main.yml
  80. 103 0
      tftsr_nginx-hardening/roles/geo_blocking/tasks/main.yml
  81. 26 0
      tftsr_nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2
  82. 31 0
      tftsr_nginx-hardening/roles/nginx_hardening/defaults/main.yml
  83. 5 0
      tftsr_nginx-hardening/roles/nginx_hardening/handlers/main.yml
  84. 44 0
      tftsr_nginx-hardening/roles/nginx_hardening/tasks/main.yml
  85. 8 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2
  86. 8 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2
  87. 17 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2
  88. 10 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2
  89. 71 0
      tftsr_nginx-hardening/scripts/download-geo-zones.sh
  90. 8 0
      tftsr_nginx-hardening/site.yml

+ 22 - 15
CLAUDE.md

@@ -6,22 +6,26 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ```bash
 # Full deployment
-ansible-playbook playbooks/site.yml
+ansible-playbook playbooks/site.yml -K -e @local.yml
 
 # Run a single playbook
-ansible-playbook playbooks/03_benchmark.yml
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml
 
 # Run with tags (each playbook defines granular tags)
-ansible-playbook playbooks/site.yml --tags ollama,docker
+ansible-playbook playbooks/site.yml --tags ollama,docker -K -e @local.yml
 
 # Benchmark and update warm-up slots in one shot
-ansible-playbook playbooks/03_benchmark.yml && ansible-playbook playbooks/04_models.yml
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
 
-# Override slot 4 with a specific model
-ansible-playbook playbooks/04_models.yml -e "slot4_model=qwen2.5-coder:7b"
+# Rotate general slot (Node 1, port 11434)
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest"
+
+# Rotate coding slot (Node 0, port 11435)
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot6_model=llama3.1:70b"
 
 # Run against a subset of hosts
-ansible-playbook playbooks/09_nginx.yml --limit nginx_proxy
+ansible-playbook playbooks/09_nginx.yml --limit nginx_proxy -K -e @local.yml
 
 # Lint playbooks
 ansible-lint playbooks/
@@ -30,7 +34,7 @@ ansible-lint playbooks/
 ansible-galaxy collection install -r requirements.yml
 
 # Check mode (dry run)
-ansible-playbook playbooks/site.yml --check --diff
+ansible-playbook playbooks/site.yml --check --diff -K -e @local.yml
 ```
 
 ## Required Local Configuration
@@ -87,17 +91,20 @@ All credentials live exclusively in Vault under `secret/data/{{ vault_project_sl
 
 **Composite score formula:**
 ```
-composite = (quality × 0.45) + (tokens_per_sec / 30, capped at 1.0) × 0.30 + (1 - ttft_ms/5000, floored at 0) × 0.25
+composite = (quality × 0.45) + (tokens_per_sec / ceiling, capped at 1.0) × 0.30 + (1 - ttft_ms/5000, floored at 0) × 0.25
 ```
+`benchmark_toks_norm_ceiling` defaults to 40 (dual-socket target).
+
+**Slot classification:** if `coding_composite - general_composite >= 0.10` (configurable via `benchmark_coding_threshold`), model goes to a coding slot; otherwise general.
 
-**Slot classification:** if `coding_composite - general_composite >= 0.15` (configurable via `benchmark_coding_threshold`), model goes to a coding slot; otherwise general.
+**6 warm-up slots across two NUMA instances:**
+- Node 1 (port 11434): slots 1–2 locked general + slot 5 rotatable general
+- Node 0 (port 11435): slots 3–4 locked coding + slot 6 rotatable coding
+- Slots 5/6 rotatable via `-e slot5_model=<name>` / `-e slot6_model=<name>` without re-benchmarking
 
-**4 warm-up slots always hot in RAM:**
-- Slots 1–2: top general-purpose models by composite score
-- Slots 3–4: top coding models by composite score
-- Slot 4 is user-rotatable via `-e slot4_model=<name>` without re-benchmarking
+`04_models.yml` creates Modelfiles (`coder-128k`, `coder-32k`, `coder-rotate`, `llama-family`, `gemma-family`) and two warmup services: `ollama-warmup.service` (Node 1) and `ollama-warmup-node0.service` (Node 0).
 
-`04_models.yml` creates named Ollama Modelfiles (`coder-128k`, `coder-32k`, `llama-family`, `gemma-family`) and a `ollama-warmup.service` systemd one-shot that pre-loads all 4 slots after Ollama starts.
+**Benchmark alias filter:** `benchmark_skip_aliases` in `group_vars/all.yml` lists the Modelfile aliases — the benchmark playbook excludes these from the test loop to prevent 32k-token KV-cache allocations from stalling the run.
 
 ### Key Variables
 

+ 45 - 15
README.md

@@ -23,7 +23,7 @@ bot access -- all driven by a single `ansible-playbook deploy_ai.yml` command.
           ┌───────────────▼┐    ┌────▼──────────────────────┐
           │ coredns_host   │    │ ai_server                 │
           │ 192.168.1.29   │    │ 192.168.1.100             │
-          │                │    │                            
+          │                │    │                           │
           │ - CoreDNS      │    │ - Ollama (LLM inference)  │
           └────────────────┘    │ - Open WebUI              │
                                 │ - Keycloak (SSO/OIDC)     │
@@ -292,11 +292,13 @@ The benchmark playbook automatically selects the best coding models and keeps th
 Check the current slot assignments in `benchmarks/results/model_selection.json`:
 
 ```bash
-cat benchmarks/results/model_selection.json | python3 -m json.tool | grep slot
+python3 -m json.tool benchmarks/results/model_selection.json | grep slot
 ```
 
-Slots 3 and 4 are always coding-classified models. Use the `slot3_coding` model for
-primary work and `slot4_coding` for a lighter/faster alternative.
+Slots 3–6 are coding-classified models, all running on the Node 0 instance at port 11435.
+Use `slot3_coding` (the highest-scoring coding model) as your primary model. Connect coding
+tools directly to `https://ollama-api.<domain>` (proxied from port 11434, Node 1) or to
+Open WebUI which load-balances across both instances.
 
 ## Day-2 Operations
 
@@ -343,6 +345,13 @@ ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
   -e "benchmark_models=qwen2.5-coder:14b-instruct-q4_K_M,codestral:22b-v0.1-q4_K_M"
 ```
 
+**Override tier boundaries or timeouts (see [benchmarks/README.md](benchmarks/README.md#three-pass-execution)):**
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_small_max_gb=8 benchmark_medium_max_gb=20"
+```
+
 **Pull recommended models if scores are below threshold:**
 
 ```bash
@@ -355,10 +364,20 @@ ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml -e "pull_if_better=
 ansible-playbook playbooks/04_models.yml -K -e @local.yml
 ```
 
-**Rotate slot 4 to a specific model:**
+**Rotate slot 5 (general) or slot 6 (coding) to a specific model:**
 
 ```bash
-ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot4_model=deepseek-r1:14b"
+# Swap general rotate slot
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest"
+
+# Swap coding rotate slot
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot6_model=llama3.1:70b"
+
+# Both at once
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest" -e "slot6_model=command-r:35b"
+
+# Reset both rotate slots back to benchmark recommendations
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
 ```
 
 **Redeploy Keycloak only:**
@@ -393,16 +412,25 @@ ansible-playbook playbooks/11_vault_oidc.yml -K -e @local.yml
 
 ## Model Slot System
 
-Four models are kept warm in RAM at all times (`OLLAMA_MAX_LOADED_MODELS=4`, `OLLAMA_KEEP_ALIVE=-1`). Slots are filled by the benchmark playbook — no model names are hardcoded.
+Six models are kept warm across two Ollama instances (`OLLAMA_MAX_LOADED_MODELS=3` each, `OLLAMA_KEEP_ALIVE=-1`). Slots are filled automatically by the benchmark playbook — no model names are hardcoded.
+
+```
+NUMA Node 1 — ollama.service     — port 11434  (general models)
+NUMA Node 0 — ollama-node0.service — port 11435 (coding models)
+```
+
+| Slot | Instance      | Port  | Role                    | Selection                     | Rotation                                    |
+|------|---------------|-------|-------------------------|-------------------------------|---------------------------------------------|
+| 1    | Node 1        | 11434 | General primary (locked) | Top general composite score  | Replaced only by re-benchmark               |
+| 2    | Node 1        | 11434 | General secondary (locked)| 2nd general composite score | Replaced only by re-benchmark               |
+| 5    | Node 1        | 11434 | General rotate           | 3rd general composite score   | `-e slot5_model=<name>`                     |
+| 3    | Node 0        | 11435 | Coding primary (locked)  | Top coding composite score    | Replaced only by re-benchmark               |
+| 4    | Node 0        | 11435 | Coding secondary (locked)| 2nd coding composite score    | Replaced only by re-benchmark               |
+| 6    | Node 0        | 11435 | Coding rotate            | 3rd coding composite score    | `-e slot6_model=<name>`                     |
 
-| Slot | Role                      | Selection                     | Rotation                              |
-|------|---------------------------|-------------------------------|---------------------------------------|
-| 1    | General-purpose primary   | Top general composite score   | Replaced if score < threshold         |
-| 2    | General-purpose secondary | 2nd general composite score   | Replaced if score < threshold         |
-| 3    | Coding primary            | Top coding composite score    | Locked; replaced only by re-benchmark |
-| 4    | Coding secondary          | 2nd coding composite score    | Rotatable: `-e slot4_model=<name>`    |
+**Classification rule:** a model is classified `coding` if its coding composite score exceeds its general composite score by ≥ 0.10; otherwise `general`.
 
-**Classification rule:** a model is classified `coding` if its coding composite score exceeds its general composite score by ≥ 0.15; otherwise `general`.
+**Modelfile aliases** (`coder-128k`, `coder-32k`, `coder-rotate`, `llama-family`, `gemma-family`) are excluded from benchmarking to prevent KV-cache allocation stalls.
 
 ## Verification Steps
 
@@ -416,8 +444,10 @@ After a full `deploy_ai.yml` run, verify the deployment (substitute your actual
 6. **Qdrant health** -- `curl -s http://<ai_server_ip>:6333/healthz` returns OK
 7. **CoreDNS resolution** -- `dig @<coredns_host_ip> vault.example.com` returns `<nginx_proxy_ip>`
 8. **NGINX configs** -- `ssh <nginx_proxy_ip> 'sudo nginx -t'` passes
-9. **OpenClaw** -- send a message to the Telegram bot, confirm response
+9. **OpenClaw** -- send a message to the Telegram bot, confirm response using slot1_general model
 10. **Benchmark report** -- check `benchmarks/results/benchmark_<timestamp>.md` for latest results
+11. **Node 0 Ollama** -- `curl -s -H "Authorization: Bearer <key>" http://<ai_server_ip>:11435/api/tags` returns model list
+12. **Both warmup services** -- `systemctl status ollama-warmup ollama-warmup-node0` both show `active (exited)`
 
 ## Role Reference
 

+ 117 - 66
benchmarks/README.md

@@ -3,133 +3,184 @@
 ## Overview
 
 Dynamic benchmark system for all installed Ollama models. Runs a suite of coding and
-general-purpose tests against every model currently available on the Ollama server,
-scores each model on a composite metric, and assigns models to the 4-slot system
-based on results.
+general-purpose tests against every model on the Ollama server, scores each model on a
+composite metric, and assigns models to the 6-slot dual-socket system based on results.
+
+Modelfile aliases (`coder-128k`, `coder-32k`, `coder-rotate`, `llama-family`,
+`gemma-family`) are automatically excluded from benchmarking — they share weights with
+real models and their large context window parameters would stall every run with
+285-second KV-cache allocations.
 
 ## How to Run
 
 **Benchmark all installed models:**
 
 ```bash
-ansible-playbook playbooks/05_benchmark.yml
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml
 ```
 
 **Benchmark specific models only:**
 
 ```bash
-ansible-playbook playbooks/05_benchmark.yml -e '{"benchmark_specific_models":["qwen2.5-coder:14b","deepseek-coder-v2:16b"]}'
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_models=qwen2.5-coder:14b,deepseek-coder-v2:16b"
 ```
 
-**Benchmark with automatic model pulling if a better model is found:**
+**Benchmark and immediately push 6-slot warm-up selections:**
 
 ```bash
-ansible-playbook playbooks/05_benchmark.yml -e pull_if_better=true
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+## Three-Pass Execution
+
+Models are split into three size tiers before benchmarking. Each tier gets its own
+per-request timeout to avoid small models waiting behind 70 B giants:
+
+| Tier   | RAM threshold | Timeout | Description                       |
+|--------|---------------|---------|-----------------------------------|
+| Small  | < 10 GB       | 300 s   | 7 B and under — fast path         |
+| Medium | 10–15 GB      | 900 s   | 16 B lite / 12 B — standard wait  |
+| Large  | > 15 GB       | 1200 s  | 34 B+ — 20-minute ceiling         |
+
+**Size source vs runtime RAM:** `ollama list` reports on-disk (compressed) sizes, which
+are smaller than actual runtime RAM usage (model weights + KV cache + overhead). A
+`benchmark_size_overhead_factor` (default `1.2`) is applied when computing tier
+boundaries: the disk-size cutoffs are divided by the factor before comparison. For
+example, with default settings a 9 GB on-disk model is treated as ~10.8 GB at runtime
+and falls in the medium tier rather than small.
+
+**Override tier boundaries:**
+
+```bash
+# Adjust where small/medium boundary sits
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_small_max_gb=8 benchmark_medium_max_gb=20"
+
+# Tune the overhead factor if your models load larger/smaller than expected
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_size_overhead_factor=1.25"
+
+# Override timeouts only
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_medium_timeout=600 benchmark_large_timeout=1800"
 ```
 
 ## Test Suites
 
 ### Coding Tests
 
-| Test       | Prompt                                                         | What Is Measured              |
-|------------|----------------------------------------------------------------|-------------------------------|
-| `code_gen` | "Write a Python function that implements binary search on a sorted list. Include type hints and docstring." | Correctness (def + return present), code structure, tokens/sec |
-| `debug`    | "Find and fix the bug in this Python code: `def factorial(n): return n * factorial(n)`. Explain the issue." | Identifies base case bug, explanation quality, tokens/sec |
-| `refactor` | "Refactor this code to use list comprehension: `result = []; for i in range(10): if i % 2 == 0: result.append(i*i)`" | Produces list comprehension, conciseness, tokens/sec |
+| Test       | Prompt                                                                     | What Is Measured                                   |
+|------------|----------------------------------------------------------------------------|----------------------------------------------------|
+| `code_gen` | Write a Python merge sort with type hints, docstring, and 3 unit tests     | `def`, `return`, `"""`, `->`, `assert`, `def test_`, `import` |
+| `debug`    | Find and fix 3 bugs in a given Python function                             | `def`, `return`, code block, `assert`              |
+| `refactor` | Refactor a loop for readability and performance                            | `def`, `return`, code block, type hint, `import`   |
 
 ### General Tests
 
-| Test        | Prompt                                                        | What Is Measured              |
-|-------------|---------------------------------------------------------------|-------------------------------|
-| `explain`   | "Explain the concept of recursion to a beginner programmer. Use a simple analogy." | Clarity, analogy presence, length adequacy, tokens/sec |
-| `creative`  | "Write a short poem about artificial intelligence."           | Creativity (line count, poetic structure), tokens/sec |
-| `reasoning` | "A farmer has 17 sheep. All but 9 die. How many are left? Explain your reasoning step by step." | Correct answer (9), step-by-step reasoning, tokens/sec |
+| Test        | Prompt                                                     | What Is Measured                                     |
+|-------------|------------------------------------------------------------|------------------------------------------------------|
+| `explain`   | Explain how Python's GIL works and when it matters         | Response length, paragraph structure, list formatting |
+| `creative`  | Suggest 5 fun family activities for a rainy weekend        | Response length, paragraph structure, list formatting |
+| `reasoning` | Apple arithmetic word problem                              | Response length, paragraph structure, list formatting |
 
 ### Latency Test
 
-| Test      | Prompt | What Is Measured           |
-|-----------|--------|----------------------------|
-| `latency` | "Hi"   | Time to first token (TTFT) |
+| Test      | Prompt | What Is Measured                                   |
+|-----------|--------|----------------------------------------------------|
+| `latency` | "Hi"   | Total response time (eval + prompt eval), used as TTFT proxy |
 
 ## Scoring
 
-### Metrics Collected from Ollama API
-
-- **tokens/sec** -- generation throughput from `/api/generate` response
-- **TTFT** (time to first token) -- measured from request start to first streamed token
-- **Quality heuristics** -- regex and length checks specific to each test type
-
 ### Composite Score Formula
 
 For each category (coding, general), a composite score is calculated:
 
 ```
-composite = (quality * 0.45) + (tokens_per_sec_normalized * 0.30) + (latency_score * 0.25)
+composite = (quality * 0.45) + (tokens_per_sec / ceiling, capped 1.0) * 0.30
+          + (1 - ttft_ms / 5000, floored 0) * 0.25
 ```
 
 Where:
-- `quality` is 0.0-1.0 based on heuristic checks for the test type
-- `tokens_per_sec_normalized` is the model's tokens/sec divided by the fastest model's tokens/sec
-- `latency_score` is 1.0 - (model_ttft / slowest_ttft)
+- `quality` — 0.0–1.0 from heuristic checks per test type (see CLAUDE.md for weights)
+- `tokens_per_sec` — averaged across all test responses; normalized against `benchmark_toks_norm_ceiling` (default 40)
+- `ttft_ms` — latency test response time in milliseconds
 
 ### Classification Rule
 
-A model is classified as a **coding** model if:
+A model is classified as **coding** if:
 
 ```
-coding_composite - general_composite >= 0.15
+coding_composite - general_composite >= benchmark_coding_threshold   # default 0.10
 ```
 
-Otherwise it is classified as **general**.
+Name-pattern heuristics (`coder`, `codestral`, `codellama`, `starcoder`) apply as a
+tiebreaker. Category can also be forced with `model_category_overrides` in `group_vars/all.yml`.
 
 ## Thresholds and Configuration
 
-All thresholds are configurable via `group_vars/all.yml`:
-
-| Key                            | Default | Description                                    |
-|--------------------------------|---------|------------------------------------------------|
-| `benchmark_min_tokens_per_sec` | 10      | Minimum tokens/sec to pass a model             |
-| `benchmark_max_ttft_ms`        | 5000    | Maximum time to first token in milliseconds    |
-| `benchmark_quality_weight`     | 0.45    | Weight of quality score in composite            |
-| `benchmark_speed_weight`       | 0.30    | Weight of tokens/sec in composite               |
-| `benchmark_latency_weight`     | 0.25    | Weight of latency score in composite            |
-| `benchmark_coding_threshold`   | 0.15    | Minimum coding-general delta for coding classification |
+All thresholds are configurable in `inventory/group_vars/all.yml`:
+
+| Key                               | Default | Description                                            |
+|-----------------------------------|---------|--------------------------------------------------------|
+| `benchmark_thresholds.min_tokens_per_sec`  | 5.0  | Minimum tok/sec to be slot-eligible          |
+| `benchmark_thresholds.min_quality_score`   | 0.6  | Minimum quality score to be slot-eligible    |
+| `benchmark_thresholds.min_composite_score` | 0.55 | Minimum composite to avoid threshold warning |
+| `benchmark_toks_norm_ceiling`     | 40      | tok/sec ceiling for normalization (dual-socket target) |
+| `benchmark_coding_threshold`      | 0.10    | coding-general composite delta for classification      |
+| `benchmark_small_max_gb`          | 10      | Runtime RAM upper bound for small pass (GB)            |
+| `benchmark_medium_max_gb`         | 15      | Runtime RAM upper bound for medium pass (GB)           |
+| `benchmark_size_overhead_factor`  | 1.2     | Multiplier applied to `ollama list` disk sizes to estimate runtime RAM |
+| `benchmark_small_timeout`         | 300     | Per-request timeout for small models (seconds)         |
+| `benchmark_medium_timeout`        | 900     | Per-request timeout for medium models (seconds)        |
+| `benchmark_large_timeout`         | 1200    | Per-request timeout for large models (seconds)         |
+| `benchmark_skip_aliases`          | see below| Modelfile aliases excluded from benchmark loop        |
+
+Default `benchmark_skip_aliases`:
+```yaml
+- coder-128k
+- coder-32k
+- coder-rotate
+- llama-family
+- gemma-family
+```
 
 ## Output Format
 
 ### Benchmark Report
 
-Each run produces `benchmarks/benchmark_<timestamp>.md` with a results table:
+Each run produces `benchmarks/results/benchmark_<timestamp>.md`. The slot table now
+covers all 6 slots across both NUMA instances:
 
 ```
-| Model                  | Coding Composite | General Composite | Classification | Tokens/sec | TTFT (ms) |
-|------------------------|------------------|-------------------|----------------|------------|-----------|
-| qwen2.5-coder:14b      | 0.82             | 0.65              | coding         | 38.2       | 420       |
-| deepseek-coder-v2:16b  | 0.78             | 0.63              | coding         | 35.1       | 510       |
-| llama3.1:8b            | 0.61             | 0.74              | general        | 52.3       | 280       |
-| mistral:7b             | 0.58             | 0.71              | general        | 55.8       | 250       |
+| Slot | Socket              | Role            | Model                     | Composite |
+|------|---------------------|-----------------|---------------------------|-----------|
+| 1    | Node 1 (port 11434) | General (locked)| llama3.1:8b               | 0.74      |
+| 2    | Node 1 (port 11434) | General (locked)| mistral:latest            | 0.71      |
+| 5    | Node 1 (port 11434) | General (rotate)| llama3.2:3b               | 0.63      |
+| 3    | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b     | 0.82      |
+| 4    | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b          | 0.78      |
+| 6    | Node 0 (port 11435) | Coding (rotate) | codegemma:7b              | 0.69      |
 ```
 
-### Model Selection File
+### model_selection.json
 
-Results are also written to `model_selection.json`:
+Results are written to `benchmarks/results/model_selection.json`:
 
 ```json
 {
-  "timestamp": "2025-01-15T10:30:00Z",
-  "slot1_coding": "qwen2.5-coder:14b",
-  "slot2_general": "llama3.1:8b",
-  "slot3_backup": "deepseek-coder-v2:16b",
-  "slot4_experimental": null,
-  "results": { ... }
+  "slot1_general": "llama3.1:8b",
+  "slot2_general": "mistral:latest",
+  "slot5_general_rotate": "llama3.2:3b",
+  "slot3_coding": "deepseek-coder-v2:16b",
+  "slot4_coding": "qwen2.5-coder:7b",
+  "slot6_coding_rotate": "codegemma:7b",
+  "general_ranking": [...],
+  "coding_ranking": [...],
+  "all_metrics": { ... }
 }
 ```
 
-## Slot Selection
-
-Slots are assigned from benchmark results as follows:
-
-1. **Slot 1 (Primary Coding)** -- model with the highest `coding_composite` score
-2. **Slot 2 (Primary General)** -- model with the highest `general_composite` score
-3. **Slot 3 (Secondary / Backup)** -- next-best model by overall average composite
-4. **Slot 4 (Experimental)** -- not assigned by benchmarks; set manually via `-e slot4_model=<name>`
+This file is read by `04_models.yml` to decide what to pull and warm up. It is committed
+to the repo so slot selections survive a clean checkout.

+ 17 - 6
benchmarks/results/benchmark_20260307T170059.md

@@ -1,15 +1,20 @@
 # Benchmark Results - 20260307T170059
 
 ## Model Selection
-| Slot | Role | Model | Composite Score |
-|------|------|-------|----------------|
-| 1 | General (Primary) | llama3.2:3b | 0.967 |
-| 2 | General (Secondary) | llama3.2:3b | 0.967 |
-| 3 | Coding (Primary) | deepseek-coder-v2 | 0.738 |
-| 4 | Coding (Secondary) | qwen2.5-coder:7b | 0.63 |
+
+
+| Slot | Role                | Model             | Composite Score |
+| ---- | ------------------- | ----------------- | --------------- |
+| 1    | General (Primary)   | llama3.2:3b       | 0.967           |
+| 2    | General (Secondary) | llama3.2:3b       | 0.967           |
+| 3    | Coding (Primary)    | deepseek-coder-v2 | 0.738           |
+| 4    | Coding (Secondary)  | qwen2.5-coder:7b  | 0.63            |
+
 
 ## Detailed Metrics
+
 ### deepseek-coder-v2
+
 - **Category**: coding
 - **Coding Quality**: 0.667
 - **General Quality**: 0.918
@@ -17,7 +22,9 @@
 - **Latency (ms)**: 1744.5
 - **Coding Composite**: 0.738
 - **General Composite**: 0.852
+
 ### qwen2.5-coder:7b
+
 - **Category**: coding
 - **Coding Quality**: 0.64
 - **General Quality**: 0.922
@@ -25,7 +32,9 @@
 - **Latency (ms)**: 1211.5
 - **Coding Composite**: 0.63
 - **General Composite**: 0.757
+
 ### llama3.2:3b
+
 - **Category**: general
 - **Coding Quality**: 0.607
 - **General Quality**: 0.991
@@ -35,7 +44,9 @@
 - **General Composite**: 0.967
 
 ## Scoring Formula
+
 - Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
 - Speed normalized against 22 tok/sec ceiling (hardware-observed max)
 - Coding quality: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
 - Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 92 - 0
benchmarks/results/benchmark_20260307T184212.md

@@ -0,0 +1,92 @@
+# Benchmark Results - 20260307T184212
+
+## Model Selection
+| Slot | Role | Model | Composite Score |
+|------|------|-------|----------------|
+| 1 | General (Primary) | llama3.2:3b | 0.001 |
+| 2 | General (Secondary) | gemma-family:latest | 0.0 |
+| 3 | Coding (Primary) | coder-128k:latest | 0.001 |
+| 4 | Coding (Secondary) | coder-32k:latest | 0.001 |
+
+## Detailed Metrics
+### gemma-family:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama-family:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### coder-128k:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 285394.5
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### coder-32k:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 142328.6
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### deepseek-coder-v2:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 143942.9
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 139756.5
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 22 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 147 - 0
benchmarks/results/benchmark_20260308T003605.md

@@ -0,0 +1,147 @@
+# Benchmark Results - 20260308T003605
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                 | Composite Score |
+| ---- | ------------------- | ---------------- | --------------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b           | 0.001           |
+| 2    | Node 1 (port 11434) | General (locked) | command-r:35b         | 0.0             |
+| 5    | Node 1 (port 11434) | General (rotate) | llama3.1:70b          | 0.0             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | codellama:34b         | 0.0             |
+| 4    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b | 0.0             |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:14b     | 0.0             |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0.008
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 221414.9
+- **Coding Composite**: 0.0
+- **General Composite**: 0.004
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### qwen2.5-coder:14b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 239690.0
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### command-r:35b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 169971.8
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### llama3.1:70b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### mistral-nemo:latest
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### mistral:latest
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 130127.2
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 70 - 0
benchmarks/results/benchmark_20260308T145246.md

@@ -0,0 +1,70 @@
+# Benchmark Results - 20260308T145246
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.001 |
+| 2 | Node 1 (port 11434) | General (locked) | mistral-nemo:latest | 0.0 |
+| 5 | Node 1 (port 11434) | General (rotate) | mistral:latest | 0.0 |
+| 3 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.0 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.0 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | none | N/A |
+
+## Detailed Metrics
+### mistral-nemo:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### mistral:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 109301.3
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 57 - 0
benchmarks/results/benchmark_20260308T215747.md

@@ -0,0 +1,57 @@
+# Benchmark Results - 20260308T215747
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model               | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b         | 0.45            |
+| 2    | Node 1 (port 11434) | General (locked) | mistral-nemo:latest | 0.45            |
+| 5    | Node 1 (port 11434) | General (rotate) | none                | N/A             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b    | 0.371           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b    | 0.371           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | none                | N/A             |
+
+
+## Detailed Metrics
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.917
+- **General Quality**: 1.0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.413
+- **General Composite**: 0.45
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.823
+- **General Quality**: 0.85
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.371
+- **General Composite**: 0.383
+
+### mistral-nemo:latest
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 1.0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.383
+- **General Composite**: 0.45
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 54 - 0
benchmarks/results/benchmark_20260309T080551.md

@@ -0,0 +1,54 @@
+# Benchmark Results - 20260309T080551
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.001 |
+| 2 | Node 1 (port 11434) | General (locked) | gemma3:12b-it-q4_K_M | 0.0 |
+| 5 | Node 1 (port 11434) | General (rotate) | none | N/A |
+| 3 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.316 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.0 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | none | N/A |
+
+## Detailed Metrics
+### deepseek-coder-v2:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 676104.3
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 154480.0
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 722357.3
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0.7
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 145493.5
+- **Coding Composite**: 0.316
+- **General Composite**: 0.001
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 47 - 0
benchmarks/results/benchmark_20260309T174604.md

@@ -0,0 +1,47 @@
+# Benchmark Results - 20260309T174604
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model            | Composite Score |
+| ---- | ------------------- | ---------------- | ---------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b      | 0.001           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.2:3b      | 0.001           |
+| 5    | Node 1 (port 11434) | General (rotate) | none             | N/A             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b | 0.001           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b | 0.001           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | none             | N/A             |
+
+
+## Detailed Metrics
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 108021.2
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 146781.6
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 67 - 0
benchmarks/results/benchmark_20260310T094843.md

@@ -0,0 +1,67 @@
+# Benchmark Results - 20260310T094843
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                    | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------------ | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b              | 0.814           |
+| 2    | Node 1 (port 11434) | General (locked) | gemma3:12b-it-q4_K_M     | 0.484           |
+| 5    | Node 1 (port 11434) | General (rotate) | none                     | N/A             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:latest | 0.693           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b         | 0.638           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | none                     | N/A             |
+
+
+## Detailed Metrics
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 22.8
+- **Latency (ms)**: 1612.6
+- **Coding Composite**: 0.693
+- **General Composite**: 0.739
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 22.4
+- **Latency (ms)**: 661.8
+- **Coding Composite**: 0.767
+- **General Composite**: 0.814
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.5
+- **Latency (ms)**: 5730.8
+- **Coding Composite**: 0.431
+- **General Composite**: 0.484
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.8
+- **Latency (ms)**: 1359.5
+- **Coding Composite**: 0.638
+- **General Composite**: 0.687
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 117 - 0
benchmarks/results/benchmark_20260310T102149.md

@@ -0,0 +1,117 @@
+# Benchmark Results - 20260310T102149
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                    | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------------ | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b              | 0.819           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.1:8b              | 0.621           |
+| 5    | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M     | 0.484           |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b    | 0.707           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:latest | 0.681           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:latest     | 0.644           |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4350.0
+- **Coding Composite**: 0.409
+- **General Composite**: 0.32
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.6
+- **Latency (ms)**: 1586.8
+- **Coding Composite**: 0.707
+- **General Composite**: 0.753
+
+### qwen2.5-coder:14B
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2223.7
+- **Coding Composite**: 0.549
+- **General Composite**: 0.608
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 22.2
+- **Latency (ms)**: 1759.1
+- **Coding Composite**: 0.681
+- **General Composite**: 0.727
+
+### qwen2.5-coder:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.8
+- **Latency (ms)**: 1239.2
+- **Coding Composite**: 0.644
+- **General Composite**: 0.694
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0.8
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2251.2
+- **Coding Composite**: 0.586
+- **General Composite**: 0.621
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.3
+- **Latency (ms)**: 1258.3
+- **Coding Composite**: 0.639
+- **General Composite**: 0.689
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 5701.3
+- **Coding Composite**: 0.432
+- **General Composite**: 0.484
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 22.7
+- **Latency (ms)**: 613.5
+- **Coding Composite**: 0.772
+- **General Composite**: 0.819
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 94 - 0
benchmarks/results/benchmark_20260310T110632.md

@@ -0,0 +1,94 @@
+# Benchmark Results - 20260310T110632
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.621 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.483 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.738 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.735 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:latest | 0.667 |
+
+## Detailed Metrics
+### codellama:34b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4244.1
+- **Coding Composite**: 0.437
+- **General Composite**: 0.326
+### deepseek-coder-v2:latest
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 25.0
+- **Latency (ms)**: 1543.2
+- **Coding Composite**: 0.735
+- **General Composite**: 0.758
+### deepseek-coder-v2:16b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.5
+- **Latency (ms)**: 1415.1
+- **Coding Composite**: 0.738
+- **General Composite**: 0.762
+### qwen2.5-coder:14B
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2195.9
+- **Coding Composite**: 0.572
+- **General Composite**: 0.609
+### qwen2.5-coder:latest
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.8
+- **Latency (ms)**: 1228.2
+- **Coding Composite**: 0.667
+- **General Composite**: 0.694
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2249.3
+- **Coding Composite**: 0.596
+- **General Composite**: 0.621
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.7
+- **Latency (ms)**: 1231.9
+- **Coding Composite**: 0.666
+- **General Composite**: 0.693
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.4
+- **Latency (ms)**: 6355.8
+- **Coding Composite**: 0.441
+- **General Composite**: 0.483
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 22.3
+- **Latency (ms)**: 644.2
+- **Coding Composite**: 0.785
+- **General Composite**: 0.814
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 107 - 0
benchmarks/results/benchmark_20260310T122818.md

@@ -0,0 +1,107 @@
+# Benchmark Results - 20260310T122818
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                 | Composite Score |
+| ---- | ------------------- | ---------------- | --------------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b           | 0.835           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.1:8b           | 0.624           |
+| 5    | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M  | 0.481           |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b | 0.727           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b      | 0.674           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:latest  | 0.671           |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4261.3
+- **Coding Composite**: 0.436
+- **General Composite**: 0.325
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.1
+- **Latency (ms)**: 1583.1
+- **Coding Composite**: 0.727
+- **General Composite**: 0.75
+
+### qwen2.5-coder:14B
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2172.1
+- **Coding Composite**: 0.573
+- **General Composite**: 0.61
+
+### qwen2.5-coder:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.4
+- **Latency (ms)**: 1102.0
+- **Coding Composite**: 0.671
+- **General Composite**: 0.698
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.9
+- **Latency (ms)**: 2186.7
+- **Coding Composite**: 0.6
+- **General Composite**: 0.624
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.6
+- **Latency (ms)**: 1073.7
+- **Coding Composite**: 0.674
+- **General Composite**: 0.701
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.2
+- **Latency (ms)**: 6142.8
+- **Coding Composite**: 0.439
+- **General Composite**: 0.481
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 24.5
+- **Latency (ms)**: 568.5
+- **Coding Composite**: 0.806
+- **General Composite**: 0.835
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 107 - 0
benchmarks/results/benchmark_20260310T160815.md

@@ -0,0 +1,107 @@
+# Benchmark Results - 20260310T160815
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                    | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------------ | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b              | 0.832           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.1:8b              | 0.624           |
+| 5    | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M     | 0.482           |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b    | 0.737           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:latest | 0.735           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:7b         | 0.666           |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4336.2
+- **Coding Composite**: 0.432
+- **General Composite**: 0.321
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.1
+- **Latency (ms)**: 1411.4
+- **Coding Composite**: 0.735
+- **General Composite**: 0.759
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.2
+- **Latency (ms)**: 1383.8
+- **Coding Composite**: 0.737
+- **General Composite**: 0.76
+
+### qwen2.5-coder:14B
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2181.0
+- **Coding Composite**: 0.573
+- **General Composite**: 0.609
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2183.4
+- **Coding Composite**: 0.6
+- **General Composite**: 0.624
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.6
+- **Latency (ms)**: 1210.0
+- **Coding Composite**: 0.666
+- **General Composite**: 0.693
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.2
+- **Latency (ms)**: 5540.1
+- **Coding Composite**: 0.44
+- **General Composite**: 0.482
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 24.2
+- **Latency (ms)**: 581.0
+- **Coding Composite**: 0.803
+- **General Composite**: 0.832
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 78 - 0
benchmarks/results/benchmark_20260310T170013.md

@@ -0,0 +1,78 @@
+# Benchmark Results - 20260310T170013
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.623 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.481 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.723 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.655 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:14B | 0.57 |
+
+## Detailed Metrics
+### codellama:34b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4235.4
+- **Coding Composite**: 0.437
+- **General Composite**: 0.326
+### deepseek-coder-v2:16b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 23.5
+- **Latency (ms)**: 1568.5
+- **Coding Composite**: 0.723
+- **General Composite**: 0.746
+### qwen2.5-coder:14B
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2229.7
+- **Coding Composite**: 0.57
+- **General Composite**: 0.607
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2202.0
+- **Coding Composite**: 0.599
+- **General Composite**: 0.623
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.5
+- **Latency (ms)**: 1431.0
+- **Coding Composite**: 0.655
+- **General Composite**: 0.682
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.1
+- **Latency (ms)**: 5941.9
+- **Coding Composite**: 0.439
+- **General Composite**: 0.481
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 23.0
+- **Latency (ms)**: 754.8
+- **Coding Composite**: 0.786
+- **General Composite**: 0.814
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 433 - 0
benchmarks/results/benchmark_review_20260310.md

@@ -0,0 +1,433 @@
+# Ticket Summary — Post-Change Benchmark Review: num_predict 300 → 500
+
+## Description
+
+After resolving the dual NUMA/CPUAffinity performance regression (2026-03-10), two
+post-fix benchmark runs were executed to validate the effect of raising
+`benchmark_num_predict` from 300 to 500. This document captures the four-run history,
+before/after comparison, full Run 4 model results, and findings on system tuning state.
+
+---
+
+## Acceptance Criteria
+
+- [x] Run 3 (num_predict=300) and Run 4 (num_predict=500) compared on common models
+- [x] All tuning variables reviewed and declared optimal or requiring action
+- [x] Any model-identity anomalies flagged for follow-up
+- [x] MEMORY.md updated with current variable values
+- [x] This ticket summary written to `benchmarks/results/`
+
+---
+
+## Work Implemented
+
+### Run History
+
+| Run | Timestamp | Condition | Result |
+|-----|-----------|-----------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300, 4 models | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | Post-NUMA-fix, num_predict=500, 9 models | quality=0.83–0.97, tok/sec=3.2–25.0 |
+
+### Before vs. After (Runs 3 → 4, common models)
+
+| Model | coding_quality @ 300 | coding_quality @ 500 | Delta |
+|-------|---------------------|---------------------|-------|
+| deepseek-coder-v2:latest | 0.783 | 0.833 | +0.050 |
+| qwen2.5-coder:7b | 0.800 | 0.850 | +0.050 |
+| llama3.2:3b | 0.850 | 0.890 | +0.040 |
+| gemma3:12b-it-q4_K_M | 0.850 | 0.873 | +0.023 |
+
+### Full Run 4 Results (num_predict=500, 9 models)
+
+| Model | tok/sec | coding_q | general_q | latency_ms | coding_composite | general_composite | category |
+|-------|---------|----------|-----------|------------|-----------------|------------------|----------|
+| deepseek-coder-v2:16b | 24.5 | 0.833 | 0.885 | 1415.1 | 0.738 | 0.762 | coding |
+| deepseek-coder-v2:latest | 25.0 | 0.833 | 0.885 | 1543.2 | 0.735 | 0.758 | coding |
+| qwen2.5-coder:latest | 12.8 | 0.850 | 0.910 | 1228.2 | 0.667 | 0.694 | coding |
+| qwen2.5-coder:7b | 12.7 | 0.850 | 0.910 | 1231.9 | 0.666 | 0.693 | coding |
+| qwen2.5-coder:14B | 6.6 | 0.850 | 0.931 | 2195.9 | 0.572 | 0.609 | coding |
+| codellama:34b | 3.2 | 0.833 | 0.586 | 4244.1 | 0.437 | 0.326 | coding |
+| llama3.2:3b | 22.3 | 0.890 | 0.954 | 644.2 | 0.785 | 0.814 | general |
+| llama3.1:8b | 11.8 | 0.823 | 0.877 | 2249.3 | 0.596 | 0.621 | general |
+| gemma3:12b-it-q4_K_M | 6.4 | 0.873 | 0.966 | 6355.8 | 0.441 | 0.483 | general |
+
+### Current Slot Assignments (model_selection.json)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.621 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.738 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.735 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.483 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:latest | 0.667 |
+
+### Tuning Variable Status
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal — rubric ceiling is now the binding constraint |
+| `benchmark_large_timeout` | 480s | Adequate — 6–20x margin at current 3–25 tok/sec speeds |
+| `benchmark_toks_norm_ceiling` | 40 | Correct — fastest model at 62.5% of ceiling |
+| `benchmark_coding_threshold` | 0.10 | Correct — name-pattern fallback handling remaining cases |
+| Scoring weights | 0.45/0.30/0.25 | Appropriate for interactive serving platform |
+
+### Findings
+
+**Finding 1 — num_predict=500 confirmed correct.** Every model improved on coding_quality
+(+0.023 to +0.050). No timeouts observed. The rubric ceiling is now the binding constraint;
+further increases (700+) would yield at most +0.02 additional improvement.
+
+**Finding 2 — Coding quality inversion narrowed (expected, not a bug).** Coding specialists
+score lower on coding than general quality because general prompts don't require `assert`,
+`test_def`, or `type_hint` (the hardest scoring markers). The gap halved from ~−0.110 to
+~−0.052 vs. Run 3, confirming truncation was part of the cause. Name-pattern fallback
+continues to correctly classify these models.
+
+**Finding 3 — deepseek-coder-v2:16b and :latest may be the same weights (ACTION REQUIRED).**
+Both share identical quality scores (0.833/0.885) and nearly identical throughput (24.5 vs.
+25.0 tok/sec). In Ollama, `:latest` typically resolves to the same weights as the default
+variant. If confirmed identical, slots 3 and 4 hold duplicate models — zero benefit, wasted
+VRAM. See Testing Needed for verification steps.
+
+**Finding 4 — qwen2.5-coder:latest and :7b are near-identical (informational).** Composites
+of 0.667 vs. 0.666. Lower impact since only one is active in slot 6 at a time.
+
+**Finding 5 — llama3.2:3b outperforms coding specialists on coding composite (informational).**
+coding_composite=0.785 beats all dedicated coding models. Mathematically correct: speed
+(22.3 tok/sec) and latency (644ms) dominate. Correctly classified general because
+general_composite (0.814) > coding_composite (0.785), delta < 0.10 threshold.
+
+**Finding 6 — codellama:34b correctly excluded.** 3.2 tok/sec, general_quality=0.586 falls
+below min_quality_score=0.6. Scoring system worked as designed.
+
+---
+
+## Testing Needed
+
+### Finding 3 — Verify deepseek-coder-v2:16b vs :latest digest
+
+Run on `ai_server`:
+
+```bash
+ollama show deepseek-coder-v2:16b --modelfile | grep FROM
+ollama show deepseek-coder-v2:latest --modelfile | grep FROM
+```
+
+**If digests match (same weights):** update `model_selection.json` slot4_coding manually
+(or remove one deepseek variant and re-run `03_benchmark.yml`) to redirect slot 4 to
+`qwen2.5-coder:14B` (composite=0.572) or another diverse candidate for model diversity.
+
+**If digests differ (different weights):** no action — the pipeline is working as designed.
+
+### Regression check after any slot4 change
+
+If slot4 is redirected, run:
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+Confirm both warmup services start cleanly:
+
+```bash
+systemctl status ollama-warmup.service ollama-warmup-node0.service
+```
+
+---
+
+# Addendum — Run 5 Review (post deepseek:latest removal)
+
+## Run History (all five runs)
+
+| Run | Timestamp | Condition | Models | Result |
+|-----|-----------|-----------|--------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | — | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | — | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300 | 4 | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | num_predict=500, deepseek:latest present | 9 | quality=0.83–0.97, tok/sec=3.2–25.0 |
+| 5 | 20260310T122818 | num_predict=500, deepseek:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.5 |
+
+## Run 4 → Run 5 Comparison (all common models)
+
+| Model | R4 tok/sec | R5 tok/sec | R4 coding_comp | R5 coding_comp | Delta |
+|-------|-----------|-----------|----------------|----------------|-------|
+| deepseek-coder-v2:16b | 24.5 | 24.1 | 0.738 | 0.727 | −0.011 (noise) |
+| qwen2.5-coder:latest | 12.8 | 12.4 | 0.667 | 0.671 | +0.004 (noise) |
+| qwen2.5-coder:7b | 12.7 | 12.6 | 0.666 | 0.674 | +0.008 (noise) |
+| qwen2.5-coder:14B | 6.6 | 6.6 | 0.572 | 0.573 | +0.001 (noise) |
+| llama3.2:3b | 22.3 | 24.5 | 0.785 | 0.806 | +0.021 (notable) |
+| llama3.1:8b | 11.8 | 11.9 | 0.596 | 0.600 | +0.004 (noise) |
+| gemma3:12b-it-q4_K_M | 6.4 | 6.2 | 0.441 | 0.439 | −0.002 (noise) |
+| codellama:34b | 3.2 | 3.2 | 0.437 | 0.436 | −0.001 (noise) |
+
+Quality scores (coding_quality, general_quality) are **identical** across both runs —
+confirming rubric scores are stable and deterministic at num_predict=500.
+
+## Run 5 Slot Assignments (model_selection.json)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.835 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.624 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.481 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.727 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.674 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:latest | 0.671 |
+
+Note: slot4 is `qwen2.5-coder:7b` — the pipeline correctly ranked it #2 coding (0.674),
+superseding the manual `qwen2.5-coder:14B` edit made earlier this session.
+
+## Findings
+
+**Finding 1 — System is stable; tuning parameters remain optimal (no action).** All quality
+scores are identical between Run 4 and Run 5. Speed and latency deltas are within normal
+run-to-run variance (±0.4 tok/sec, ±200ms TTFT). No tuning changes needed.
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal — rubric ceiling is binding constraint |
+| `benchmark_large_timeout` | 480s | Adequate — 6–20x margin at 3–25 tok/sec |
+| `benchmark_toks_norm_ceiling` | 40 | Correct — fastest model at 61% of ceiling |
+| `benchmark_coding_threshold` | 0.10 | Correct — name-pattern fallback working |
+| Scoring weights | 0.45/0.30/0.25 | Appropriate for interactive serving |
+
+**Finding 2 — llama3.2:3b improved after deepseek:latest removal (informational).**
+tok/sec: 22.3 → 24.5 (+2.2), general_composite: 0.814 → 0.835 (+0.021). Likely cause:
+removing one large model reduced memory pressure / NUMA contention during warmup. The 3b
+model benefits most as it runs fastest and competes most for memory bandwidth.
+
+**Finding 3 — qwen2.5-coder:7b and :latest confirmed duplicate weights (RESOLVED).**
+Run 5 slot4=`:7b` (0.674) and slot6=`:latest` (0.671) showed identical quality scores
+(coding=0.850, general=0.910) and nearly identical throughput (~12.4–12.8 tok/sec) across
+both runs — same pattern as the deepseek duplicate. Verified on ai_server:
+
+```
+qwen2.5-coder:7b    → sha256-60e05f2100071479f596b964f89f510f057ce397ea22f2833a0cfe029bfc2463
+qwen2.5-coder:latest → sha256-60e05f2100071479f596b964f89f510f057ce397ea22f2833a0cfe029bfc2463
+```
+
+Digests match. `qwen2.5-coder:latest` removed. Next step: re-run `03_benchmark.yml` (Run 6)
+to promote `qwen2.5-coder:14B` to slot6_rotate, achieving genuine speed/quality diversity
+on Node 0:
+- slot3: deepseek-coder-v2:16b — fast+deep (24 tok/sec, 16B)
+- slot4: qwen2.5-coder:7b — fast+light (12.6 tok/sec, 7B)
+- slot6: qwen2.5-coder:14B — slower+richer quality (6.6 tok/sec, 14B)
+
+**Finding 4 — gemma3:12b latency_score=0 is persistent (informational, no action).**
+TTFT consistently 6.1–6.4 seconds, above the 5000ms floor → latency_score=0 every run.
+Hardware-limited (large quant loading time on Node 1), not a tuning issue. The model
+correctly holds slot5_general_rotate on the strength of general_quality=0.966. The latency
+penalty is working as intended.
+
+**Finding 5 — codellama:34b remains correctly excluded (informational, no action).**
+composite=0.436, general_quality=0.586 — below both min_composite_score=0.55 and
+min_quality_score=0.6 every run. Pipeline working as designed.
+
+## Next Action
+
+Run 6: re-benchmark after `qwen2.5-coder:latest` removal to promote `qwen2.5-coder:14B`
+to slot6_rotate and achieve model diversity on Node 0.
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+---
+
+# Addendum — Run 6 Review (post qwen2.5-coder:latest removal)
+
+## Run History (all six runs)
+
+| Run | Timestamp | Condition | Models | Result |
+|-----|-----------|-----------|--------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | — | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | — | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300 | 4 | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | num_predict=500, deepseek:latest present | 9 | quality=0.83–0.97, tok/sec=3.2–25.0 |
+| 5 | 20260310T122818 | num_predict=500, deepseek:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.5 |
+| 6 | 20260310T160815 | num_predict=500, qwen2.5-coder:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.2 |
+
+## Full Run 6 Results
+
+| Model | tok/sec | coding_q | general_q | latency_ms | coding_comp | general_comp | category |
+|-------|---------|----------|-----------|------------|-------------|--------------|----------|
+| deepseek-coder-v2:16b | 24.2 | 0.833 | 0.885 | 1383.8 | 0.737 | 0.760 | coding |
+| deepseek-coder-v2:latest | 24.1 | 0.833 | 0.885 | 1411.4 | 0.735 | 0.759 | coding |
+| qwen2.5-coder:7b | 12.6 | 0.850 | 0.910 | 1210.0 | 0.666 | 0.693 | coding |
+| qwen2.5-coder:14B | 6.6 | 0.850 | 0.931 | 2181.0 | 0.573 | 0.609 | coding |
+| codellama:34b | 3.2 | 0.833 | 0.586 | 4336.2 | 0.432 | 0.321 | coding |
+| llama3.2:3b | 24.2 | 0.890 | 0.954 | 581.0 | 0.803 | 0.832 | general |
+| llama3.1:8b | 11.8 | 0.823 | 0.877 | 2183.4 | 0.600 | 0.624 | general |
+| gemma3:12b-it-q4_K_M | 6.2 | 0.873 | 0.966 | 5540.1 | 0.440 | 0.482 | general |
+
+## Run 5 → Run 6 Comparison (all common models)
+
+| Model | R5 tok/sec | R6 tok/sec | R5 coding_comp | R6 coding_comp | Delta |
+|-------|-----------|-----------|----------------|----------------|-------|
+| deepseek-coder-v2:16b | 24.1 | 24.2 | 0.727 | 0.737 | +0.010 (noise) |
+| qwen2.5-coder:7b | 12.6 | 12.6 | 0.674 | 0.666 | −0.008 (noise) |
+| qwen2.5-coder:14B | 6.6 | 6.6 | 0.573 | 0.573 | 0.000 |
+| llama3.2:3b | 24.5 | 24.2 | 0.806 | 0.803 | −0.003 (noise) |
+| llama3.1:8b | 11.9 | 11.8 | 0.600 | 0.600 | 0.000 |
+| gemma3:12b-it-q4_K_M | 6.2 | 6.2 | 0.439 | 0.440 | +0.001 (noise) |
+| codellama:34b | 3.2 | 3.2 | 0.436 | 0.432 | −0.004 (noise) |
+
+Quality scores are **identical** across all common models. All composites within run-to-run
+noise (≤ ±0.010). Rubric confirmed deterministic across 6 runs.
+
+## Run 6 Slot Assignments (model_selection.json — current state)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.832 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.624 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.482 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.737 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.735 ← REGRESSION |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:7b | 0.666 |
+
+## Findings
+
+**Finding 1 — deepseek-coder-v2:latest re-appeared in slot4 (REGRESSION, now fixed).**
+Previously confirmed duplicate of `:16b` and removed after Run 4. Re-appeared in Run 6
+because `group_vars/all.yml` contained two pull sources:
+
+1. `baseline_models` (line 121): `"deepseek-coder-v2"` — untagged, Ollama resolves to
+   `:latest`, re-pulling the duplicate on every benchmark run.
+2. `candidate_models`: explicit `"deepseek-coder-v2:latest"` entry — unconditionally pulls
+   `:latest` as a testable model.
+
+**Fix applied to `inventory/group_vars/all.yml`:**
+- `baseline_models`: changed `"deepseek-coder-v2"` → `"deepseek-coder-v2:16b"` (explicit tag)
+- `candidate_models`: removed the `deepseek-coder-v2:latest` entry entirely
+
+**Also required on ai_server:** `ollama rm deepseek-coder-v2:latest`
+
+**Finding 2 — All scores and tuning variables remain stable (no action).** Every delta vs
+Run 5 is within noise (≤ ±0.010 composite, quality scores identical). The rubric is
+confirmed deterministic across 6 runs.
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal |
+| `benchmark_large_timeout` | 480s | Adequate |
+| `benchmark_toks_norm_ceiling` | 40 | Correct |
+| `benchmark_coding_threshold` | 0.10 | Correct |
+
+**Finding 3 — qwen2.5-coder:14B not yet in slot6 (consequence of Finding 1).** With
+deepseek:latest occupying slot4, the coding rank yields:
+  #1 deepseek:16b (0.737) → slot3, #2 deepseek:latest (0.735) → slot4,
+  #3 qwen:7b (0.666) → slot6, #4 qwen:14B (0.573) → excluded.
+After deepseek:latest is permanently removed, Run 7 expected layout:
+  slot3=deepseek:16b, slot4=qwen:7b, slot6=qwen:14B.
+
+**Finding 4 — gemma3:12b TTFT=5540ms (informational, no action).** Persistently above
+5000ms floor → latency_score=0 every run. Hardware-limited, not a tuning issue.
+Correctly holds slot5_general_rotate on general_quality=0.966.
+
+**Finding 5 — codellama:34b correctly excluded again (informational, no action).**
+composite=0.432, general_quality=0.586 — below both thresholds. Pipeline working as designed.
+
+## Next Action
+
+1. Remove duplicate from ai_server: `ollama rm deepseek-coder-v2:latest`
+2. Run 7 (clean benchmark):
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+Expected Run 7: slot4=`qwen2.5-coder:7b`, slot6=`qwen2.5-coder:14B`,
+`deepseek-coder-v2:latest` absent from `all_metrics`.
+
+---
+
+# Addendum — Run 7 Review (target Node 0 layout achieved, session closed)
+
+## Run History (all seven runs)
+
+| Run | Timestamp | Condition | Models | Result |
+|-----|-----------|-----------|--------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | — | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | — | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300 | 4 | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | num_predict=500, deepseek:latest present | 9 | quality=0.83–0.97, tok/sec=3.2–25.0 |
+| 5 | 20260310T122818 | num_predict=500, deepseek:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.5 |
+| 6 | 20260310T160815 | num_predict=500, qwen2.5-coder:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.2 |
+| 7 | 20260310T170013 | group_vars fix applied, deepseek:latest absent | 7 | quality=0.83–0.97, tok/sec=3.2–23.5 |
+
+## Full Run 7 Results
+
+| Model | tok/sec | coding_q | general_q | latency_ms | coding_comp | general_comp | category |
+|-------|---------|----------|-----------|------------|-------------|--------------|----------|
+| deepseek-coder-v2:16b | 23.5 | 0.833 | 0.885 | 1568.5 | 0.723 | 0.746 | coding |
+| qwen2.5-coder:7b | 12.5 | 0.850 | 0.910 | 1431.0 | 0.655 | 0.682 | coding |
+| qwen2.5-coder:14B | 6.6 | 0.850 | 0.931 | 2229.7 | 0.570 | 0.607 | coding |
+| codellama:34b | 3.2 | 0.833 | 0.586 | 4235.4 | 0.437 | 0.326 | coding |
+| llama3.2:3b | 23.0 | 0.890 | 0.954 | 754.8 | 0.786 | 0.814 | general |
+| llama3.1:8b | 11.8 | 0.823 | 0.877 | 2202.0 | 0.599 | 0.623 | general |
+| gemma3:12b-it-q4_K_M | 6.1 | 0.873 | 0.966 | 5941.9 | 0.439 | 0.481 | general |
+
+`deepseek-coder-v2:latest` **absent** from `all_metrics` — group_vars fix verified working.
+
+## Run 6 → Run 7 Comparison (all common models)
+
+| Model | R6 tok/sec | R7 tok/sec | R6 coding_comp | R7 coding_comp | Delta |
+|-------|-----------|-----------|----------------|----------------|-------|
+| deepseek-coder-v2:16b | 24.2 | 23.5 | 0.737 | 0.723 | −0.014 (noise) |
+| qwen2.5-coder:7b | 12.6 | 12.5 | 0.666 | 0.655 | −0.011 (noise) |
+| qwen2.5-coder:14B | 6.6 | 6.6 | 0.573 | 0.570 | −0.003 (noise) |
+| llama3.2:3b | 24.2 | 23.0 | 0.803 | 0.786 | −0.017 (noise) |
+| llama3.1:8b | 11.8 | 11.8 | 0.600 | 0.599 | −0.001 (noise) |
+| gemma3:12b-it-q4_K_M | 6.2 | 6.1 | 0.440 | 0.439 | −0.001 (noise) |
+| codellama:34b | 3.2 | 3.2 | 0.432 | 0.437 | +0.005 (noise) |
+
+Quality scores are **identical** across all common models. All composites within run-to-run
+noise (≤ ±0.017). Rubric confirmed deterministic across 7 runs.
+
+## Run 7 Slot Assignments (final, confirmed clean)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.623 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.481 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.723 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.655 ✅ |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:14B | 0.570 ✅ |
+
+## Findings
+
+**Finding 1 — Target Node 0 diversity layout achieved (RESOLVED).** Run 7 confirms the
+intended three-tier Node 0 layout:
+- slot3: deepseek-coder-v2:16b — deep specialist (23.5 tok/sec, 16B params)
+- slot4: qwen2.5-coder:7b — fast+light (12.5 tok/sec, 7B params)
+- slot6: qwen2.5-coder:14B — slower+richer (6.6 tok/sec, 14B params)
+
+All three are genuinely distinct models with different speed/quality tradeoffs.
+
+**Finding 2 — group_vars fix verified working (RESOLVED).** `deepseek-coder-v2:latest` is
+absent from `all_metrics`. Explicit `:16b` tag in `baseline_models` prevents Ollama from
+resolving to `:latest` on subsequent runs. The fix is durable — re-running `03_benchmark.yml`
+will not re-introduce the duplicate.
+
+**Finding 3 — All scores and tuning variables stable (no action).** Every delta vs Run 6 is
+within noise (≤ ±0.017 composite, quality scores identical). The pipeline is confirmed
+deterministic and stable.
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal |
+| `benchmark_large_timeout` | 480s | Adequate |
+| `benchmark_toks_norm_ceiling` | 40 | Correct |
+| `benchmark_coding_threshold` | 0.10 | Correct |
+
+**Finding 4 — Benchmark pipeline declared stable. Session closed.** Seven runs over two
+days confirmed: NUMA fix correct, scoring rubric deterministic, duplicate-model detection
+pattern documented, group_vars idempotent. No further benchmark runs or tuning changes are
+needed unless new models are added to `candidate_models`.

+ 151 - 71
benchmarks/results/model_selection.json

@@ -1,116 +1,196 @@
 {
     "all_metrics": {
-        "deepseek-coder-v2:latest": {
-            "avg_tok_per_sec": 21.6,
+        "codellama:34b": {
+            "avg_tok_per_sec": 3.2,
             "category": "coding",
-            "coding_composite": 0.764,
-            "coding_quality": 0.657,
-            "general_composite": 0.867,
-            "general_quality": 0.886,
-            "latency_ms": 1510.5,
-            "latency_score": 0.698,
-            "toks_norm": 0.982
+            "coding_composite": 0.437,
+            "coding_quality": 0.833,
+            "general_composite": 0.326,
+            "general_quality": 0.586,
+            "latency_ms": 4235.4,
+            "latency_score": 0.153,
+            "toks_norm": 0.08
+        },
+        "deepseek-coder-v2:16b": {
+            "avg_tok_per_sec": 23.5,
+            "category": "coding",
+            "coding_composite": 0.723,
+            "coding_quality": 0.833,
+            "general_composite": 0.746,
+            "general_quality": 0.885,
+            "latency_ms": 1568.5,
+            "latency_score": 0.686,
+            "toks_norm": 0.586
         },
         "gemma3:12b-it-q4_K_M": {
-            "avg_tok_per_sec": 5.6,
+            "avg_tok_per_sec": 6.1,
             "category": "general",
-            "coding_composite": 0.416,
-            "coding_quality": 0.757,
-            "general_composite": 0.495,
-            "general_quality": 0.931,
-            "latency_ms": 5975.8,
+            "coding_composite": 0.439,
+            "coding_quality": 0.873,
+            "general_composite": 0.481,
+            "general_quality": 0.966,
+            "latency_ms": 5941.9,
             "latency_score": 0,
-            "toks_norm": 0.253
+            "toks_norm": 0.153
+        },
+        "llama3.1:8b": {
+            "avg_tok_per_sec": 11.8,
+            "category": "general",
+            "coding_composite": 0.599,
+            "coding_quality": 0.823,
+            "general_composite": 0.623,
+            "general_quality": 0.877,
+            "latency_ms": 2202.0,
+            "latency_score": 0.56,
+            "toks_norm": 0.294
         },
         "llama3.2:3b": {
-            "avg_tok_per_sec": 22.5,
+            "avg_tok_per_sec": 23.0,
             "category": "general",
-            "coding_composite": 0.846,
-            "coding_quality": 0.723,
-            "general_composite": 0.961,
-            "general_quality": 0.979,
-            "latency_ms": 580.7,
-            "latency_score": 0.884,
-            "toks_norm": 1.0
+            "coding_composite": 0.786,
+            "coding_quality": 0.89,
+            "general_composite": 0.814,
+            "general_quality": 0.954,
+            "latency_ms": 754.8,
+            "latency_score": 0.849,
+            "toks_norm": 0.576
+        },
+        "qwen2.5-coder:14B": {
+            "avg_tok_per_sec": 6.6,
+            "category": "coding",
+            "coding_composite": 0.57,
+            "coding_quality": 0.85,
+            "general_composite": 0.607,
+            "general_quality": 0.931,
+            "latency_ms": 2229.7,
+            "latency_score": 0.554,
+            "toks_norm": 0.164
         },
         "qwen2.5-coder:7b": {
-            "avg_tok_per_sec": 12.3,
+            "avg_tok_per_sec": 12.5,
             "category": "coding",
-            "coding_composite": 0.664,
-            "coding_quality": 0.683,
-            "general_composite": 0.756,
-            "general_quality": 0.888,
-            "latency_ms": 1222.4,
-            "latency_score": 0.756,
-            "toks_norm": 0.56
+            "coding_composite": 0.655,
+            "coding_quality": 0.85,
+            "general_composite": 0.682,
+            "general_quality": 0.91,
+            "latency_ms": 1431.0,
+            "latency_score": 0.714,
+            "toks_norm": 0.312
         }
     },
     "coding_ranking": [
         {
-            "composite": 0.764,
+            "composite": 0.723,
             "metrics": {
-                "avg_tok_per_sec": 21.6,
+                "avg_tok_per_sec": 23.5,
                 "category": "coding",
-                "coding_composite": 0.764,
-                "coding_quality": 0.657,
-                "general_composite": 0.867,
-                "general_quality": 0.886,
-                "latency_ms": 1510.5,
-                "latency_score": 0.698,
-                "toks_norm": 0.982
+                "coding_composite": 0.723,
+                "coding_quality": 0.833,
+                "general_composite": 0.746,
+                "general_quality": 0.885,
+                "latency_ms": 1568.5,
+                "latency_score": 0.686,
+                "toks_norm": 0.586
             },
-            "name": "deepseek-coder-v2:latest"
+            "name": "deepseek-coder-v2:16b"
         },
         {
-            "composite": 0.664,
+            "composite": 0.655,
             "metrics": {
-                "avg_tok_per_sec": 12.3,
+                "avg_tok_per_sec": 12.5,
                 "category": "coding",
-                "coding_composite": 0.664,
-                "coding_quality": 0.683,
-                "general_composite": 0.756,
-                "general_quality": 0.888,
-                "latency_ms": 1222.4,
-                "latency_score": 0.756,
-                "toks_norm": 0.56
+                "coding_composite": 0.655,
+                "coding_quality": 0.85,
+                "general_composite": 0.682,
+                "general_quality": 0.91,
+                "latency_ms": 1431.0,
+                "latency_score": 0.714,
+                "toks_norm": 0.312
             },
             "name": "qwen2.5-coder:7b"
+        },
+        {
+            "composite": 0.57,
+            "metrics": {
+                "avg_tok_per_sec": 6.6,
+                "category": "coding",
+                "coding_composite": 0.57,
+                "coding_quality": 0.85,
+                "general_composite": 0.607,
+                "general_quality": 0.931,
+                "latency_ms": 2229.7,
+                "latency_score": 0.554,
+                "toks_norm": 0.164
+            },
+            "name": "qwen2.5-coder:14B"
+        },
+        {
+            "composite": 0.437,
+            "metrics": {
+                "avg_tok_per_sec": 3.2,
+                "category": "coding",
+                "coding_composite": 0.437,
+                "coding_quality": 0.833,
+                "general_composite": 0.326,
+                "general_quality": 0.586,
+                "latency_ms": 4235.4,
+                "latency_score": 0.153,
+                "toks_norm": 0.08
+            },
+            "name": "codellama:34b"
         }
     ],
     "general_ranking": [
         {
-            "composite": 0.961,
+            "composite": 0.814,
             "metrics": {
-                "avg_tok_per_sec": 22.5,
+                "avg_tok_per_sec": 23.0,
                 "category": "general",
-                "coding_composite": 0.846,
-                "coding_quality": 0.723,
-                "general_composite": 0.961,
-                "general_quality": 0.979,
-                "latency_ms": 580.7,
-                "latency_score": 0.884,
-                "toks_norm": 1.0
+                "coding_composite": 0.786,
+                "coding_quality": 0.89,
+                "general_composite": 0.814,
+                "general_quality": 0.954,
+                "latency_ms": 754.8,
+                "latency_score": 0.849,
+                "toks_norm": 0.576
             },
             "name": "llama3.2:3b"
         },
         {
-            "composite": 0.495,
+            "composite": 0.623,
             "metrics": {
-                "avg_tok_per_sec": 5.6,
+                "avg_tok_per_sec": 11.8,
                 "category": "general",
-                "coding_composite": 0.416,
-                "coding_quality": 0.757,
-                "general_composite": 0.495,
-                "general_quality": 0.931,
-                "latency_ms": 5975.8,
+                "coding_composite": 0.599,
+                "coding_quality": 0.823,
+                "general_composite": 0.623,
+                "general_quality": 0.877,
+                "latency_ms": 2202.0,
+                "latency_score": 0.56,
+                "toks_norm": 0.294
+            },
+            "name": "llama3.1:8b"
+        },
+        {
+            "composite": 0.481,
+            "metrics": {
+                "avg_tok_per_sec": 6.1,
+                "category": "general",
+                "coding_composite": 0.439,
+                "coding_quality": 0.873,
+                "general_composite": 0.481,
+                "general_quality": 0.966,
+                "latency_ms": 5941.9,
                 "latency_score": 0,
-                "toks_norm": 0.253
+                "toks_norm": 0.153
             },
             "name": "gemma3:12b-it-q4_K_M"
         }
     ],
     "slot1_general": "llama3.2:3b",
-    "slot2_general": "gemma3:12b-it-q4_K_M",
-    "slot3_coding": "deepseek-coder-v2:latest",
-    "slot4_coding": "qwen2.5-coder:7b"
+    "slot2_general": "llama3.1:8b",
+    "slot3_coding": "deepseek-coder-v2:16b",
+    "slot4_coding": "qwen2.5-coder:7b",
+    "slot5_general_rotate": "gemma3:12b-it-q4_K_M",
+    "slot6_coding_rotate": "qwen2.5-coder:14B"
 }

+ 26 - 10
inventory/group_vars/all.yml

@@ -51,6 +51,7 @@ vault_approle_name: "ai-services"
 # Service ports
 keycloak_port: 8180
 ollama_port: 11434
+ollama_node0_port: 11435
 qdrant_http_port: 6333
 qdrant_grpc_port: 6334
 
@@ -58,17 +59,19 @@ qdrant_grpc_port: 6334
 ollama_host: "0.0.0.0:11434"
 ollama_num_threads: 14
 ollama_num_parallel: 2
-ollama_max_loaded_models: 4
+ollama_max_loaded_models: 3   # 3 per socket (6 total across both NUMA instances)
 ollama_keep_alive: "-1"
 ollama_flash_attention: "1"
 
 # NUMA/CPU affinity - Dell M630, 2x E5-2690v4
 # CPUs are interleaved: odd = socket 1 (NUMA node 1), even = socket 0.
 # Physical cores on node 1: 1,3,...,27 (14 cores). HT siblings: 29,31,...,55.
+# Physical cores on node 0: 0,2,...,26 (14 cores). HT siblings: 28,30,...,54.
 # Pinning to physical cores only eliminates HT contention on the memory bus.
 # NUMA node 1 has ~120 GB free RAM vs node 0's ~75 GB.
 ollama_numa_node: "1"
 ollama_cpu_affinity: "1 3 5 7 9 11 13 15 17 19 21 23 25 27"
+ollama_node0_cpu_affinity: "0 2 4 6 8 10 12 14 16 18 20 22 24 26"
 ollama_binary_path: /usr/bin/ollama
 
 # Keycloak configuration
@@ -85,9 +88,27 @@ benchmark_thresholds:
   min_quality_score: 0.6
   min_composite_score: 0.55
 
-benchmark_toks_norm_ceiling: 22     # Observed hardware max on Dell M630 (22.5 tok/sec measured)
+benchmark_toks_norm_ceiling: 40     # Conservative dual-socket estimate (was 22 single-socket)
 benchmark_coding_threshold: 0.10    # Delta to classify a model as coding-specialized
 
+# Modelfile aliases created by 04_models.yml — excluded from benchmark to prevent
+# 32k-token KV cache allocations stalling the run with 285-second response times.
+benchmark_skip_aliases:
+  - "coder-128k"
+  - "coder-32k"
+  - "coder-rotate"
+  - "llama-family"
+  - "gemma-family"
+
+benchmark_small_max_gb: 10    # upper size boundary for small pass (< 10 GB), based on runtime RAM
+benchmark_medium_max_gb: 15   # upper size boundary for medium pass (10–15 GB), based on runtime RAM
+benchmark_size_overhead_factor: 1.2  # ollama list shows disk size; multiply by this to estimate runtime RAM
+benchmark_load_timeout: 180      # seconds — warm-up "Hi" prompt per model before benchmarking
+benchmark_small_timeout: 90      # seconds per request, small models (<10 GB)
+benchmark_medium_timeout: 240    # seconds per request, medium models (10–15 GB)
+benchmark_large_timeout: 480     # seconds per request, large models (>15 GB)
+benchmark_num_predict: 500       # cap output tokens; allows full coding responses (def+return+docstring+assert); worst-case: 6.5 tok/s→77s, 22 tok/s→23s
+
 # Explicit category overrides applied before heuristics. Keys are model names as
 # returned by `ollama list`. Valid values: 'coding' or 'general'.
 # Example: { "deepseek-coder-v2": "coding", "qwen2.5-coder:7b": "coding" }
@@ -97,7 +118,7 @@ model_category_overrides: {}
 # These are the minimum set needed to populate all 4 slots with meaningful candidates.
 baseline_models:
   - "llama3.2:3b"
-  - "deepseek-coder-v2"
+  - "deepseek-coder-v2:16b"
   - "qwen2.5-coder:7b"
   - "llama3.1:8b"
 
@@ -108,11 +129,6 @@ candidate_models:
     expected_tokens_sec: 4.5
     reason: "Larger qwen2.5-coder for higher quality"
     category: coding
-  - name: "deepseek-coder-v2:latest"
-    size_gb: 9
-    expected_tokens_sec: 8.0
-    reason: "DeepSeek Coder V2 full model"
-    category: coding
   - name: "codegemma:7b-instruct-q5_K_M"
     size_gb: 5.5
     expected_tokens_sec: 12.0
@@ -124,8 +140,8 @@ candidate_models:
     reason: "StarCoder2 coding specialist"
     category: coding
 
-# OpenClaw default model
-openclaw_model: "llama3.2:3b"
+# OpenClaw default model — overridden dynamically by 08_openclaw.yml from slot1_general
+openclaw_model: "deepseek-coder-v2:16b-lite-instruct-q4_K_M"
 
 # AWS Bedrock (OpenAI-compatible API via Open WebUI)
 # Pass bearer_token on first run: -e "bedrock_bearer_token=<value>"

+ 49 - 0
playbooks/01_vault.yml

@@ -132,12 +132,14 @@
       register: vault_init_check
       tags:
         - vault-init
+        - vault-unseal
 
     - name: "Vault | Set initialization status fact"
       ansible.builtin.set_fact:
         vault_is_initialized: "{{ vault_init_check.status != 501 }}"
       tags:
         - vault-init
+        - vault-unseal
 
     - name: "Vault | Initialize Vault"
       ansible.builtin.command:
@@ -235,6 +237,47 @@
       tags:
         - vault-unseal
 
+    # ── Auto-unseal on reboot ─────────────────────────────────────────
+    - name: "Vault | Deploy unseal key to server"
+      ansible.builtin.copy:
+        content: "{{ vault_init_data.unseal_keys_b64[0] }}"
+        dest: /etc/vault.d/unseal.key
+        owner: root
+        group: root
+        mode: "0400"
+      tags:
+        - vault-unseal
+        - vault-autounseal
+
+    - name: "Vault | Deploy vault-unseal.sh"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/vault/vault-unseal.sh.j2"
+        dest: /usr/local/bin/vault-unseal.sh
+        owner: root
+        group: root
+        mode: "0750"
+      tags:
+        - vault-autounseal
+
+    - name: "Vault | Deploy vault-unseal.service"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/vault/vault-unseal.service.j2"
+        dest: /etc/systemd/system/vault-unseal.service
+        owner: root
+        group: root
+        mode: "0644"
+      notify: Reload systemd and restart vault-unseal
+      tags:
+        - vault-autounseal
+
+    - name: "Vault | Enable vault-unseal.service"
+      ansible.builtin.systemd:
+        name: vault-unseal.service
+        enabled: true
+        daemon_reload: true
+      tags:
+        - vault-autounseal
+
     - name: "Vault | Set root token fact"
       ansible.builtin.set_fact:
         vault_root_token: "{{ vault_init_data.root_token }}"
@@ -516,3 +559,9 @@
         name: vault
         state: restarted
         daemon_reload: true
+
+    - name: Reload systemd and restart vault-unseal
+      ansible.builtin.systemd:
+        name: vault-unseal.service
+        state: restarted
+        daemon_reload: true

+ 44 - 2
playbooks/02_infrastructure.yml

@@ -155,6 +155,42 @@
       tags:
         - ollama
 
+    - name: "Ollama | Deploy ollama-node0 systemd unit"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/ollama/ollama-node0.service.j2"
+        dest: /etc/systemd/system/ollama-node0.service
+        mode: "0644"
+        owner: root
+        group: root
+      notify:
+        - Reload systemd and start ollama-node0
+      tags:
+        - ollama
+
+    - name: "Ollama | Enable and start ollama-node0"
+      ansible.builtin.systemd:
+        name: ollama-node0
+        enabled: true
+        state: started
+        daemon_reload: true
+      tags:
+        - ollama
+
+    - name: "Ollama | Wait for ollama-node0 API to be ready"
+      ansible.builtin.uri:
+        url: "http://localhost:{{ ollama_node0_port }}/api/tags"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ ollama_api_key }}"
+        status_code: 200
+        timeout: 10
+      register: ollama_node0_ready
+      retries: 24
+      delay: 5
+      until: ollama_node0_ready.status == 200
+      tags:
+        - ollama
+
     # ── OS-level kernel tuning for dedicated inference server ────────────────
     - name: "OS Tune | Apply sysctl settings for inference workload"
       ansible.posix.sysctl:
@@ -164,8 +200,8 @@
         reload: true
         state: present
       loop:
-        # Disable auto-NUMA migration — fights explicit numactl --membind=1 by
-        # moving KV-cache pages mid-inference to a different NUMA node.
+        # Disable auto-NUMA migration — CPUAffinity pins Ollama to node 1/0
+        # physical cores; NUMA balancing could migrate pages mid-inference.
         - { name: kernel.numa_balancing, value: "0" }
         # Near-zero swappiness: prevents model weights being paged out under
         # memory pressure (complements LimitMEMLOCK=infinity in the unit file).
@@ -261,6 +297,12 @@
         state: restarted
         daemon_reload: true
 
+    - name: Reload systemd and start ollama-node0
+      ansible.builtin.systemd:
+        name: ollama-node0
+        state: started
+        daemon_reload: true
+
     - name: Reload systemd daemon
       ansible.builtin.systemd:
         daemon_reload: true

+ 165 - 37
playbooks/03_benchmark.yml

@@ -84,6 +84,31 @@
       tags:
         - benchmark-discover
 
+    - name: "Benchmark | Stop warmup services for clean benchmark run"
+      ansible.builtin.systemd:
+        name: "{{ item }}"
+        state: stopped
+      loop:
+        - ollama-warmup.service
+        - ollama-warmup-node0.service
+      failed_when: false
+      become: true
+      tags:
+        - benchmark-setup
+
+    - name: "Benchmark | Wait for node0 Ollama API to be ready"
+      ansible.builtin.uri:
+        url: "http://localhost:{{ ollama_node0_port }}/api/tags"
+        method: GET
+        status_code: 200
+        timeout: 10
+      register: ollama_node0_ready
+      retries: 24
+      delay: 5
+      until: ollama_node0_ready.status == 200
+      tags:
+        - benchmark-setup
+
     - name: "Benchmark | Discover installed models"
       ansible.builtin.command: ollama list
       changed_when: false
@@ -100,44 +125,124 @@
       tags:
         - benchmark-discover
 
-    - name: "Benchmark | Set models_to_benchmark to all installed models"
+    - name: "Benchmark | Parse model sizes from ollama list"
       ansible.builtin.set_fact:
-        models_to_benchmark: "{{ installed_models }}"
+        _benchmark_sizes_json: |
+          {% set ns = namespace(d={}) %}
+          {% for line in ollama_list_output.stdout_lines[1:] %}
+          {%   set p = line.split() %}
+          {%   if p | length >= 4 %}
+          {%     set gb = (p[2] | float) if (p[3] | upper == 'GB') else ((p[2] | float) / 1024) %}
+          {%     set _ = ns.d.update({p[0]: gb}) %}
+          {%   endif %}
+          {% endfor %}
+          {{ ns.d | to_json }}
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Partition models into small, medium, and large passes"
+      ansible.builtin.set_fact:
+        _small_models:  "{{ _alias_filtered | select('in', _small_ok)  | list }}"
+        _medium_models: "{{ _alias_filtered | select('in', _medium_ok) | list }}"
+        _large_models:  "{{ _alias_filtered | reject('in', _small_ok)  | reject('in', _medium_ok) | list }}"
+        models_to_benchmark: "{{ _alias_filtered | list }}"
+      vars:
+        _sizes:     "{{ _benchmark_sizes_json | from_json }}"
+        _small_cut:  "{{ (benchmark_small_max_gb  | float) / (benchmark_size_overhead_factor | float) }}"
+        _medium_cut: "{{ (benchmark_medium_max_gb | float) / (benchmark_size_overhead_factor | float) }}"
+        _small_ok:  "{{ _sizes | dict2items | selectattr('value', 'le', _small_cut  | float) | map(attribute='key') | list }}"
+        _medium_ok: "{{ _sizes | dict2items | selectattr('value', 'gt', _small_cut  | float)
+                                            | selectattr('value', 'le', _medium_cut | float)
+                                            | map(attribute='key') | list }}"
+        _alias_filtered: "{{ installed_models | reject('match', '^(' ~ benchmark_skip_aliases | join('|') ~ ')(:|$)') | list }}"
       when: benchmark_models | default('') | length == 0
       tags:
         - benchmark-discover
 
     - name: "Benchmark | Set models_to_benchmark to specified subset"
       ansible.builtin.set_fact:
-        models_to_benchmark: "{{ benchmark_models.split(',') | map('trim') | list }}"
+        models_to_benchmark: "{{ _specified }}"
+        _small_models:  "{{ _specified | select('in', _small_ok)  | list }}"
+        _medium_models: "{{ _specified | select('in', _medium_ok) | list }}"
+        _large_models:  "{{ _specified | reject('in', _small_ok)  | reject('in', _medium_ok) | list }}"
+      vars:
+        _specified: "{{ benchmark_models.split(',') | map('trim') | list }}"
+        _sizes:     "{{ _benchmark_sizes_json | from_json }}"
+        _small_cut:  "{{ (benchmark_small_max_gb  | float) / (benchmark_size_overhead_factor | float) }}"
+        _medium_cut: "{{ (benchmark_medium_max_gb | float) / (benchmark_size_overhead_factor | float) }}"
+        _small_ok:  "{{ _sizes | dict2items | selectattr('value', 'le', _small_cut  | float) | map(attribute='key') | list }}"
+        _medium_ok: "{{ _sizes | dict2items | selectattr('value', 'gt', _small_cut  | float)
+                                            | selectattr('value', 'le', _medium_cut | float)
+                                            | map(attribute='key') | list }}"
       when: benchmark_models | default('') | length > 0
       tags:
         - benchmark-discover
 
+    - name: "Benchmark | Initialize batch accumulator facts"
+      ansible.builtin.set_fact:
+        bench_all_results: []
+        all_eligible_models: []
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Build per-model benchmark timeout map"
+      ansible.builtin.set_fact:
+        _benchmark_timeout_map_json: |
+          {% set ns = namespace(d={}) %}
+          {% for m in models_to_benchmark %}
+          {%   if m in _small_models %}
+          {%     set _ = ns.d.update({m: benchmark_small_timeout | int}) %}
+          {%   elif m in _medium_models %}
+          {%     set _ = ns.d.update({m: benchmark_medium_timeout | int}) %}
+          {%   else %}
+          {%     set _ = ns.d.update({m: benchmark_large_timeout | int}) %}
+          {%   endif %}
+          {% endfor %}
+          {{ ns.d | to_json }}
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Parse benchmark timeout map"
+      ansible.builtin.set_fact:
+        _benchmark_timeout_map: "{{ _benchmark_timeout_map_json | from_json }}"
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Sort models largest-first so heaviest models land on node1 (120 GB)"
+      ansible.builtin.set_fact:
+        models_to_benchmark: >-
+          {{ (_large_models + _medium_models + _small_models)
+             | select('in', models_to_benchmark) | list }}
+      tags:
+        - benchmark-discover
+
     - name: "Benchmark | Display models to benchmark"
       ansible.builtin.debug:
-        msg: "Will benchmark the following models: {{ models_to_benchmark }}"
+        msg:
+          - "Small  pass (timeout {{ benchmark_small_timeout }}s,  ≤{{ benchmark_small_max_gb }}GB):  {{ _small_models }}"
+          - "Medium pass (timeout {{ benchmark_medium_timeout }}s, {{ benchmark_small_max_gb }}–{{ benchmark_medium_max_gb }}GB): {{ _medium_models }}"
+          - "Large  pass (timeout {{ benchmark_large_timeout }}s, >{{ benchmark_medium_max_gb }}GB): {{ _large_models }}"
+          - "Load timeout (warm-up 'Hi' prompt): {{ benchmark_load_timeout }}s"
+          - "Total: {{ models_to_benchmark | length }} models, {{ (models_to_benchmark | batch(6) | list) | length }} batch(es) of ≤6"
       tags:
         - benchmark-discover
 
-    - name: "Benchmark | Run test prompts against each model"
-      ansible.builtin.uri:
-        url: "{{ ollama_api_url }}/api/generate"
-        method: POST
-        body_format: json
-        body:
-          model: "{{ item.0 }}"
-          prompt: "{{ test_prompts[item.1].prompt }}"
-          stream: false
-        headers:
-          Authorization: "Bearer {{ ollama_api_key }}"
-        timeout: 300
-        status_code: 200
-      loop: "{{ models_to_benchmark | product(test_prompts.keys() | list) | list }}"
+    - name: "Benchmark | Process batch {{ _loop_idx + 1 }} of {{ models_to_benchmark | batch(6) | list | length }}"
+      ansible.builtin.include_tasks: _bench_tier_batch.yml
+      vars:
+        _batch_node1: "{{ _batch[:3] }}"
+        _batch_node0: "{{ _batch[3:] }}"
+      loop: "{{ models_to_benchmark | batch(6) | list }}"
       loop_control:
-        label: "{{ item.0 }} / {{ item.1 }}"
-      register: benchmark_raw_results
-      failed_when: false
+        loop_var: _batch
+        label: "batch {{ _loop_idx + 1 }}: node1={{ _batch[:3] }} node0={{ _batch[3:] }}"
+        index_var: _loop_idx
+      tags:
+        - benchmark-run
+
+    - name: "Benchmark | Display models that failed to load"
+      ansible.builtin.debug:
+        msg: "Load failures (excluded from scoring): {{ models_to_benchmark | reject('in', all_eligible_models) | list }}"
       tags:
         - benchmark-run
 
@@ -145,9 +250,9 @@
       ansible.builtin.set_fact:
         model_metrics: |
           {% set ns = namespace(results={}) %}
-          {% for model in models_to_benchmark %}
+          {% for model in all_eligible_models %}
           {%   set ns2 = namespace(coding_quality=0, coding_count=0, general_quality=0, general_count=0, total_toks=0, total_eval_time=0, ttft_sum=0, ttft_count=0, latency_ns=0) %}
-          {%   for result in benchmark_raw_results.results %}
+          {%   for result in bench_all_results %}
           {%     if result.item[0] == model and result.status == 200 %}
           {%       set test_name = result.item[1] %}
           {%       set resp = result.json | default({}) %}
@@ -160,7 +265,7 @@
           {%       set ns2.ttft_sum = ns2.ttft_sum + prompt_eval_duration %}
           {%       set ns2.ttft_count = ns2.ttft_count + 1 %}
           {%       if test_name == 'latency' %}
-          {%         set ns2.latency_ns = eval_duration + prompt_eval_duration %}
+          {%         set ns2.latency_ns = ((resp.total_duration | default(0) | int) - (resp.load_duration | default(0) | int)) | abs %}
           {%       endif %}
           {%       set resp_len = response_text | length %}
           {%       if test_name in ['code_gen', 'debug', 'refactor'] %}
@@ -239,9 +344,14 @@
           {% set coding_sorted = coding_models | sort(attribute='composite', reverse=true) %}
           {% set slot1 = general_sorted[0].name if general_sorted | length > 0 else 'none' %}
           {% set slot2 = general_sorted[1].name if general_sorted | length > 1 else (general_sorted[0].name if general_sorted | length > 0 else 'none') %}
+          {% set slot5 = general_sorted[2].name if general_sorted | length > 2 else 'none' %}
           {% set slot3 = coding_sorted[0].name if coding_sorted | length > 0 else (general_sorted[0].name if general_sorted | length > 0 else 'none') %}
           {% set slot4 = coding_sorted[1].name if coding_sorted | length > 1 else (coding_sorted[0].name if coding_sorted | length > 0 else 'none') %}
-          {{ {'slot1_general': slot1, 'slot2_general': slot2, 'slot3_coding': slot3, 'slot4_coding': slot4, 'all_metrics': parsed_metrics, 'general_ranking': general_sorted, 'coding_ranking': coding_sorted} | to_json }}
+          {% set slot6 = coding_sorted[2].name if coding_sorted | length > 2 else 'none' %}
+          {{ {'slot1_general': slot1, 'slot2_general': slot2, 'slot5_general_rotate': slot5,
+              'slot3_coding': slot3, 'slot4_coding': slot4, 'slot6_coding_rotate': slot6,
+              'all_metrics': parsed_metrics, 'general_ranking': general_sorted,
+              'coding_ranking': coding_sorted} | to_json }}
       tags:
         - benchmark-select
 
@@ -255,12 +365,16 @@
       ansible.builtin.debug:
         msg:
           - "============================================="
-          - "  MODEL SELECTION RESULTS"
+          - "  MODEL SELECTION RESULTS  (6-slot / 2-socket)"
           - "============================================="
-          - "  Slot 1 (General Primary):  {{ selection.slot1_general }}"
-          - "  Slot 2 (General Secondary): {{ selection.slot2_general }}"
-          - "  Slot 3 (Coding Primary):   {{ selection.slot3_coding }}"
-          - "  Slot 4 (Coding Secondary): {{ selection.slot4_coding }}"
+          - "  Node 1 — General (port 11434)"
+          - "  Slot 1 (locked):   {{ selection.slot1_general }}"
+          - "  Slot 2 (locked):   {{ selection.slot2_general }}"
+          - "  Slot 5 (rotate):   {{ selection.slot5_general_rotate }}"
+          - "  Node 0 — Coding (port 11435)"
+          - "  Slot 3 (locked):   {{ selection.slot3_coding }}"
+          - "  Slot 4 (locked):   {{ selection.slot4_coding }}"
+          - "  Slot 6 (rotate):   {{ selection.slot6_coding_rotate }}"
           - "============================================="
       tags:
         - benchmark-select
@@ -276,13 +390,15 @@
         content: |
           # Benchmark Results - {{ benchmark_timestamp }}
 
-          ## Model Selection
-          | Slot | Role | Model | Composite Score |
-          |------|------|-------|----------------|
-          | 1 | General (Primary) | {{ selection.slot1_general }} | {{ parsed_metrics[selection.slot1_general].general_composite | default('N/A') }} |
-          | 2 | General (Secondary) | {{ selection.slot2_general }} | {{ parsed_metrics[selection.slot2_general].general_composite | default('N/A') }} |
-          | 3 | Coding (Primary) | {{ selection.slot3_coding }} | {{ parsed_metrics[selection.slot3_coding].coding_composite | default('N/A') }} |
-          | 4 | Coding (Secondary) | {{ selection.slot4_coding }} | {{ parsed_metrics[selection.slot4_coding].coding_composite | default('N/A') }} |
+          ## Model Selection (6-slot / 2-socket)
+          | Slot | Socket | Role | Model | Composite Score |
+          |------|--------|------|-------|----------------|
+          | 1 | Node 1 (port 11434) | General (locked) | {{ selection.slot1_general }} | {{ (parsed_metrics[selection.slot1_general].general_composite if selection.slot1_general in parsed_metrics else 'N/A') }} |
+          | 2 | Node 1 (port 11434) | General (locked) | {{ selection.slot2_general }} | {{ (parsed_metrics[selection.slot2_general].general_composite if selection.slot2_general in parsed_metrics else 'N/A') }} |
+          | 5 | Node 1 (port 11434) | General (rotate) | {{ selection.slot5_general_rotate }} | {{ (parsed_metrics[selection.slot5_general_rotate].general_composite if selection.slot5_general_rotate in parsed_metrics else 'N/A') }} |
+          | 3 | Node 0 (port 11435) | Coding (locked) | {{ selection.slot3_coding }} | {{ (parsed_metrics[selection.slot3_coding].coding_composite if selection.slot3_coding in parsed_metrics else 'N/A') }} |
+          | 4 | Node 0 (port 11435) | Coding (locked) | {{ selection.slot4_coding }} | {{ (parsed_metrics[selection.slot4_coding].coding_composite if selection.slot4_coding in parsed_metrics else 'N/A') }} |
+          | 6 | Node 0 (port 11435) | Coding (rotate) | {{ selection.slot6_coding_rotate }} | {{ (parsed_metrics[selection.slot6_coding_rotate].coding_composite if selection.slot6_coding_rotate in parsed_metrics else 'N/A') }} |
 
           ## Detailed Metrics
           {% for model, metrics in parsed_metrics.items() %}
@@ -342,3 +458,15 @@
       changed_when: true
       tags:
         - benchmark-pull
+
+    - name: "Benchmark | Restart warmup services after benchmark"
+      ansible.builtin.systemd:
+        name: "{{ item }}"
+        state: restarted
+      loop:
+        - ollama-warmup.service
+        - ollama-warmup-node0.service
+      failed_when: false
+      become: true
+      tags:
+        - benchmark-cleanup

+ 74 - 7
playbooks/04_models.yml

@@ -11,7 +11,9 @@
   vars:
     model_selection_file: "{{ playbook_dir }}/../benchmarks/results/model_selection.json"
     modelfiles_dir: /mnt/ai_data/ollama_models/modelfiles
-    slot4_model: ""
+    slot4_model: ""   # legacy override kept for backwards compatibility
+    slot5_model: ""   # overrides slot5_general_rotate
+    slot6_model: ""   # overrides slot6_coding_rotate
     ollama_api_key: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/ollama:api_key token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
 
   tasks:
@@ -38,13 +40,31 @@
       tags:
         - models-load
 
+    - name: "Models | Apply slot5 override if provided"
+      ansible.builtin.set_fact:
+        model_selection: "{{ model_selection | combine({'slot5_general_rotate': slot5_model}) }}"
+      when: slot5_model | length > 0
+      tags:
+        - models-load
+
+    - name: "Models | Apply slot6 override if provided"
+      ansible.builtin.set_fact:
+        model_selection: "{{ model_selection | combine({'slot6_coding_rotate': slot6_model}) }}"
+      when: slot6_model | length > 0
+      tags:
+        - models-load
+
     - name: "Models | Display selected models"
       ansible.builtin.debug:
         msg:
-          - "Slot 1 (General Primary):   {{ model_selection.slot1_general }}"
-          - "Slot 2 (General Secondary):  {{ model_selection.slot2_general }}"
-          - "Slot 3 (Coding Primary):    {{ model_selection.slot3_coding }}"
-          - "Slot 4 (Coding Secondary):  {{ model_selection.slot4_coding }}"
+          - "=== Node 1 — General (port 11434) ==="
+          - "Slot 1 (locked):  {{ model_selection.slot1_general }}"
+          - "Slot 2 (locked):  {{ model_selection.slot2_general }}"
+          - "Slot 5 (rotate):  {{ model_selection.slot5_general_rotate | default('none') }}"
+          - "=== Node 0 — Coding (port 11435) ==="
+          - "Slot 3 (locked):  {{ model_selection.slot3_coding }}"
+          - "Slot 4 (locked):  {{ model_selection.slot4_coding }}"
+          - "Slot 6 (rotate):  {{ model_selection.slot6_coding_rotate | default('none') }}"
       tags:
         - models-load
 
@@ -72,8 +92,10 @@
       loop:
         - "{{ model_selection.slot1_general }}"
         - "{{ model_selection.slot2_general }}"
+        - "{{ model_selection.slot5_general_rotate | default('none') }}"
         - "{{ model_selection.slot3_coding }}"
         - "{{ model_selection.slot4_coding }}"
+        - "{{ model_selection.slot6_coding_rotate | default('none') }}"
       when:
         - item | length > 0
         - item != 'none'
@@ -130,6 +152,20 @@
       tags:
         - models-modelfile
 
+    - name: "Models | Template coder-rotate Modelfile"
+      ansible.builtin.copy:
+        content: |
+          FROM {{ model_selection.slot6_coding_rotate }}
+          PARAMETER num_ctx 32768
+          SYSTEM You are an expert coding assistant. You write clean, efficient, well-documented code. Always include type hints and follow best practices.
+        dest: "{{ modelfiles_dir }}/Modelfile.coder-rotate"
+        mode: "0644"
+      when:
+        - model_selection.slot6_coding_rotate | default('') | length > 0
+        - model_selection.slot6_coding_rotate | default('none') != 'none'
+      tags:
+        - models-modelfile
+
     - name: "Models | Template llama-family Modelfile"
       ansible.builtin.copy:
         content: |
@@ -156,8 +192,9 @@
     - name: "Models | Register custom models with Ollama"
       ansible.builtin.command: "ollama create {{ item.name }} -f {{ modelfiles_dir }}/{{ item.file }}"
       loop:
-        - { name: "coder-128k", file: "Modelfile.coder-128k" }
-        - { name: "coder-32k",  file: "Modelfile.coder-32k",  slot: "{{ model_selection.slot4_coding }}" }
+        - { name: "coder-128k",   file: "Modelfile.coder-128k" }
+        - { name: "coder-32k",    file: "Modelfile.coder-32k",    slot: "{{ model_selection.slot4_coding }}" }
+        - { name: "coder-rotate", file: "Modelfile.coder-rotate", slot: "{{ model_selection.slot6_coding_rotate | default('none') }}" }
         - { name: "llama-family", file: "Modelfile.llama-family" }
         - { name: "gemma-family", file: "Modelfile.gemma-family" }
       when: item.slot is not defined or (item.slot | length > 0 and item.slot != 'none')
@@ -201,3 +238,33 @@
         state: started
       tags:
         - models-warmup
+
+    # ── Node0 warmup service ─────────────────────────────────────────
+    - name: "Models | Template node0 warmup script"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/ollama/warmup-node0.sh.j2"
+        dest: /usr/local/bin/ollama-warmup-node0.sh
+        mode: "0755"
+        owner: root
+        group: root
+      tags:
+        - models-warmup
+
+    - name: "Models | Template node0 warmup systemd service"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/systemd/ollama-warmup-node0.service.j2"
+        dest: /etc/systemd/system/ollama-warmup-node0.service
+        mode: "0644"
+        owner: root
+        group: root
+      tags:
+        - models-warmup
+
+    - name: "Models | Enable and start node0 warmup service"
+      ansible.builtin.systemd:
+        name: ollama-warmup-node0
+        enabled: true
+        state: started
+        daemon_reload: true
+      tags:
+        - models-warmup

+ 1 - 1
playbooks/07_openwebui.yml

@@ -90,7 +90,7 @@
         _openwebui_env: >-
           {{
             {
-              'OLLAMA_BASE_URL': 'http://host.docker.internal:11434',
+              'OLLAMA_BASE_URLS': 'http://host.docker.internal:11434;http://host.docker.internal:11435',
               'OLLAMA_API_KEY': ollama_api_key,
               'WEBUI_SECRET_KEY': openwebui_secret_key,
               'WEBUI_AUTH': 'true',

+ 21 - 0
playbooks/08_openclaw.yml

@@ -77,6 +77,27 @@
       tags:
         - openclaw-config
 
+    - name: "OpenClaw | Load model selection for model assignment"
+      ansible.builtin.slurp:
+        src: "{{ playbook_dir }}/../benchmarks/results/model_selection.json"
+      delegate_to: localhost
+      become: false
+      register: _model_sel_raw
+      ignore_errors: true
+      when: not skip_openclaw
+      tags:
+        - openclaw-config
+
+    - name: "OpenClaw | Set openclaw_model from benchmark slot 1 (best general)"
+      ansible.builtin.set_fact:
+        openclaw_model: "{{ (_model_sel_raw.content | b64decode | from_json).slot1_general }}"
+      when:
+        - not skip_openclaw
+        - _model_sel_raw is not failed
+        - _model_sel_raw.content is defined
+      tags:
+        - openclaw-config
+
     # ── Install Python dependencies ───────────────────────────────────
     - name: "OpenClaw | Install Python dependencies"
       ansible.builtin.pip:

+ 171 - 0
playbooks/_bench_tier_batch.yml

@@ -0,0 +1,171 @@
+---
+# playbooks/_bench_tier_batch.yml
+# Included by 03_benchmark.yml once per batch of up to 6 models.
+#
+# Expected vars (passed via include_tasks vars:):
+#   _batch_node1  — list of 0–3 model names for port 11434
+#   _batch_node0  — list of 0–3 model names for port 11435
+#
+# Mutates host facts (accumulated across batches):
+#   bench_all_results    — list of uri result dicts
+#   all_eligible_models  — list of model names that passed load
+#
+# Concurrency design:
+#   Load:      node1 and node0 warm-up "Hi" prompts fire simultaneously (async).
+#              Within each node Ollama still loads one model at a time,
+#              but both nodes drain their queues in parallel.
+#   Benchmark: sequential (synchronous uri), one request at a time per node.
+#              Node1 drains fully, then node0. No queue contamination; each
+#              request gets a full idle inference slot and clean eval_duration.
+
+# ── Load models into RAM (both nodes concurrently) ────────────────────────────
+# 3 models per node, sequential within each node → last model waits for 2
+# ahead: max load wait ≤ 2 × load_timeout. Use load_timeout × 4 for margin.
+
+- name: "Benchmark | Load node1 models into RAM (async)"
+  ansible.builtin.uri:
+    url: "http://localhost:11434/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item }}"
+      prompt: "Hi"
+      stream: false
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_load_timeout | int) * 4 }}"
+    status_code: 200
+  loop: "{{ _batch_node1 }}"
+  loop_control:
+    label: "node1 load: {{ item }}"
+  async: "{{ (benchmark_load_timeout | int) * 5 }}"
+  poll: 0
+  register: _load_node1_jobs
+  failed_when: false
+
+- name: "Benchmark | Load node0 models into RAM (async)"
+  ansible.builtin.uri:
+    url: "http://localhost:{{ ollama_node0_port }}/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item }}"
+      prompt: "Hi"
+      stream: false
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_load_timeout | int) * 4 }}"
+    status_code: 200
+  loop: "{{ _batch_node0 }}"
+  loop_control:
+    label: "node0 load: {{ item }}"
+  async: "{{ (benchmark_load_timeout | int) * 5 }}"
+  poll: 0
+  register: _load_node0_jobs
+  failed_when: false
+
+- name: "Benchmark | Collect node1 load results"
+  ansible.builtin.async_status:
+    jid: "{{ _async_job.ansible_job_id }}"
+  loop: "{{ _load_node1_jobs.results | default([]) }}"
+  loop_control:
+    loop_var: _async_job
+    label: "node1 load: {{ _async_job.item | default('?') }}"
+  register: _load_node1
+  until: _load_node1.finished
+  retries: "{{ ((benchmark_load_timeout | int) * 5 / 15) | int + 5 }}"
+  delay: 15
+  failed_when: false
+
+- name: "Benchmark | Collect node0 load results"
+  ansible.builtin.async_status:
+    jid: "{{ _async_job.ansible_job_id }}"
+  loop: "{{ _load_node0_jobs.results | default([]) }}"
+  loop_control:
+    loop_var: _async_job
+    label: "node0 load: {{ _async_job.item | default('?') }}"
+  register: _load_node0
+  until: _load_node0.finished
+  retries: "{{ ((benchmark_load_timeout | int) * 5 / 15) | int + 5 }}"
+  delay: 15
+  failed_when: false
+
+# ── Identify successfully loaded models ───────────────────────────────────────
+
+- name: "Benchmark | Identify loaded models in batch"
+  ansible.builtin.set_fact:
+    _eligible_node1: "{{ _load_node1.results | selectattr('status', 'equalto', 200) | map(attribute='_async_job') | map(attribute='item') | list }}"
+    _eligible_node0: "{{ _load_node0.results | selectattr('status', 'equalto', 200) | map(attribute='_async_job') | map(attribute='item') | list }}"
+
+# ── Fire benchmark prompts sequentially (one request at a time per node) ──────
+# Sequential firing ensures each request hits an idle Ollama inference slot:
+# no queue contamination, full CPU budget per request, clean eval_duration.
+# Node1 then node0 run back-to-back; concurrent load phase above is unchanged.
+
+- name: "Benchmark | Fire test prompts at node1"
+  ansible.builtin.uri:
+    url: "http://localhost:11434/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item.0 }}"
+      prompt: "{{ test_prompts[item.1].prompt }}"
+      stream: false
+      options:
+        num_predict: "{{ benchmark_num_predict | int }}"
+        temperature: 0
+        seed: 42
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_large_timeout | int) }}"
+    status_code: 200
+  loop: "{{ _eligible_node1 | product(test_prompts.keys() | list) | list }}"
+  loop_control:
+    label: "{{ item.0 }} / {{ item.1 }}"
+  register: _bench_node1
+  failed_when: false
+
+- name: "Benchmark | Fire test prompts at node0"
+  ansible.builtin.uri:
+    url: "http://localhost:{{ ollama_node0_port }}/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item.0 }}"
+      prompt: "{{ test_prompts[item.1].prompt }}"
+      stream: false
+      options:
+        num_predict: "{{ benchmark_num_predict | int }}"
+        temperature: 0
+        seed: 42
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_large_timeout | int) }}"
+    status_code: 200
+  loop: "{{ _eligible_node0 | product(test_prompts.keys() | list) | list }}"
+  loop_control:
+    label: "{{ item.0 }} / {{ item.1 }}"
+  register: _bench_node0
+  failed_when: false
+
+# ── Accumulate results into play-scoped facts ─────────────────────────────────
+# Synchronous uri populates result.item = [model, prompt_key] at top level —
+# no _async_job indirection needed; compute task in 03_benchmark.yml unchanged.
+
+- name: "Benchmark | Accumulate node1 results"
+  ansible.builtin.set_fact:
+    bench_all_results: "{{ bench_all_results + [item] }}"
+  loop: "{{ _bench_node1.results | default([]) }}"
+  loop_control:
+    label: "{{ (item.item | default(['?', '?']))[0] }} / {{ (item.item | default(['?', '?']))[1] }}"
+
+- name: "Benchmark | Accumulate node0 results"
+  ansible.builtin.set_fact:
+    bench_all_results: "{{ bench_all_results + [item] }}"
+  loop: "{{ _bench_node0.results | default([]) }}"
+  loop_control:
+    label: "{{ (item.item | default(['?', '?']))[0] }} / {{ (item.item | default(['?', '?']))[1] }}"
+
+- name: "Benchmark | Accumulate eligible models"
+  ansible.builtin.set_fact:
+    all_eligible_models: "{{ all_eligible_models + _eligible_node1 + _eligible_node0 }}"

+ 75 - 45
roles/models/README.md

@@ -2,79 +2,109 @@
 
 ## Purpose
 
-Manage the Ollama model lifecycle -- pulling models, creating custom Modelfile
-configurations, and running a warm-up service to ensure models are loaded into GPU
-memory at boot time.
+Manage the Ollama model lifecycle  pulling models, creating custom Modelfile
+configurations, and running warm-up services to ensure models are loaded into RAM
+at boot time across both NUMA instances.
 
-## Slot System
+## 6-Slot System
 
-| Slot | Role               | Selection Method                         |
-|------|--------------------|------------------------------------------|
-| 1    | Primary Coding     | Highest coding composite from benchmarks |
-| 2    | Primary General    | Highest general composite from benchmarks|
-| 3    | Secondary / Backup | Next-best overall average composite      |
-| 4    | Experimental       | Manual override via `-e slot4_model=<name>` |
+| Slot | Instance      | Port  | Role             | Selection                      | Rotation                    |
+|------|---------------|-------|------------------|--------------------------------|-----------------------------|
+| 1    | Node 1        | 11434 | General (locked) | Top general composite          | Re-benchmark only           |
+| 2    | Node 1        | 11434 | General (locked) | 2nd general composite          | Re-benchmark only           |
+| 5    | Node 1        | 11434 | General (rotate) | 3rd general composite          | `-e slot5_model=<name>`     |
+| 3    | Node 0        | 11435 | Coding (locked)  | Top coding composite           | Re-benchmark only           |
+| 4    | Node 0        | 11435 | Coding (locked)  | 2nd coding composite           | Re-benchmark only           |
+| 6    | Node 0        | 11435 | Coding (rotate)  | 3rd coding composite           | `-e slot6_model=<name>`     |
 
 ## Slot Rotation
 
-To override slot 4 with a specific model at runtime:
+Rotate the general slot on Node 1 (port 11434):
 
 ```bash
-ansible-playbook playbooks/03_ollama.yml -e slot4_model=mistral:7b
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest"
 ```
 
-Slots 1-3 are automatically assigned based on the latest benchmark results in
-`model_selection.json`. Slot 4 is always user-controlled.
+Rotate the coding slot on Node 0 (port 11435):
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot6_model=llama3.1:70b"
+```
+
+Both at once:
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml \
+  -e "slot5_model=mistral:latest" -e "slot6_model=command-r:35b"
+```
+
+Reset both rotate slots back to benchmark recommendations:
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
 
 ## Modelfile Configurations
 
-Custom Modelfile variants are created for fine-tuned context windows and use cases:
+Custom Modelfile variants are created for fine-tuned context windows:
+
+| Custom Model    | Base Slot    | Context | Port  | Use Case                         |
+|-----------------|--------------|---------|-------|----------------------------------|
+| `coder-128k`    | slot3_coding | 32768   | 11435 | Primary coding (large context)   |
+| `coder-32k`     | slot4_coding | 32768   | 11435 | Secondary coding                 |
+| `coder-rotate`  | slot6_coding_rotate | 32768 | 11435 | Rotatable coding model      |
+| `llama-family`  | llama3.2:3b  | 8192    | 11434 | Family-safe general assistant    |
+| `gemma-family`  | llama3.1:8b  | 8192    | 11434 | Family-safe general assistant    |
+
+**These aliases are excluded from benchmarking** via `benchmark_skip_aliases` — their
+32k-token parameter allocations stall the benchmark loop with 285-second responses.
+
+## Warm-up Services
+
+Two oneshot systemd services pre-load models after their respective Ollama instances start:
 
-| Custom Model          | Base Model           | Context Window | Use Case                    |
-|-----------------------|----------------------|----------------|-----------------------------|
-| `coding-primary`     | (slot 1 model)       | 32768          | Code generation and debugging |
-| `general-primary`    | (slot 2 model)       | 16384          | General conversation and reasoning |
-| `backup`             | (slot 3 model)       | 16384          | Fallback for either category |
-| `experimental`       | (slot 4 model)       | 8192           | Testing new models           |
+| Service                      | Warms               | Instance            |
+|------------------------------|---------------------|---------------------|
+| `ollama-warmup.service`      | slots 1, 2, 5       | Node 1 (port 11434) |
+| `ollama-warmup-node0.service`| slots 3, 4, 6       | Node 0 (port 11435) |
 
-## Warm-up Service
+`OLLAMA_KEEP_ALIVE=-1` keeps models pinned once loaded. The warmup services only
+need to run once after boot; subsequent requests hit already-loaded models immediately.
 
-The role deploys `ollama-warmup.service`, a oneshot systemd service that runs after
-`ollama.service` starts.
+Check warmup status:
 
-**Why it is needed:** Even though `OLLAMA_KEEP_ALIVE=-1` keeps models loaded in GPU
-memory indefinitely once loaded, Ollama does not automatically load models on
-startup. The warm-up service sends a minimal inference request to each slot model,
-triggering the initial load into GPU memory. Without this, the first user request
-to each model would experience a long delay while the model is loaded.
+```bash
+systemctl status ollama-warmup ollama-warmup-node0
+```
 
-The warm-up service:
+Re-run warmup manually (e.g. after rotating a slot):
 
-1. Waits for Ollama API to be healthy
-2. Sends a short prompt to each configured slot model
-3. Exits after all models are loaded
+```bash
+systemctl restart ollama-warmup          # Node 1 general models
+systemctl restart ollama-warmup-node0    # Node 0 coding models
+```
 
 ## model_selection.json
 
-The model selection file is read by this role to determine which models to assign to
-each slot. Schema:
+`playbooks/04_models.yml` reads `benchmarks/results/model_selection.json`:
 
 ```json
 {
-  "timestamp": "2025-01-15T10:30:00Z",
-  "slot1_coding": "qwen2.5-coder:14b",
-  "slot2_general": "llama3.1:8b",
-  "slot3_backup": "deepseek-coder-v2:16b",
-  "slot4_experimental": null
+  "slot1_general": "llama3.1:8b",
+  "slot2_general": "mistral:latest",
+  "slot5_general_rotate": "llama3.2:3b",
+  "slot3_coding": "deepseek-coder-v2:16b",
+  "slot4_coding": "qwen2.5-coder:7b",
+  "slot6_coding_rotate": "codegemma:7b",
+  "general_ranking": [...],
+  "coding_ranking": [...],
+  "all_metrics": { ... }
 }
 ```
 
-If `model_selection.json` does not exist (first run before benchmarks), the role
-falls back to default models defined in `group_vars/all.yml`.
-
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags models
-ansible-playbook playbooks/site.yml --tags warmup
+ansible-playbook playbooks/site.yml --tags models -K -e @local.yml
+ansible-playbook playbooks/site.yml --tags models-warmup -K -e @local.yml
 ```

+ 52 - 41
roles/ollama/README.md

@@ -2,69 +2,80 @@
 
 ## Purpose
 
-Install, configure, and maintain the Ollama inference server on the AI server host.
+Install, configure, and maintain Ollama inference server(s) on the AI server host.
+Two instances run simultaneously — one per NUMA socket — to utilize both CPU sockets
+on the Dell M630 (2× E5-2690v4).
 
-## Installation
+## Instances
 
-Ollama is installed using the official install script, which places the binary at
-`/usr/local/bin/ollama` and creates a systemd service. The script handles both fresh
-installs and upgrades.
+| Service                | Port  | NUMA Node | CPUs (physical only) | RAM binding | Purpose          |
+|------------------------|-------|-----------|----------------------|-------------|------------------|
+| `ollama.service`       | 11434 | Node 1    | 1 3 5 … 27 (odd)     | `--membind=1` | General models |
+| `ollama-node0.service` | 11435 | Node 0    | 0 2 4 … 26 (even)    | `--membind=0` | Coding models  |
 
-## Environment Variables
+Both instances share the same model storage directory (`/mnt/ai_data/ollama_models`)
+and Ollama API key. Weights are loaded once into the NUMA node's memory; they are not
+duplicated between instances.
 
-Configuration is applied via a systemd drop-in override file at
-`/etc/systemd/system/ollama.service.d/override.conf`.
+## Configuration
 
-| Variable                  | Value              | Description                                      |
-|---------------------------|--------------------|--------------------------------------------------|
-| `OLLAMA_HOST`             | `0.0.0.0:11434`   | Listen on all interfaces, port 11434             |
-| `OLLAMA_MODELS`           | `/mnt/ai_data/ollama/models` | Model storage directory                |
-| `OLLAMA_KEEP_ALIVE`       | `-1`               | Keep models loaded in GPU memory indefinitely    |
-| `OLLAMA_NUM_PARALLEL`     | `4`                | Number of parallel inference requests            |
-| `OLLAMA_MAX_LOADED_MODELS`| `4`                | Maximum models loaded in GPU memory at once      |
-| `OLLAMA_API_KEY`          | (from Vault)       | API key for authentication                       |
-| `OLLAMA_FLASH_ATTENTION`  | `1`                | Enable Flash Attention for performance           |
-| `OLLAMA_CONTEXT_LENGTH`   | `32768`            | Default context window size                      |
+### Node 1 — systemd override
 
-## Override.conf Approach
+Applied via `/etc/systemd/system/ollama.service.d/override.conf` (templated from
+`templates/ollama/override.conf.j2`):
 
-Rather than modifying the upstream systemd unit file (which would be overwritten on
-upgrades), this role uses a systemd drop-in directory:
+| Variable                   | Value                        | Description                                      |
+|----------------------------|------------------------------|--------------------------------------------------|
+| `OLLAMA_API_KEY`           | (from Vault)                 | Shared key for all API requests                  |
+| `OLLAMA_HOST`              | `0.0.0.0:11434`              | Listen on all interfaces, port 11434             |
+| `OLLAMA_MODELS`            | `/mnt/ai_data/ollama_models` | Shared model storage                             |
+| `OLLAMA_KEEP_ALIVE`        | `-1`                         | Never unload models from RAM                     |
+| `OLLAMA_FLASH_ATTENTION`   | `1`                          | Fused softmax — ~20% less memory bandwidth       |
+| `OLLAMA_NUM_THREADS`       | `14`                         | Physical cores on NUMA node 1 only               |
+| `OLLAMA_NUM_PARALLEL`      | `2`                          | Concurrent inference streams per instance        |
+| `OLLAMA_MAX_LOADED_MODELS` | `3`                          | 3 models warm per instance (6 total)             |
+| `CPUAffinity`              | `1 3 5 … 27`                 | Odd CPUs = socket 1 physical cores               |
+| `ExecStart`                | `numactl --membind=1 ollama serve` | Pin memory allocations to Node 1 RAM        |
 
-```
-/etc/systemd/system/ollama.service.d/override.conf
-```
+### Node 0 — standalone systemd unit
+
+Deployed to `/etc/systemd/system/ollama-node0.service` (from
+`templates/ollama/ollama-node0.service.j2`). Uses the same variables but with:
 
-This ensures environment variables survive Ollama upgrades while keeping the
-upstream service file intact.
+| Variable   | Value           |
+|------------|-----------------|
+| `OLLAMA_HOST` | `0.0.0.0:11435` |
+| `CPUAffinity` | `0 2 4 … 26` |
+| `ExecStart`   | `numactl --membind=0 ollama serve` |
 
-## Why OLLAMA_API_KEY
+## NUMA Rationale
 
-Without an API key, anyone with network access to port 11434 can use the Ollama API
-to run inference, pull models, or delete models. Setting `OLLAMA_API_KEY` requires
-all API requests to include an `Authorization: Bearer <key>` header, preventing
-unauthenticated access.
+On the M630 with dual E5-2690v4:
+- **Node 1** (odd CPUs) has ~120 GB free RAM — assigned general models (larger)
+- **Node 0** (even CPUs) has ~75 GB free RAM — assigned coding models
+
+Without `numactl --membind`, the OS allocates model weights and KV cache across both
+nodes, causing cross-socket memory traffic (~40 GB/s vs ~68–75 GB/s local).
+`CPUAffinity` alone sets the scheduler; `numactl` sets the memory policy.
 
 ## OLLAMA_FLASH_ATTENTION
 
-Flash Attention is a GPU memory optimization that reduces memory usage and increases
-throughput for transformer inference. Setting `OLLAMA_FLASH_ATTENTION=1` enables
-this optimization for all models. This is a newer addition to Ollama and provides
-measurable performance improvements.
+Enables fused softmax kernel — reduces attention memory bandwidth by ~20% and improves
+throughput at all context lengths on AVX2 (E5-2690v4). Note: `OLLAMA_KV_CACHE_TYPE`
+is intentionally **not** set — q8_0 dequantization overhead regressed throughput on
+this CPU despite the bandwidth savings.
 
 ## Upgrade Procedure
 
-To upgrade Ollama to the latest version:
-
 ```bash
-ansible-playbook playbooks/03_ollama.yml
+ansible-playbook playbooks/02_infrastructure.yml -K -e @local.yml --tags ollama
 ```
 
-The official install script detects the existing installation and performs an
-in-place upgrade. The service is restarted after the upgrade.
+The official install script detects the existing installation and performs an in-place
+upgrade. Both `ollama.service` and `ollama-node0.service` are restarted.
 
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags ollama
+ansible-playbook playbooks/site.yml --tags ollama -K -e @local.yml
 ```

+ 31 - 25
roles/openclaw/README.md

@@ -3,56 +3,62 @@
 ## Purpose
 
 Deploy OpenClaw, a Telegram bot that provides access to Ollama models via Telegram
-messaging.
+messaging. Always uses the best warm general-purpose model (`slot1_general` from the
+last benchmark run).
 
 ## Prerequisites
 
 - A Telegram bot token obtained from [@BotFather](https://t.me/BotFather)
 - The token must be stored in Vault at `{{ vault_secret_prefix }}/openclaw:telegram_token`
+- `benchmarks/results/model_selection.json` must exist (produced by `03_benchmark.yml`)
 
-## Installation
+## Model Selection
 
-1. Node.js 20 is installed on the target host
-2. OpenClaw is installed globally via `npm install -g openclaw`
-3. A systemd service (`openclaw.service`) is created for process management
+`08_openclaw.yml` reads `benchmarks/results/model_selection.json` at deploy time and
+sets `openclaw_model` to `slot1_general` — the highest-scoring general model that is
+always warm on the Node 1 instance (port 11434). This ensures the bot always uses the
+best available model without requiring manual updates after a benchmark run.
 
-## Configuration
+The fallback value (used when `model_selection.json` is absent) is set in
+`inventory/group_vars/all.yml` under `openclaw_model`.
 
-Config file location: `/mnt/ai_data/openclaw/config.yml`
+## Ollama Endpoint
 
-The configuration includes:
+OpenClaw connects to `localhost:11434` — the Node 1 general instance. Coding models on
+port 11435 are not accessible to the bot; they are reserved for IDE and API integrations.
 
-- Ollama API endpoint and authentication
-- Telegram bot token (read from Vault)
-- Default model selection
-- Allowed user IDs (if access control is needed)
+## Installation
 
-## Service
+1. Python 3 dependencies (`python-telegram-bot`, `requests`, `pyyaml`) are installed via `pip3`
+2. The bot script is deployed to `/mnt/ai_data/openclaw/bot.py`
+3. Config is templated to `/mnt/ai_data/openclaw/config.yml`
+4. A systemd service (`openclaw.service`) manages the process
 
-```
-/etc/systemd/system/openclaw.service
-```
+## Configuration
+
+Config file location: `/mnt/ai_data/openclaw/config.yml`
 
-The service runs as a systemd unit, automatically starting on boot and restarting
-on failure.
+The configuration includes:
+- Ollama API endpoint (`http://localhost:11434`) and API key (from Vault)
+- Telegram bot token (from Vault)
+- Model name (from `slot1_general`)
 
 ## Vault Integration
 
-The Telegram bot token is stored in Vault:
-
 - **Path:** `{{ vault_secret_prefix }}/openclaw`
 - **Key:** `telegram_token`
 
-The role reads the token from Vault at deploy time and writes it to the config file.
+The Telegram token is read from Vault at deploy time and written to the config file.
 
 ## Skipping Installation
 
-If no Telegram bot token is configured (the Vault secret is empty or absent),
-the OpenClaw installation is skipped entirely during `site.yml`. This allows
-running the full playbook without a Telegram bot token if the feature is not needed.
+If no Telegram bot token is configured (Vault secret absent or empty), the entire
+OpenClaw installation is skipped. This allows running `site.yml` without a Telegram
+bot token.
 
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags openclaw
+ansible-playbook playbooks/site.yml --tags openclaw -K -e @local.yml
+ansible-playbook playbooks/08_openclaw.yml -K -e @local.yml
 ```

+ 36 - 39
roles/openwebui/README.md

@@ -2,30 +2,41 @@
 
 ## Purpose
 
-Deploy Open WebUI with full Ollama integration, RAG support via Qdrant, and SSO via
-Keycloak OIDC.
+Deploy Open WebUI with full Ollama integration across both NUMA instances, RAG support
+via Qdrant, and SSO via Keycloak OIDC.
+
+## Ollama Backend Configuration
+
+Open WebUI connects to **both** Ollama instances simultaneously via `OLLAMA_BASE_URLS`.
+It load-balances requests across them and presents models from both as a single unified
+list.
+
+| Instance      | Port  | Models              |
+|---------------|-------|---------------------|
+| Node 1        | 11434 | General (slots 1-2-5) |
+| Node 0        | 11435 | Coding (slots 3-4-6) |
 
 ## Environment Variables
 
-| Variable                      | Value                                                        | Source      |
-|-------------------------------|--------------------------------------------------------------|-------------|
-| `OLLAMA_BASE_URL`             | `http://host.docker.internal:11434`                         | Hardcoded   |
-| `OLLAMA_API_KEY`              | (Ollama API key)                                             | Vault       |
-| `WEBUI_SECRET_KEY`            | (session signing key)                                        | Vault       |
-| `VECTOR_DB`                   | `qdrant`                                                     | Hardcoded   |
-| `QDRANT_URI`                  | `http://host.docker.internal:6333`                          | Hardcoded   |
-| `ENABLE_RAG_WEB_SEARCH`      | `true`                                                       | Hardcoded   |
-| `OAUTH_CLIENT_ID`            | `open-webui`                                                 | Hardcoded   |
-| `OAUTH_CLIENT_SECRET`        | (OIDC client secret)                                         | Vault       |
-| `OPENID_PROVIDER_URL`        | `https://idm.<domain>/realms/<keycloak_realm>/.well-known/openid-configuration` | Vault (keycloak_oidc_url) |
-| `OAUTH_PROVIDER_NAME`        | `{{ platform_name }}`                                        | group_vars  |
-| `ENABLE_OAUTH_SIGNUP`        | `true`                                                       | Hardcoded   |
-| `DEFAULT_USER_ROLE`          | `user`                                                       | Hardcoded   |
-| `WEBUI_NAME`                 | `{{ platform_name }}`                                        | group_vars  |
-| `ENABLE_OAUTH_ROLE_MANAGEMENT` | `true`                                                     | Hardcoded   |
-| `OAUTH_ROLES_CLAIM`          | `realm_access.roles`                                         | Hardcoded   |
-| `OAUTH_ALLOWED_ROLES`        | `ai-user,ai-admin`                                           | Hardcoded   |
-| `OAUTH_ADMIN_ROLES`          | `ai-admin`                                                   | Hardcoded   |
+| Variable                      | Value                                                                                     | Source      |
+|-------------------------------|-------------------------------------------------------------------------------------------|-------------|
+| `OLLAMA_BASE_URLS`            | `http://host.docker.internal:11434;http://host.docker.internal:11435`                    | Hardcoded   |
+| `OLLAMA_API_KEY`              | (Ollama API key)                                                                          | Vault       |
+| `RAG_OLLAMA_BASE_URL`         | `http://host.docker.internal:11434`                                                       | Hardcoded   |
+| `WEBUI_SECRET_KEY`            | (session signing key)                                                                     | Vault       |
+| `VECTOR_DB`                   | `qdrant`                                                                                  | Hardcoded   |
+| `QDRANT_URI`                  | `http://host.docker.internal:6333`                                                        | Hardcoded   |
+| `OAUTH_CLIENT_ID`             | `open-webui`                                                                              | Hardcoded   |
+| `OAUTH_CLIENT_SECRET`         | (OIDC client secret)                                                                      | Vault       |
+| `OPENID_PROVIDER_URL`         | `https://idm.<domain>/realms/<keycloak_realm>/.well-known/openid-configuration`           | Vault       |
+| `OAUTH_PROVIDER_NAME`         | `{{ platform_name }}`                                                                     | group_vars  |
+| `ENABLE_OAUTH_SIGNUP`         | `true`                                                                                    | Hardcoded   |
+| `ENABLE_OAUTH_ROLE_MANAGEMENT`| `true`                                                                                    | Hardcoded   |
+| `OAUTH_ROLES_CLAIM`           | `realm_access.roles`                                                                      | Hardcoded   |
+| `OAUTH_ALLOWED_ROLES`         | `ai-user,ai-admin`                                                                        | Hardcoded   |
+| `OAUTH_ADMIN_ROLES`           | `ai-admin`                                                                                | Hardcoded   |
+| `DEFAULT_MODELS`              | `llama-family`                                                                            | Hardcoded   |
+| `WEBUI_NAME`                  | `{{ platform_name }}`                                                                     | group_vars  |
 
 ## OIDC Setup
 
@@ -38,22 +49,12 @@ Open WebUI uses Keycloak as its OIDC provider:
 ## RAG
 
 - **Vector DB:** Qdrant at `http://host.docker.internal:6333`
-- **Web search:** enabled via `ENABLE_RAG_WEB_SEARCH=true`
-- Users can upload documents through the Open WebUI interface for RAG-augmented
-  conversations
-
-## Model Access
-
-Open WebUI connects to Ollama at `http://host.docker.internal:11434` (the Docker
-host network). The `OLLAMA_API_KEY` environment variable authenticates API requests
-to the Ollama server.
+- `RAG_OLLAMA_BASE_URL` is pinned to port 11434 (Node 1) for embedding requests —
+  keeping RAG on a single stable endpoint avoids split-brain embedding indices
+- Users can upload documents through the Open WebUI interface for RAG-augmented conversations
 
 ## SSO
 
-Users see a "Sign in with {{ platform_name }}" button on the login page. Clicking it
-redirects to the Keycloak login page for the `{{ keycloak_realm }}` realm. After
-authentication, users are redirected back to Open WebUI.
-
 Access is restricted by Keycloak realm role:
 
 | Keycloak role | Open WebUI access      |
@@ -62,12 +63,8 @@ Access is restricted by Keycloak realm role:
 | `ai-admin`    | ✅ Admin               |
 | *(none)*      | ❌ Login blocked       |
 
-New users who authenticate via SSO are automatically created. Their Open WebUI role
-is set based on `OAUTH_ADMIN_ROLES` — users with `ai-admin` get admin access,
-all others get standard user access.
-
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags openwebui
+ansible-playbook playbooks/site.yml --tags openwebui -K -e @local.yml
 ```

+ 26 - 0
templates/ollama/ollama-node0.service.j2

@@ -0,0 +1,26 @@
+[Unit]
+Description=Ollama Service — NUMA Node 0 (Coding Models)
+After=network-online.target ollama.service
+Wants=network-online.target
+
+[Service]
+ExecStart=/usr/bin/numactl --cpunodebind=0 {{ ollama_binary_path }} serve
+Environment="OLLAMA_API_KEY={{ ollama_api_key }}"
+Environment="OLLAMA_HOST=0.0.0.0:{{ ollama_node0_port }}"
+Environment="OLLAMA_MODELS=/mnt/ai_data/ollama_models"
+Environment="OLLAMA_KEEP_ALIVE={{ ollama_keep_alive }}"
+Environment="OLLAMA_FLASH_ATTENTION={{ ollama_flash_attention }}"
+Environment="OLLAMA_NUM_THREADS={{ ollama_num_threads }}"
+Environment="OLLAMA_NUM_PARALLEL={{ ollama_num_parallel }}"
+Environment="OLLAMA_MAX_LOADED_MODELS={{ ollama_max_loaded_models }}"
+CPUAffinity={{ ollama_node0_cpu_affinity }}
+LimitMEMLOCK=infinity
+LimitNOFILE=65535
+OOMScoreAdjust=-500
+Restart=always
+RestartSec=3
+User=ollama
+Group=ollama
+
+[Install]
+WantedBy=multi-user.target

+ 16 - 15
templates/ollama/override.conf.j2

@@ -9,12 +9,6 @@ Environment="OLLAMA_KEEP_ALIVE=-1"
 # Flash attention: fused softmax, ~20% less memory bandwidth, faster on AVX2
 Environment="OLLAMA_FLASH_ATTENTION=1"
 
-# KV cache quantization: q8_0 halves KV cache memory vs fp16.
-# Attention reads dominate memory bandwidth at long contexts; smaller KV =
-# fewer bytes transferred per token generated. q8_0 over q4_0: negligible
-# quality loss vs significant noise at long contexts with q4_0.
-Environment="OLLAMA_KV_CACHE_TYPE=q8_0"
-
 # Threads: 14 physical cores on NUMA node 1 only (no hyperthreads).
 # LLM inference is memory-bandwidth-bound; HT siblings share the same memory
 # pipeline and add scheduling overhead without adding bandwidth.
@@ -24,19 +18,26 @@ Environment="OLLAMA_NUM_THREADS={{ ollama_num_threads }}"
 # Keeps per-request throughput high for interactive/single-user workloads.
 Environment="OLLAMA_NUM_PARALLEL={{ ollama_num_parallel }}"
 
-# Keep 4 models warm in RAM (KEEP_ALIVE=-1 means never unload)
+# Keep 3 models warm in RAM per instance (KEEP_ALIVE=-1 means never unload; 6 total across both sockets)
 Environment="OLLAMA_MAX_LOADED_MODELS={{ ollama_max_loaded_models }}"
 
 # ── NUMA / CPU binding ────────────────────────────────────────────────────
-# ExecStart override: numactl --membind=1 guarantees model weights and KV
-# cache are allocated from NUMA node 1 RAM (120 GB free). CPUAffinity alone
-# does not set the memory policy; numactl makes it explicit.
+# numactl --cpunodebind pins the scheduler to all logical CPUs on node 1
+# (14 physical + 14 HT siblings = 28 CPUs). This avoids two failure modes:
+#
+#  1. numactl --membind=1 (MPOL_BIND) suppresses khugepaged THP promotion
+#     for the model's ~2.75 GB anonymous allocation, causing ~700k 4 KB TLB
+#     entries and near-100% L2-STLB miss rate → 128x throughput loss.
+#
+#  2. CPUAffinity restricted to 14 physical cores only forces ~56 Go runtime
+#     OS threads to compete with 14 GGML compute threads on 14 CPUs (5:1
+#     oversubscription). GGML busy-wait barriers then block waiting threads
+#     from checking in → cascading stall across ~400 ops/token → 128x loss.
+#
+# --cpunodebind (sched_setaffinity only, no set_mempolicy) gives 28 CPUs and
+# MPOL_DEFAULT, so allocations go to node 1 naturally and THP works freely.
 ExecStart=
-ExecStart=/usr/bin/numactl --membind=1 {{ ollama_binary_path }} serve
-
-# Restrict scheduler to physical cores on node 1 only (odd CPUs 1–27).
-# Omitting HT siblings (29–55) prevents cross-HT contention on the memory bus.
-CPUAffinity={{ ollama_cpu_affinity }}
+ExecStart=/usr/bin/numactl --cpunodebind={{ ollama_numa_node }} {{ ollama_binary_path }} serve
 
 # ── Memory hardening ───────────────────────────────────────────────────────
 # Prevent model weights from being paged out under memory pressure

+ 28 - 0
templates/ollama/warmup-node0.sh.j2

@@ -0,0 +1,28 @@
+#!/bin/bash
+# Ollama Node 0 model warm-up script (coding models, port {{ ollama_node0_port }})
+# Sends a 1-token generation to each slot model to pin them in RAM
+
+set -e
+
+OLLAMA_URL="http://localhost:{{ ollama_node0_port }}"
+API_KEY="{{ ollama_api_key }}"
+
+warmup_model() {
+    local model="$1"
+    echo "[warmup-node0] Loading: $model"
+    curl -sf -X POST "${OLLAMA_URL}/api/generate" \
+        -H "Authorization: Bearer ${API_KEY}" \
+        -H "Content-Type: application/json" \
+        -d "{\"model\":\"${model}\",\"prompt\":\"Hi\",\"stream\":false,\"options\":{\"num_predict\":1}}" \
+        > /dev/null || echo "[warmup-node0] Warning: failed to warm up ${model}"
+    echo "[warmup-node0] Done: $model"
+}
+
+warmup_model "{{ model_selection.slot3_coding }}"
+warmup_model "{{ model_selection.slot4_coding }}"
+{% if model_selection.slot6_coding_rotate | default('') | length > 0
+      and model_selection.slot6_coding_rotate | default('none') != 'none' %}
+warmup_model "{{ model_selection.slot6_coding_rotate }}"
+{% endif %}
+
+echo "[warmup-node0] All Node 0 coding models warmed up."

+ 4 - 4
templates/ollama/warmup.sh.j2

@@ -20,9 +20,9 @@ warmup_model() {
 
 warmup_model "{{ model_selection.slot1_general }}"
 warmup_model "{{ model_selection.slot2_general }}"
-warmup_model "{{ model_selection.slot3_coding }}"
-{% if model_selection.slot4_coding | length > 0 and model_selection.slot4_coding != 'none' %}
-warmup_model "{{ model_selection.slot4_coding }}"
+{% if model_selection.slot5_general_rotate | default('') | length > 0
+      and model_selection.slot5_general_rotate | default('none') != 'none' %}
+warmup_model "{{ model_selection.slot5_general_rotate }}"
 {% endif %}
 
-echo "[warmup] All models warmed up."
+echo "[warmup] All Node 1 general models warmed up."

+ 14 - 0
templates/systemd/ollama-warmup-node0.service.j2

@@ -0,0 +1,14 @@
+[Unit]
+Description=Ollama Model Warm-Up — Node 0 (Coding)
+After=ollama-node0.service
+Requires=ollama-node0.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/usr/local/bin/ollama-warmup-node0.sh
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target

+ 15 - 0
templates/vault/vault-unseal.service.j2

@@ -0,0 +1,15 @@
+[Unit]
+Description=HashiCorp Vault Auto-Unseal
+Documentation=https://developer.hashicorp.com/vault/docs/concepts/seal
+After=vault.service network.target
+Requires=vault.service
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/vault-unseal.sh
+RemainAfterExit=no
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target

+ 11 - 11
templates/vault/vault-unseal.sh.j2

@@ -1,25 +1,25 @@
 #!/bin/bash
-# Vault auto-unseal script
-# Reads unseal key from vault-init.json and unseals Vault
+# Vault auto-unseal script — managed by Ansible, do not edit manually
+# Reads unseal key from /etc/vault.d/unseal.key and unseals Vault
 
 set -e
 
-VAULT_ADDR="http://127.0.0.1:8200"
-INIT_FILE="/docker_mounts/vault/vault-init.json"
+VAULT_ADDR="http://127.0.0.1:{{ vault_port }}"
+UNSEAL_KEY_FILE="/etc/vault.d/unseal.key"
 
-if [ ! -f "$INIT_FILE" ]; then
-    echo "ERROR: vault-init.json not found at $INIT_FILE"
+if [ ! -f "$UNSEAL_KEY_FILE" ]; then
+    echo "ERROR: unseal key not found at $UNSEAL_KEY_FILE"
     exit 1
 fi
 
-UNSEAL_KEY=$(jq -r '.unseal_keys_b64[0]' "$INIT_FILE")
+UNSEAL_KEY=$(cat "$UNSEAL_KEY_FILE")
 
 if [ -z "$UNSEAL_KEY" ]; then
-    echo "ERROR: Could not extract unseal key from $INIT_FILE"
+    echo "ERROR: unseal key file is empty"
     exit 1
 fi
 
-# Wait for Vault to be ready
+# Wait for Vault API to become ready (up to 60 s)
 for i in $(seq 1 30); do
     STATUS=$(curl -sf "${VAULT_ADDR}/v1/sys/health" 2>/dev/null || true)
     if [ -n "$STATUS" ]; then
@@ -30,7 +30,7 @@ for i in $(seq 1 30); do
         fi
         break
     fi
-    echo "Waiting for Vault... ($i/30)"
+    echo "Waiting for Vault API... ($i/30)"
     sleep 2
 done
 
@@ -38,5 +38,5 @@ echo "Unsealing Vault..."
 curl -sf -X PUT "${VAULT_ADDR}/v1/sys/unseal" \
     -H "Content-Type: application/json" \
     -d "{\"key\": \"${UNSEAL_KEY}\"}"
-
+echo ""
 echo "Vault unsealed successfully."

+ 74 - 0
tftsr_nginx-hardening/CLAUDE.md

@@ -0,0 +1,74 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Target Environment
+
+- **OS:** RHEL 9.6, **NGINX:** 1.20.1 at `/etc/nginx/`, **Ansible:** `ansible_connection: local`
+- **TLS certs:** `/etc/letsencrypt/live/tftsr.com-0001/{fullchain,privkey}.pem`
+- **Services proxied:** 15 internal services on `*.tftsr.com` / `tftsr.com`
+- `sudo dnf install -y ansible-core` is required before first run (not managed by this project)
+
+## Run Commands
+
+```bash
+# Full hardening (all three roles)
+ansible-playbook -K site.yml
+
+# Individual roles
+ansible-playbook -K playbooks/nginx_hardening.yml
+ansible-playbook -K playbooks/fail2ban.yml
+ansible-playbook -K playbooks/geo_blocking.yml
+
+# Refresh country IP ranges from ipdeny.com (run periodically)
+ansible-playbook -K playbooks/update_geo_blocks.yml
+
+# Dry run — no changes applied
+ansible-playbook -K --check site.yml
+```
+
+## Architecture
+
+Three independent roles, each runnable standalone via `playbooks/`:
+
+### `nginx_hardening`
+Deploys four files to `/etc/nginx/conf.d/` prefixed `00-` so they sort before all service configs:
+- `00-security-headers.conf` — `server_tokens off`, HSTS, X-Frame-Options, rate-limit zone, client body size
+- `00-ssl-params.conf` — TLS 1.2/1.3 only, cipher suite, OCSP stapling, resolver
+- `00-proxy-params.conf` — strips `X-Powered-By`/`Server`, sets `X-Real-IP`/`X-Forwarded-*` headers
+- `00-http-redirects.conf` — port-80 301 redirect server blocks for the 11 services that lack them
+
+**Critical constraint:** Existing service configs in `/etc/nginx/conf.d/` are never modified. The 4 services that already have HTTP→HTTPS redirects (keycloak-proxy, vault, ollama-api, vaultwarden) are not in `nginx_redirect_services`. Do not add `ssl_session_cache` to `00-ssl-params.conf` — all service configs already declare `shared:SSL:1m` in their server blocks and a conflicting http-level declaration will break `nginx -t`.
+
+### `fail2ban`
+Installs fail2ban from EPEL, deploys filter definitions and `jail.local`. Three jails:
+- `sshd` → `/var/log/secure`
+- `nginx-4xx` → `/var/log/nginx/access.log` (regex: any 4xx)
+- `nginx-auth` → `/var/log/nginx/access.log` (regex: 401/403 only)
+
+### `geo_blocking`
+Downloads per-country CIDR files from `ipdeny.com/ipblocks/data/aggregated/{cc}-aggregated.zone` at runtime, assembles them into a single nftables set, and loads a standalone `table inet geo_block` (does not touch any existing nftables rules). The include line is appended to `/etc/sysconfig/nftables.conf`. Downloads use `ignore_errors: yes` — missing zone files are silently skipped.
+
+**To unblock a country:** set `blocked: false` for its entry in `roles/geo_blocking/defaults/main.yml` and re-run `update_geo_blocks.yml`.
+
+**ipdeny-absent territories** (no zone file exists — permanently `blocked: false`, no IPs to block): BV, CX, EH, GS, HM, PN, SH, SJ, TF, XK.
+
+**DMZ host has no outbound internet** — zone files must be pre-downloaded elsewhere and copied over:
+```bash
+# On a machine WITH internet access:
+./scripts/download-geo-zones.sh /tmp/geo_zones
+rsync -av /tmp/geo_zones/ sarman@dmz-host:/opt/geo_zones/
+
+# Then run with the local cache:
+ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+```
+The role does a fast 8-second HEAD check to ipdeny.com first; if it fails and `geo_zone_files_dir` is unset, the play fails immediately rather than timing out on all 238 countries.
+
+**YAML boolean trap:** `code: NO` (Norway) is parsed as boolean `false` by PyYAML (YAML 1.1). It must stay quoted as `code: "NO"`. Watch for this if adding new entries.
+
+## Key Design Decisions
+
+- All `template`/`copy`/`lineinfile` tasks use `backup: yes` — timestamped backups are created automatically on every run alongside the modified file.
+- The nft template opens with `add table inet geo_block` + `flush table inet geo_block` for idempotency (safe to re-run).
+- The `geo_blocking` role downloads zone files to a `tempfile` directory and cleans it up at the end of every run.
+- Handlers fire only when a task reports `changed` — NGINX reload and fail2ban restart are not triggered on idempotent re-runs.

+ 4 - 0
tftsr_nginx-hardening/ansible.cfg

@@ -0,0 +1,4 @@
+[defaults]
+inventory = inventory/hosts.yml
+roles_path = roles
+host_key_checking = False

+ 4 - 0
tftsr_nginx-hardening/inventory/hosts.yml

@@ -0,0 +1,4 @@
+all:
+  hosts:
+    localhost:
+      ansible_connection: local

+ 73 - 0
tftsr_nginx-hardening/nginx-hardening/CLAUDE.md

@@ -0,0 +1,73 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Target Environment
+
+- **OS:** RHEL 9, **NGINX:** 1.20+ at `/etc/nginx/`
+- Playbooks target `hosts: all` — configure the target in `inventory/hosts.yml`
+- `sudo dnf install -y ansible-core` is required on the control node before first run
+
+## Run Commands
+
+```bash
+# Full hardening (all three roles)
+ansible-playbook -K site.yml
+
+# Individual roles
+ansible-playbook -K playbooks/nginx_hardening.yml
+ansible-playbook -K playbooks/fail2ban.yml
+ansible-playbook -K playbooks/geo_blocking.yml
+
+# Refresh country IP ranges from ipdeny.com (run periodically)
+ansible-playbook -K playbooks/update_geo_blocks.yml
+
+# Dry run — no changes applied
+ansible-playbook -K --check site.yml
+```
+
+## Architecture
+
+Three independent roles, each runnable standalone via `playbooks/`:
+
+### `nginx_hardening`
+Deploys four files to `/etc/nginx/conf.d/` prefixed `00-` so they sort before all service configs:
+- `00-security-headers.conf` — `server_tokens off`, HSTS, X-Frame-Options, rate-limit zone, client body size
+- `00-ssl-params.conf` — TLS 1.2/1.3 only, cipher suite, OCSP stapling, resolver
+- `00-proxy-params.conf` — strips `X-Powered-By`/`Server`, sets `X-Real-IP`/`X-Forwarded-*` headers
+- `00-http-redirects.conf` — port-80 301 redirect server blocks for the 11 services that lack them
+
+**Critical constraint:** Existing service configs in `/etc/nginx/conf.d/` are never modified. Only list services in `nginx_redirect_services` that are **missing** a port-80 redirect — services that already have one must be excluded or NGINX will have duplicate `server_name` entries. Do not add `ssl_session_cache` to `00-ssl-params.conf` — if any existing service configs already declare `shared:SSL:Xm` in their server blocks, a conflicting http-level declaration with a different size will break `nginx -t`.
+
+### `fail2ban`
+Installs fail2ban from EPEL, deploys filter definitions and `jail.local`. Three jails:
+- `sshd` → `/var/log/secure`
+- `nginx-4xx` → `/var/log/nginx/access.log` (regex: any 4xx)
+- `nginx-auth` → `/var/log/nginx/access.log` (regex: 401/403 only)
+
+### `geo_blocking`
+Downloads per-country CIDR files from `ipdeny.com/ipblocks/data/aggregated/{cc}-aggregated.zone` at runtime, assembles them into a single nftables set, and loads a standalone `table inet geo_block` (does not touch any existing nftables rules). The include line is appended to `/etc/sysconfig/nftables.conf`. Downloads use `ignore_errors: yes` — missing zone files are silently skipped.
+
+**To unblock a country:** set `blocked: false` for its entry in `roles/geo_blocking/defaults/main.yml` and re-run `update_geo_blocks.yml`.
+
+**ipdeny-absent territories** (no zone file exists — permanently `blocked: false`, no IPs to block): BV, CX, EH, GS, HM, PN, SH, SJ, TF, XK.
+
+**DMZ host has no outbound internet** — zone files must be pre-downloaded elsewhere and copied over:
+```bash
+# On a machine WITH internet access:
+./scripts/download-geo-zones.sh /tmp/geo_zones
+rsync -av --no-group /tmp/geo_zones/ user@your-host:/opt/geo_zones/
+
+# Then run with the local cache:
+ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+```
+The role does a fast 8-second HEAD check to ipdeny.com first; if it fails and `geo_zone_files_dir` is unset, the play fails immediately rather than timing out on all 238 countries.
+
+**YAML boolean trap:** `code: NO` (Norway) is parsed as boolean `false` by PyYAML (YAML 1.1). It must stay quoted as `code: "NO"`. Watch for this if adding new entries.
+
+## Key Design Decisions
+
+- All `template`/`copy`/`lineinfile` tasks use `backup: yes` — timestamped backups are created automatically on every run alongside the modified file.
+- The nft template opens with `add table inet geo_block` + `flush table inet geo_block` for idempotency (safe to re-run).
+- The `geo_blocking` role downloads zone files to a `tempfile` directory and cleans it up at the end of every run.
+- Handlers fire only when a task reports `changed` — NGINX reload and fail2ban restart are not triggered on idempotent re-runs.

+ 179 - 0
tftsr_nginx-hardening/nginx-hardening/README.md

@@ -0,0 +1,179 @@
+# nginx-hardening
+
+Ansible project to harden an NGINX reverse proxy to a production security posture. Applies security headers, TLS hardening, HTTP→HTTPS redirects, fail2ban jails, and nftables-based country geo-blocking — without modifying any existing service configurations.
+
+## Target environment
+
+- **OS:** RHEL 9 / Rocky Linux 9 / AlmaLinux 9
+- **NGINX:** 1.20+ with existing service configs in `/etc/nginx/conf.d/`
+- **EPEL:** Must be installed before running (`dnf install -y epel-release`)
+- **nftables:** Installed but not required to be running (managed by this project)
+- **firewalld:** Should be inactive to avoid nftables coexistence issues
+
+## What it does
+
+### Role: `nginx_hardening`
+Deploys four files to `/etc/nginx/conf.d/` prefixed `00-` so they load before all service configs:
+
+| File | Purpose |
+|------|---------|
+| `00-security-headers.conf` | `server_tokens off`, HSTS, X-Frame-Options, X-Content-Type-Options, CSP, rate-limit zone |
+| `00-ssl-params.conf` | TLS 1.2/1.3 only, hardened cipher suite, OCSP stapling, session timeout |
+| `00-proxy-params.conf` | Strips `X-Powered-By`/`Server`, sets `X-Real-IP` and `X-Forwarded-*` headers |
+| `00-http-redirects.conf` | Port-80 → HTTPS 301 redirects for services listed in `nginx_redirect_services` |
+
+**No existing service configs are modified.**
+
+### Role: `fail2ban`
+Installs fail2ban from EPEL and configures three jails:
+
+| Jail | Log | Trigger |
+|------|-----|---------|
+| `sshd` | `/var/log/secure` | Failed SSH logins |
+| `nginx-4xx` | `/var/log/nginx/access.log` | Repeated 4xx responses |
+| `nginx-auth` | `/var/log/nginx/access.log` | Repeated 401/403 responses |
+
+### Role: `geo_blocking`
+Builds a standalone `table inet geo_block` nftables ruleset populated with CIDRs for every country except the US, downloaded from [ipdeny.com](https://www.ipdeny.com). The table is loaded at boot via `/etc/sysconfig/nftables.conf`.
+
+## Prerequisites
+
+On the **Ansible control node** (the machine you run `ansible-playbook` from):
+```bash
+# Ansible itself
+pip install ansible-core
+# or
+dnf install -y ansible-core
+```
+
+On the **target host** (applied automatically by the playbooks):
+- EPEL repo must already be installed
+- SSH access with a user that can `sudo`
+
+## Setup
+
+### 1. Configure your inventory
+
+Edit `inventory/hosts.yml`:
+```yaml
+all:
+  hosts:
+    nginx-proxy:
+      ansible_host: 192.168.1.10          # your server's IP or hostname
+      ansible_user: your_ssh_user
+      # ansible_ssh_private_key_file: ~/.ssh/id_rsa
+```
+
+### 2. Configure HTTP→HTTPS redirects
+
+Edit `roles/nginx_hardening/defaults/main.yml` and populate `nginx_redirect_services` with any services that are **missing** a port-80 redirect in their existing NGINX config:
+
+```yaml
+nginx_redirect_services:
+  - name: myapp
+    server_name: myapp.example.com
+  - name: dashboard
+    server_name: dashboard.example.com
+```
+
+Services that already have a redirect in their existing `conf.d/` file should **not** be listed here.
+
+### 3. (Optional) Tune defaults
+
+All tunable variables live in each role's `defaults/main.yml`:
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `nginx_hsts_max_age` | `31536000` | HSTS max-age in seconds |
+| `nginx_rate_limit_req_zone` | `30r/m` | Rate limit zone definition |
+| `nginx_client_max_body_size` | `10m` | Max upload body size |
+| `fail2ban_bantime` | `3600` | Ban duration (seconds) |
+| `fail2ban_maxretry_ssh` | `5` | SSH failures before ban |
+| `fail2ban_maxretry_nginx_auth` | `5` | 401/403 failures before ban |
+
+## Running
+
+```bash
+# Full hardening (all roles)
+ansible-playbook -K site.yml
+
+# Individual roles
+ansible-playbook -K playbooks/nginx_hardening.yml
+ansible-playbook -K playbooks/fail2ban.yml
+ansible-playbook -K playbooks/geo_blocking.yml
+
+# Refresh country IP ranges (run periodically — ipdeny.com updates regularly)
+ansible-playbook -K playbooks/update_geo_blocks.yml
+
+# Dry run — no changes applied
+ansible-playbook -K --check site.yml
+```
+
+`-K` prompts for the sudo password. Omit it if your user has passwordless sudo.
+
+## Geo-blocking: servers without direct internet access
+
+If your target server cannot reach `ipdeny.com`, pre-download the zone files on a machine that can and copy them over:
+
+```bash
+# On a machine WITH unrestricted internet access:
+./scripts/download-geo-zones.sh /tmp/geo_zones
+
+# Copy to the target server:
+rsync -av --no-group /tmp/geo_zones/ user@your-server:/opt/geo_zones/
+
+# Run the playbook pointing at the local cache:
+ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+```
+
+To make the cache path permanent, add it to your inventory:
+```yaml
+all:
+  hosts:
+    nginx-proxy:
+      ansible_host: 192.168.1.10
+      ansible_user: your_ssh_user
+      geo_zone_files_dir: /opt/geo_zones
+```
+
+### Unblocking a country
+
+Set `blocked: false` for the desired country code in `roles/geo_blocking/defaults/main.yml`, then re-run `update_geo_blocks.yml`.
+
+## Verification
+
+After a successful run:
+
+```bash
+# NGINX config is valid
+sudo nginx -t
+
+# Security headers are present
+curl -sI https://your-domain.com | grep -i 'strict\|x-frame\|x-content'
+
+# HTTP redirects to HTTPS
+curl -I http://your-domain.com   # expect: 301 Moved Permanently
+
+# fail2ban jails are active
+sudo fail2ban-client status
+sudo fail2ban-client status nginx-4xx
+
+# nftables geo-block table is loaded
+sudo nft list table inet geo_block
+```
+
+## Files written to the target host
+
+| Path | Action |
+|------|--------|
+| `/etc/nginx/conf.d/00-security-headers.conf` | Created |
+| `/etc/nginx/conf.d/00-ssl-params.conf` | Created |
+| `/etc/nginx/conf.d/00-proxy-params.conf` | Created |
+| `/etc/nginx/conf.d/00-http-redirects.conf` | Created |
+| `/etc/fail2ban/jail.local` | Created |
+| `/etc/fail2ban/filter.d/nginx-4xx.conf` | Created |
+| `/etc/fail2ban/filter.d/nginx-auth.conf` | Created |
+| `/etc/nftables.d/geo-block.nft` | Created |
+| `/etc/sysconfig/nftables.conf` | Appended (include line) |
+
+All tasks that write files use `backup: yes` — a timestamped copy is created automatically before each overwrite.

+ 4 - 0
tftsr_nginx-hardening/nginx-hardening/ansible.cfg

@@ -0,0 +1,4 @@
+[defaults]
+inventory = inventory/hosts.yml
+roles_path = roles
+host_key_checking = False

+ 7 - 0
tftsr_nginx-hardening/nginx-hardening/inventory/hosts.yml

@@ -0,0 +1,7 @@
+all:
+  hosts:
+    nginx-proxy:
+      ansible_host: YOUR_SERVER_IP
+      ansible_user: YOUR_SSH_USER
+      # ansible_ssh_private_key_file: ~/.ssh/id_rsa
+      # geo_zone_files_dir: /opt/geo_zones   # set if server cannot reach ipdeny.com

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/fail2ban.yml

@@ -0,0 +1,5 @@
+---
+- hosts: all
+  become: true
+  roles:
+    - fail2ban

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/geo_blocking.yml

@@ -0,0 +1,5 @@
+---
+- hosts: all
+  become: true
+  roles:
+    - geo_blocking

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/nginx_hardening.yml

@@ -0,0 +1,5 @@
+---
+- hosts: all
+  become: true
+  roles:
+    - nginx_hardening

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/update_geo_blocks.yml

@@ -0,0 +1,5 @@
+---
+- hosts: all
+  become: true
+  roles:
+    - geo_blocking

+ 7 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/defaults/main.yml

@@ -0,0 +1,7 @@
+---
+fail2ban_bantime: 3600
+fail2ban_findtime: 600
+fail2ban_maxretry_ssh: 5
+fail2ban_maxretry_nginx_4xx: 20
+fail2ban_maxretry_nginx_auth: 5
+fail2ban_ignoreip: "127.0.0.1/8 ::1"

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: restart fail2ban
+  ansible.builtin.service:
+    name: fail2ban
+    state: restarted

+ 41 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/tasks/main.yml

@@ -0,0 +1,41 @@
+---
+- name: Install fail2ban
+  ansible.builtin.dnf:
+    name: fail2ban
+    state: present
+
+- name: Deploy nginx-4xx filter
+  ansible.builtin.template:
+    src: nginx-4xx.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-4xx.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy nginx-auth filter
+  ansible.builtin.template:
+    src: nginx-auth.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-auth.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy jail.local configuration
+  ansible.builtin.template:
+    src: jail.local.j2
+    dest: /etc/fail2ban/jail.local
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Enable and start fail2ban service
+  ansible.builtin.service:
+    name: fail2ban
+    state: started
+    enabled: yes

+ 22 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/jail.local.j2

@@ -0,0 +1,22 @@
+[DEFAULT]
+ignoreip = {{ fail2ban_ignoreip }}
+bantime  = {{ fail2ban_bantime }}
+findtime = {{ fail2ban_findtime }}
+
+[sshd]
+enabled  = true
+port     = ssh
+logpath  = /var/log/secure
+maxretry = {{ fail2ban_maxretry_ssh }}
+
+[nginx-4xx]
+enabled  = true
+filter   = nginx-4xx
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_4xx }}
+
+[nginx-auth]
+enabled  = true
+filter   = nginx-auth
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_auth }}

+ 3 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (4[0-9]{2}) \d+
+ignoreregex =

+ 3 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (401|403) \d+
+ignoreregex =

+ 509 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/defaults/main.yml

@@ -0,0 +1,509 @@
+---
+geo_ipdeny_base_url: "https://www.ipdeny.com/ipblocks/data/aggregated"
+geo_nft_table_dir: "/etc/nftables.d"
+geo_nft_file: "/etc/nftables.d/geo-block.nft"
+# Set this to a directory containing pre-downloaded {cc}.zone files when the
+# target host has no outbound internet access. Leave empty to download live.
+geo_zone_files_dir: ""
+
+geo_countries:
+  - code: AD   # Andorra
+    blocked: true
+  - code: AE   # United Arab Emirates
+    blocked: true
+  - code: AF   # Afghanistan
+    blocked: true
+  - code: AG   # Antigua and Barbuda
+    blocked: true
+  - code: AI   # Anguilla
+    blocked: true
+  - code: AL   # Albania
+    blocked: true
+  - code: AM   # Armenia
+    blocked: true
+  - code: AO   # Angola
+    blocked: true
+  - code: AQ   # Antarctica
+    blocked: true
+  - code: AR   # Argentina
+    blocked: true
+  - code: AS   # American Samoa
+    blocked: true
+  - code: AT   # Austria
+    blocked: true
+  - code: AU   # Australia
+    blocked: true
+  - code: AW   # Aruba
+    blocked: true
+  - code: AX   # Aland Islands
+    blocked: true
+  - code: AZ   # Azerbaijan
+    blocked: true
+  - code: BA   # Bosnia and Herzegovina
+    blocked: true
+  - code: BB   # Barbados
+    blocked: true
+  - code: BD   # Bangladesh
+    blocked: true
+  - code: BE   # Belgium
+    blocked: true
+  - code: BF   # Burkina Faso
+    blocked: true
+  - code: BG   # Bulgaria
+    blocked: true
+  - code: BH   # Bahrain
+    blocked: true
+  - code: BI   # Burundi
+    blocked: true
+  - code: BJ   # Benin
+    blocked: true
+  - code: BL   # Saint Barthelemy
+    blocked: true
+  - code: BM   # Bermuda
+    blocked: true
+  - code: BN   # Brunei Darussalam
+    blocked: true
+  - code: BO   # Bolivia
+    blocked: true
+  - code: BQ   # Bonaire
+    blocked: true
+  - code: BR   # Brazil
+    blocked: true
+  - code: BS   # Bahamas
+    blocked: true
+  - code: BT   # Bhutan
+    blocked: true
+  - code: BV   # Bouvet Island — no ipdeny zone file
+    blocked: false
+  - code: BW   # Botswana
+    blocked: true
+  - code: BY   # Belarus
+    blocked: true
+  - code: BZ   # Belize
+    blocked: true
+  - code: CA   # Canada
+    blocked: true
+  - code: CC   # Cocos Islands
+    blocked: true
+  - code: CD   # Dem. Rep. Congo
+    blocked: true
+  - code: CF   # Central African Republic
+    blocked: true
+  - code: CG   # Congo
+    blocked: true
+  - code: CH   # Switzerland
+    blocked: true
+  - code: CI   # Cote d'Ivoire
+    blocked: true
+  - code: CK   # Cook Islands
+    blocked: true
+  - code: CL   # Chile
+    blocked: true
+  - code: CM   # Cameroon
+    blocked: true
+  - code: CN   # China
+    blocked: true
+  - code: CO   # Colombia
+    blocked: true
+  - code: CR   # Costa Rica
+    blocked: true
+  - code: CU   # Cuba
+    blocked: true
+  - code: CV   # Cabo Verde
+    blocked: true
+  - code: CW   # Curacao
+    blocked: true
+  - code: CX   # Christmas Island — no ipdeny zone file
+    blocked: false
+  - code: CY   # Cyprus
+    blocked: true
+  - code: CZ   # Czechia
+    blocked: true
+  - code: DE   # Germany
+    blocked: true
+  - code: DJ   # Djibouti
+    blocked: true
+  - code: DK   # Denmark
+    blocked: true
+  - code: DM   # Dominica
+    blocked: true
+  - code: DO   # Dominican Republic
+    blocked: true
+  - code: DZ   # Algeria
+    blocked: true
+  - code: EC   # Ecuador
+    blocked: true
+  - code: EE   # Estonia
+    blocked: true
+  - code: EG   # Egypt
+    blocked: true
+  - code: EH   # Western Sahara — no ipdeny zone file
+    blocked: false
+  - code: ER   # Eritrea
+    blocked: true
+  - code: ES   # Spain
+    blocked: true
+  - code: ET   # Ethiopia
+    blocked: true
+  - code: FI   # Finland
+    blocked: true
+  - code: FJ   # Fiji
+    blocked: true
+  - code: FK   # Falkland Islands
+    blocked: true
+  - code: FM   # Micronesia
+    blocked: true
+  - code: FO   # Faroe Islands
+    blocked: true
+  - code: FR   # France
+    blocked: true
+  - code: GA   # Gabon
+    blocked: true
+  - code: GB   # United Kingdom
+    blocked: true
+  - code: GD   # Grenada
+    blocked: true
+  - code: GE   # Georgia
+    blocked: true
+  - code: GF   # French Guiana
+    blocked: true
+  - code: GG   # Guernsey
+    blocked: true
+  - code: GH   # Ghana
+    blocked: true
+  - code: GI   # Gibraltar
+    blocked: true
+  - code: GL   # Greenland
+    blocked: true
+  - code: GM   # Gambia
+    blocked: true
+  - code: GN   # Guinea
+    blocked: true
+  - code: GP   # Guadeloupe
+    blocked: true
+  - code: GQ   # Equatorial Guinea
+    blocked: true
+  - code: GR   # Greece
+    blocked: true
+  - code: GS   # South Georgia — no ipdeny zone file
+    blocked: false
+  - code: GT   # Guatemala
+    blocked: true
+  - code: GU   # Guam
+    blocked: true
+  - code: GW   # Guinea-Bissau
+    blocked: true
+  - code: GY   # Guyana
+    blocked: true
+  - code: HK   # Hong Kong
+    blocked: true
+  - code: HM   # Heard Island — no ipdeny zone file
+    blocked: false
+  - code: HN   # Honduras
+    blocked: true
+  - code: HR   # Croatia
+    blocked: true
+  - code: HT   # Haiti
+    blocked: true
+  - code: HU   # Hungary
+    blocked: true
+  - code: ID   # Indonesia
+    blocked: true
+  - code: IE   # Ireland
+    blocked: true
+  - code: IL   # Israel
+    blocked: true
+  - code: IM   # Isle of Man
+    blocked: true
+  - code: IN   # India
+    blocked: true
+  - code: IO   # British Indian Ocean Territory
+    blocked: true
+  - code: IQ   # Iraq
+    blocked: true
+  - code: IR   # Iran
+    blocked: true
+  - code: IS   # Iceland
+    blocked: true
+  - code: IT   # Italy
+    blocked: true
+  - code: JE   # Jersey
+    blocked: true
+  - code: JM   # Jamaica
+    blocked: true
+  - code: JO   # Jordan
+    blocked: true
+  - code: JP   # Japan
+    blocked: true
+  - code: KE   # Kenya
+    blocked: true
+  - code: KG   # Kyrgyzstan
+    blocked: true
+  - code: KH   # Cambodia
+    blocked: true
+  - code: KI   # Kiribati
+    blocked: true
+  - code: KM   # Comoros
+    blocked: true
+  - code: KN   # Saint Kitts and Nevis
+    blocked: true
+  - code: KP   # North Korea
+    blocked: true
+  - code: KR   # South Korea
+    blocked: true
+  - code: KW   # Kuwait
+    blocked: true
+  - code: KY   # Cayman Islands
+    blocked: true
+  - code: KZ   # Kazakhstan
+    blocked: true
+  - code: LA   # Laos
+    blocked: true
+  - code: LB   # Lebanon
+    blocked: true
+  - code: LC   # Saint Lucia
+    blocked: true
+  - code: LI   # Liechtenstein
+    blocked: true
+  - code: LK   # Sri Lanka
+    blocked: true
+  - code: LR   # Liberia
+    blocked: true
+  - code: LS   # Lesotho
+    blocked: true
+  - code: LT   # Lithuania
+    blocked: true
+  - code: LU   # Luxembourg
+    blocked: true
+  - code: LV   # Latvia
+    blocked: true
+  - code: LY   # Libya
+    blocked: true
+  - code: MA   # Morocco
+    blocked: true
+  - code: MC   # Monaco
+    blocked: true
+  - code: MD   # Moldova
+    blocked: true
+  - code: ME   # Montenegro
+    blocked: true
+  - code: MF   # Saint Martin
+    blocked: true
+  - code: MG   # Madagascar
+    blocked: true
+  - code: MH   # Marshall Islands
+    blocked: true
+  - code: MK   # North Macedonia
+    blocked: true
+  - code: ML   # Mali
+    blocked: true
+  - code: MM   # Myanmar
+    blocked: true
+  - code: MN   # Mongolia
+    blocked: true
+  - code: MO   # Macao
+    blocked: true
+  - code: MP   # Northern Mariana Islands
+    blocked: true
+  - code: MQ   # Martinique
+    blocked: true
+  - code: MR   # Mauritania
+    blocked: true
+  - code: MS   # Montserrat
+    blocked: true
+  - code: MT   # Malta
+    blocked: true
+  - code: MU   # Mauritius
+    blocked: true
+  - code: MV   # Maldives
+    blocked: true
+  - code: MW   # Malawi
+    blocked: true
+  - code: MX   # Mexico
+    blocked: true
+  - code: MY   # Malaysia
+    blocked: true
+  - code: MZ   # Mozambique
+    blocked: true
+  - code: NA   # Namibia
+    blocked: true
+  - code: NC   # New Caledonia
+    blocked: true
+  - code: NE   # Niger
+    blocked: true
+  - code: NF   # Norfolk Island
+    blocked: true
+  - code: NG   # Nigeria
+    blocked: true
+  - code: NI   # Nicaragua
+    blocked: true
+  - code: NL   # Netherlands
+    blocked: true
+  - code: "NO"  # Norway
+    blocked: true
+  - code: NP   # Nepal
+    blocked: true
+  - code: NR   # Nauru
+    blocked: true
+  - code: NU   # Niue
+    blocked: true
+  - code: NZ   # New Zealand
+    blocked: true
+  - code: OM   # Oman
+    blocked: true
+  - code: PA   # Panama
+    blocked: true
+  - code: PE   # Peru
+    blocked: true
+  - code: PF   # French Polynesia
+    blocked: true
+  - code: PG   # Papua New Guinea
+    blocked: true
+  - code: PH   # Philippines
+    blocked: true
+  - code: PK   # Pakistan
+    blocked: true
+  - code: PL   # Poland
+    blocked: true
+  - code: PM   # Saint Pierre and Miquelon
+    blocked: true
+  - code: PN   # Pitcairn — no ipdeny zone file
+    blocked: false
+  - code: PR   # Puerto Rico
+    blocked: true
+  - code: PS   # Palestine
+    blocked: true
+  - code: PT   # Portugal
+    blocked: true
+  - code: PW   # Palau
+    blocked: true
+  - code: PY   # Paraguay
+    blocked: true
+  - code: QA   # Qatar
+    blocked: true
+  - code: RE   # Reunion
+    blocked: true
+  - code: RO   # Romania
+    blocked: true
+  - code: RS   # Serbia
+    blocked: true
+  - code: RU   # Russia
+    blocked: true
+  - code: RW   # Rwanda
+    blocked: true
+  - code: SA   # Saudi Arabia
+    blocked: true
+  - code: SB   # Solomon Islands
+    blocked: true
+  - code: SC   # Seychelles
+    blocked: true
+  - code: SD   # Sudan
+    blocked: true
+  - code: SE   # Sweden
+    blocked: true
+  - code: SG   # Singapore
+    blocked: true
+  - code: SH   # Saint Helena — no ipdeny zone file
+    blocked: false
+  - code: SI   # Slovenia
+    blocked: true
+  - code: SJ   # Svalbard and Jan Mayen — no ipdeny zone file
+    blocked: false
+  - code: SK   # Slovakia
+    blocked: true
+  - code: SL   # Sierra Leone
+    blocked: true
+  - code: SM   # San Marino
+    blocked: true
+  - code: SN   # Senegal
+    blocked: true
+  - code: SO   # Somalia
+    blocked: true
+  - code: SR   # Suriname
+    blocked: true
+  - code: SS   # South Sudan
+    blocked: true
+  - code: ST   # Sao Tome and Principe
+    blocked: true
+  - code: SV   # El Salvador
+    blocked: true
+  - code: SX   # Sint Maarten
+    blocked: true
+  - code: SY   # Syria
+    blocked: true
+  - code: SZ   # Eswatini
+    blocked: true
+  - code: TC   # Turks and Caicos Islands
+    blocked: true
+  - code: TD   # Chad
+    blocked: true
+  - code: TF   # French Southern Territories — no ipdeny zone file
+    blocked: false
+  - code: TG   # Togo
+    blocked: true
+  - code: TH   # Thailand
+    blocked: true
+  - code: TJ   # Tajikistan
+    blocked: true
+  - code: TK   # Tokelau
+    blocked: true
+  - code: TL   # Timor-Leste
+    blocked: true
+  - code: TM   # Turkmenistan
+    blocked: true
+  - code: TN   # Tunisia
+    blocked: true
+  - code: TO   # Tonga
+    blocked: true
+  - code: TR   # Turkey
+    blocked: true
+  - code: TT   # Trinidad and Tobago
+    blocked: true
+  - code: TV   # Tuvalu
+    blocked: true
+  - code: TW   # Taiwan
+    blocked: true
+  - code: TZ   # Tanzania
+    blocked: true
+  - code: UA   # Ukraine
+    blocked: true
+  - code: UG   # Uganda
+    blocked: true
+  - code: UM   # US Minor Outlying Islands
+    blocked: true
+  - code: US   # United States
+    blocked: false
+  - code: UY   # Uruguay
+    blocked: true
+  - code: UZ   # Uzbekistan
+    blocked: true
+  - code: VA   # Vatican City
+    blocked: true
+  - code: VC   # Saint Vincent and the Grenadines
+    blocked: true
+  - code: VE   # Venezuela
+    blocked: true
+  - code: VG   # British Virgin Islands
+    blocked: true
+  - code: VI   # US Virgin Islands
+    blocked: true
+  - code: VN   # Vietnam
+    blocked: true
+  - code: VU   # Vanuatu
+    blocked: true
+  - code: WF   # Wallis and Futuna
+    blocked: true
+  - code: WS   # Samoa
+    blocked: true
+  - code: XK   # Kosovo — no ipdeny zone file
+    blocked: false
+  - code: YE   # Yemen
+    blocked: true
+  - code: YT   # Mayotte
+    blocked: true
+  - code: ZA   # South Africa
+    blocked: true
+  - code: ZM   # Zambia
+    blocked: true
+  - code: ZW   # Zimbabwe
+    blocked: true

+ 4 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/handlers/main.yml

@@ -0,0 +1,4 @@
+---
+- name: reload nftables
+  ansible.builtin.command: nft -f {{ geo_nft_file }}
+  changed_when: true

+ 103 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/tasks/main.yml

@@ -0,0 +1,103 @@
+---
+- name: Ensure nftables.d directory exists
+  ansible.builtin.file:
+    path: "{{ geo_nft_table_dir }}"
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+
+- name: Create temp directory for zone files
+  ansible.builtin.tempfile:
+    state: directory
+    suffix: geo_zones
+  register: geo_temp_dir
+
+# --- Source: live download ---
+
+- name: Test connectivity to ipdeny.com (fast pre-check)
+  ansible.builtin.uri:
+    url: "{{ geo_ipdeny_base_url }}/us-aggregated.zone"
+    method: HEAD
+    timeout: 8
+  register: geo_connectivity_check
+  ignore_errors: yes
+  when: geo_zone_files_dir | length == 0
+
+- name: Fail fast if ipdeny.com is unreachable and no local cache configured
+  ansible.builtin.fail:
+    msg: >-
+      Cannot reach ipdeny.com (connection timed out or refused) and
+      geo_zone_files_dir is not set. Pre-download zone files on a machine
+      with internet access using scripts/download-geo-zones.sh, copy them
+      to this host, then set geo_zone_files_dir in inventory or with -e.
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is failed
+
+- name: Download zone files for blocked countries
+  ansible.builtin.get_url:
+    url: "{{ geo_ipdeny_base_url }}/{{ item.code | lower }}-aggregated.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    timeout: 30
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is succeeded
+
+# --- Source: local pre-downloaded cache ---
+
+- name: Copy zone files from local cache directory
+  ansible.builtin.copy:
+    src: "{{ geo_zone_files_dir }}/{{ item.code | lower }}.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    remote_src: yes
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when: geo_zone_files_dir | length > 0
+
+# --- Assemble and deploy ---
+
+- name: Assemble all CIDRs from downloaded zone files
+  ansible.builtin.shell: >
+    cat {{ geo_temp_dir.path }}/*.zone 2>/dev/null |
+    grep -v '^#' | grep -v '^$' | sort -u
+  register: geo_cidrs_raw
+  changed_when: false
+
+- name: Set geo_blocked_cidrs fact
+  ansible.builtin.set_fact:
+    geo_blocked_cidrs: "{{ geo_cidrs_raw.stdout_lines }}"
+
+- name: Deploy geo-block nftables ruleset
+  ansible.builtin.template:
+    src: geo-block.nft.j2
+    dest: "{{ geo_nft_file }}"
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nftables
+
+- name: Ensure nftables.conf includes geo-block.nft
+  ansible.builtin.lineinfile:
+    path: /etc/sysconfig/nftables.conf
+    line: 'include "{{ geo_nft_file }}"'
+    state: present
+    backup: yes
+
+- name: Enable and start nftables service
+  ansible.builtin.service:
+    name: nftables
+    state: started
+    enabled: yes
+
+- name: Clean up temp directory
+  ansible.builtin.file:
+    path: "{{ geo_temp_dir.path }}"
+    state: absent

+ 26 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2

@@ -0,0 +1,26 @@
+#!/usr/sbin/nft -f
+# Managed by Ansible — do not edit manually
+
+# Ensure table exists, then flush for idempotency
+add table inet geo_block
+flush table inet geo_block
+
+table inet geo_block {
+    set blocked_countries {
+        type ipv4_addr
+        flags interval
+{% if geo_blocked_cidrs | length > 0 %}
+        elements = {
+{% for cidr in geo_blocked_cidrs %}
+            {{ cidr }}{% if not loop.last %},{% endif %}
+
+{% endfor %}
+        }
+{% endif %}
+    }
+
+    chain prerouting {
+        type filter hook prerouting priority -100; policy accept;
+        ip saddr @blocked_countries drop
+    }
+}

+ 15 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/defaults/main.yml

@@ -0,0 +1,15 @@
+---
+nginx_ssl_protocols: "TLSv1.2 TLSv1.3"
+nginx_ssl_ciphers: "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256"
+nginx_hsts_max_age: 31536000
+nginx_rate_limit_req_zone: "$binary_remote_addr zone=general:10m rate=30r/m"
+nginx_client_max_body_size: "10m"
+nginx_proxy_read_timeout: 60
+
+# Services that need a port-80 → HTTPS redirect added.
+# List only services that do NOT already have a redirect in their existing config.
+nginx_redirect_services:
+  - name: service1
+    server_name: service1.example.com
+  - name: service2
+    server_name: service2.example.com

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: reload nginx
+  ansible.builtin.service:
+    name: nginx
+    state: reloaded

+ 44 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/tasks/main.yml

@@ -0,0 +1,44 @@
+---
+- name: Deploy security headers configuration
+  ansible.builtin.template:
+    src: security_headers.conf.j2
+    dest: /etc/nginx/conf.d/00-security-headers.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy SSL parameters configuration
+  ansible.builtin.template:
+    src: ssl_params.conf.j2
+    dest: /etc/nginx/conf.d/00-ssl-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy proxy parameters configuration
+  ansible.builtin.template:
+    src: proxy_params.conf.j2
+    dest: /etc/nginx/conf.d/00-proxy-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy HTTP to HTTPS redirect configuration
+  ansible.builtin.template:
+    src: http_redirect.conf.j2
+    dest: /etc/nginx/conf.d/00-http-redirects.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Validate NGINX configuration
+  ansible.builtin.command: nginx -t
+  changed_when: false

+ 8 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+{% for svc in nginx_redirect_services %}
+server {
+    listen 80;
+    server_name {{ svc.server_name }};
+    return 301 https://$host$request_uri;
+}
+{% endfor %}

+ 8 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+
+proxy_hide_header X-Powered-By;
+proxy_hide_header Server;
+proxy_set_header X-Real-IP $remote_addr;
+proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+proxy_set_header X-Forwarded-Proto $scheme;
+proxy_read_timeout {{ nginx_proxy_read_timeout }};

+ 17 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2

@@ -0,0 +1,17 @@
+# Managed by Ansible — do not edit manually
+
+server_tokens off;
+
+# Rate limiting zone definition
+limit_req_zone {{ nginx_rate_limit_req_zone }};
+
+# Client body size limit
+client_max_body_size {{ nginx_client_max_body_size }};
+
+# Security headers
+add_header Strict-Transport-Security "max-age={{ nginx_hsts_max_age }}; includeSubDomains; preload" always;
+add_header X-Frame-Options SAMEORIGIN always;
+add_header X-Content-Type-Options nosniff always;
+add_header Referrer-Policy strict-origin-when-cross-origin always;
+add_header Permissions-Policy "geolocation=(), microphone=(), camera=()" always;
+add_header X-XSS-Protection "1; mode=block" always;

+ 10 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2

@@ -0,0 +1,10 @@
+# Managed by Ansible — do not edit manually
+
+ssl_protocols {{ nginx_ssl_protocols }};
+ssl_ciphers {{ nginx_ssl_ciphers }};
+ssl_prefer_server_ciphers off;
+ssl_session_timeout 1d;
+ssl_stapling on;
+ssl_stapling_verify on;
+resolver 8.8.8.8 8.8.4.4 valid=300s;
+resolver_timeout 5s;

+ 71 - 0
tftsr_nginx-hardening/nginx-hardening/scripts/download-geo-zones.sh

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Download ipdeny.com aggregated zone files for all blocked countries.
+# Run this on a machine WITH internet access, then rsync the output
+# directory to the DMZ host and set geo_zone_files_dir in your inventory.
+#
+# Usage:
+#   ./scripts/download-geo-zones.sh [output-dir]
+#
+# Example workflow:
+#   # On your workstation:
+#   ./scripts/download-geo-zones.sh /tmp/geo_zones
+#   rsync -av /tmp/geo_zones/ sarman@dmz-host:/opt/geo_zones/
+#
+#   # Then run the playbook pointing at the cache:
+#   ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+
+set -euo pipefail
+
+BASE_URL="https://www.ipdeny.com/ipblocks/data/aggregated"
+OUT_DIR="${1:-/tmp/geo_zones}"
+
+# All blocked country codes (excludes US and ipdeny-absent territories)
+COUNTRIES=(
+  AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ
+  BA BB BD BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BW BY BZ
+  CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CY CZ
+  DE DJ DK DM DO DZ
+  EC EE EG ER ES ET
+  FI FJ FK FM FO FR
+  GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GT GU GW GY
+  HK HN HR HT HU
+  ID IE IL IM IN IO IQ IR IS IT
+  JE JM JO JP
+  KE KG KH KI KM KN KP KR KW KY KZ
+  LA LB LC LI LK LR LS LT LU LV LY
+  MA MC MD ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ
+  NA NC NE NF NG NI NL NO NP NR NU NZ
+  OM
+  PA PE PF PG PH PK PL PM PR PS PT PW PY
+  QA
+  RE RO RS RU RW
+  SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ
+  TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ
+  UA UG UM UY UZ
+  VA VC VE VG VI VN VU
+  WF WS
+  YE YT
+  ZA ZM ZW
+)
+
+mkdir -p "$OUT_DIR"
+echo "Downloading ${#COUNTRIES[@]} zone files to $OUT_DIR ..."
+
+ok=0; fail=0
+for cc in "${COUNTRIES[@]}"; do
+  url="${BASE_URL}/${cc,,}-aggregated.zone"
+  dest="${OUT_DIR}/${cc,,}.zone"
+  if curl -fsSL --connect-timeout 10 --max-time 30 -o "$dest" "$url"; then
+    (( ++ok ))
+  else
+    echo "  SKIP $cc (no zone file at ipdeny.com)"
+    rm -f "$dest"
+    (( ++fail ))
+  fi
+done
+
+echo "Done: $ok downloaded, $fail skipped."
+echo ""
+echo "Next steps:"
+echo "  rsync -av ${OUT_DIR}/ USER@DMZ_HOST:/opt/geo_zones/"
+echo "  ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones"

+ 7 - 0
tftsr_nginx-hardening/nginx-hardening/site.yml

@@ -0,0 +1,7 @@
+---
+- hosts: all
+  become: true
+  roles:
+    - nginx_hardening
+    - fail2ban
+    - geo_blocking

+ 6 - 0
tftsr_nginx-hardening/playbooks/fail2ban.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - fail2ban

+ 6 - 0
tftsr_nginx-hardening/playbooks/geo_blocking.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - geo_blocking

+ 6 - 0
tftsr_nginx-hardening/playbooks/nginx_hardening.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - nginx_hardening

+ 6 - 0
tftsr_nginx-hardening/playbooks/update_geo_blocks.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - geo_blocking

+ 7 - 0
tftsr_nginx-hardening/roles/fail2ban/defaults/main.yml

@@ -0,0 +1,7 @@
+---
+fail2ban_bantime: 3600
+fail2ban_findtime: 600
+fail2ban_maxretry_ssh: 5
+fail2ban_maxretry_nginx_4xx: 20
+fail2ban_maxretry_nginx_auth: 5
+fail2ban_ignoreip: "127.0.0.1/8 ::1"

+ 5 - 0
tftsr_nginx-hardening/roles/fail2ban/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: restart fail2ban
+  ansible.builtin.service:
+    name: fail2ban
+    state: restarted

+ 41 - 0
tftsr_nginx-hardening/roles/fail2ban/tasks/main.yml

@@ -0,0 +1,41 @@
+---
+- name: Install fail2ban
+  ansible.builtin.dnf:
+    name: fail2ban
+    state: present
+
+- name: Deploy nginx-4xx filter
+  ansible.builtin.template:
+    src: nginx-4xx.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-4xx.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy nginx-auth filter
+  ansible.builtin.template:
+    src: nginx-auth.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-auth.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy jail.local configuration
+  ansible.builtin.template:
+    src: jail.local.j2
+    dest: /etc/fail2ban/jail.local
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Enable and start fail2ban service
+  ansible.builtin.service:
+    name: fail2ban
+    state: started
+    enabled: yes

+ 22 - 0
tftsr_nginx-hardening/roles/fail2ban/templates/jail.local.j2

@@ -0,0 +1,22 @@
+[DEFAULT]
+ignoreip = {{ fail2ban_ignoreip }}
+bantime  = {{ fail2ban_bantime }}
+findtime = {{ fail2ban_findtime }}
+
+[sshd]
+enabled  = true
+port     = ssh
+logpath  = /var/log/secure
+maxretry = {{ fail2ban_maxretry_ssh }}
+
+[nginx-4xx]
+enabled  = true
+filter   = nginx-4xx
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_4xx }}
+
+[nginx-auth]
+enabled  = true
+filter   = nginx-auth
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_auth }}

+ 3 - 0
tftsr_nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (4[0-9]{2}) \d+
+ignoreregex =

+ 3 - 0
tftsr_nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (401|403) \d+
+ignoreregex =

+ 509 - 0
tftsr_nginx-hardening/roles/geo_blocking/defaults/main.yml

@@ -0,0 +1,509 @@
+---
+geo_ipdeny_base_url: "https://www.ipdeny.com/ipblocks/data/aggregated"
+geo_nft_table_dir: "/etc/nftables.d"
+geo_nft_file: "/etc/nftables.d/geo-block.nft"
+# Set this to a directory containing pre-downloaded {cc}.zone files when the
+# target host has no outbound internet access. Leave empty to download live.
+geo_zone_files_dir: ""
+
+geo_countries:
+  - code: AD   # Andorra
+    blocked: true
+  - code: AE   # United Arab Emirates
+    blocked: true
+  - code: AF   # Afghanistan
+    blocked: true
+  - code: AG   # Antigua and Barbuda
+    blocked: true
+  - code: AI   # Anguilla
+    blocked: true
+  - code: AL   # Albania
+    blocked: true
+  - code: AM   # Armenia
+    blocked: true
+  - code: AO   # Angola
+    blocked: true
+  - code: AQ   # Antarctica
+    blocked: true
+  - code: AR   # Argentina
+    blocked: true
+  - code: AS   # American Samoa
+    blocked: true
+  - code: AT   # Austria
+    blocked: true
+  - code: AU   # Australia
+    blocked: true
+  - code: AW   # Aruba
+    blocked: true
+  - code: AX   # Aland Islands
+    blocked: true
+  - code: AZ   # Azerbaijan
+    blocked: true
+  - code: BA   # Bosnia and Herzegovina
+    blocked: true
+  - code: BB   # Barbados
+    blocked: true
+  - code: BD   # Bangladesh
+    blocked: true
+  - code: BE   # Belgium
+    blocked: true
+  - code: BF   # Burkina Faso
+    blocked: true
+  - code: BG   # Bulgaria
+    blocked: true
+  - code: BH   # Bahrain
+    blocked: true
+  - code: BI   # Burundi
+    blocked: true
+  - code: BJ   # Benin
+    blocked: true
+  - code: BL   # Saint Barthelemy
+    blocked: true
+  - code: BM   # Bermuda
+    blocked: true
+  - code: BN   # Brunei Darussalam
+    blocked: true
+  - code: BO   # Bolivia
+    blocked: true
+  - code: BQ   # Bonaire
+    blocked: true
+  - code: BR   # Brazil
+    blocked: true
+  - code: BS   # Bahamas
+    blocked: true
+  - code: BT   # Bhutan
+    blocked: true
+  - code: BV   # Bouvet Island — no ipdeny zone file
+    blocked: false
+  - code: BW   # Botswana
+    blocked: true
+  - code: BY   # Belarus
+    blocked: true
+  - code: BZ   # Belize
+    blocked: true
+  - code: CA   # Canada
+    blocked: true
+  - code: CC   # Cocos Islands
+    blocked: true
+  - code: CD   # Dem. Rep. Congo
+    blocked: true
+  - code: CF   # Central African Republic
+    blocked: true
+  - code: CG   # Congo
+    blocked: true
+  - code: CH   # Switzerland
+    blocked: true
+  - code: CI   # Cote d'Ivoire
+    blocked: true
+  - code: CK   # Cook Islands
+    blocked: true
+  - code: CL   # Chile
+    blocked: true
+  - code: CM   # Cameroon
+    blocked: true
+  - code: CN   # China
+    blocked: true
+  - code: CO   # Colombia
+    blocked: true
+  - code: CR   # Costa Rica
+    blocked: true
+  - code: CU   # Cuba
+    blocked: true
+  - code: CV   # Cabo Verde
+    blocked: true
+  - code: CW   # Curacao
+    blocked: true
+  - code: CX   # Christmas Island — no ipdeny zone file
+    blocked: false
+  - code: CY   # Cyprus
+    blocked: true
+  - code: CZ   # Czechia
+    blocked: true
+  - code: DE   # Germany
+    blocked: true
+  - code: DJ   # Djibouti
+    blocked: true
+  - code: DK   # Denmark
+    blocked: true
+  - code: DM   # Dominica
+    blocked: true
+  - code: DO   # Dominican Republic
+    blocked: true
+  - code: DZ   # Algeria
+    blocked: true
+  - code: EC   # Ecuador
+    blocked: true
+  - code: EE   # Estonia
+    blocked: true
+  - code: EG   # Egypt
+    blocked: true
+  - code: EH   # Western Sahara — no ipdeny zone file
+    blocked: false
+  - code: ER   # Eritrea
+    blocked: true
+  - code: ES   # Spain
+    blocked: true
+  - code: ET   # Ethiopia
+    blocked: true
+  - code: FI   # Finland
+    blocked: true
+  - code: FJ   # Fiji
+    blocked: true
+  - code: FK   # Falkland Islands
+    blocked: true
+  - code: FM   # Micronesia
+    blocked: true
+  - code: FO   # Faroe Islands
+    blocked: true
+  - code: FR   # France
+    blocked: true
+  - code: GA   # Gabon
+    blocked: true
+  - code: GB   # United Kingdom
+    blocked: true
+  - code: GD   # Grenada
+    blocked: true
+  - code: GE   # Georgia
+    blocked: true
+  - code: GF   # French Guiana
+    blocked: true
+  - code: GG   # Guernsey
+    blocked: true
+  - code: GH   # Ghana
+    blocked: true
+  - code: GI   # Gibraltar
+    blocked: true
+  - code: GL   # Greenland
+    blocked: true
+  - code: GM   # Gambia
+    blocked: true
+  - code: GN   # Guinea
+    blocked: true
+  - code: GP   # Guadeloupe
+    blocked: true
+  - code: GQ   # Equatorial Guinea
+    blocked: true
+  - code: GR   # Greece
+    blocked: true
+  - code: GS   # South Georgia — no ipdeny zone file
+    blocked: false
+  - code: GT   # Guatemala
+    blocked: true
+  - code: GU   # Guam
+    blocked: true
+  - code: GW   # Guinea-Bissau
+    blocked: true
+  - code: GY   # Guyana
+    blocked: true
+  - code: HK   # Hong Kong
+    blocked: true
+  - code: HM   # Heard Island — no ipdeny zone file
+    blocked: false
+  - code: HN   # Honduras
+    blocked: true
+  - code: HR   # Croatia
+    blocked: true
+  - code: HT   # Haiti
+    blocked: true
+  - code: HU   # Hungary
+    blocked: true
+  - code: ID   # Indonesia
+    blocked: true
+  - code: IE   # Ireland
+    blocked: true
+  - code: IL   # Israel
+    blocked: true
+  - code: IM   # Isle of Man
+    blocked: true
+  - code: IN   # India
+    blocked: true
+  - code: IO   # British Indian Ocean Territory
+    blocked: true
+  - code: IQ   # Iraq
+    blocked: true
+  - code: IR   # Iran
+    blocked: true
+  - code: IS   # Iceland
+    blocked: true
+  - code: IT   # Italy
+    blocked: true
+  - code: JE   # Jersey
+    blocked: true
+  - code: JM   # Jamaica
+    blocked: true
+  - code: JO   # Jordan
+    blocked: true
+  - code: JP   # Japan
+    blocked: true
+  - code: KE   # Kenya
+    blocked: true
+  - code: KG   # Kyrgyzstan
+    blocked: true
+  - code: KH   # Cambodia
+    blocked: true
+  - code: KI   # Kiribati
+    blocked: true
+  - code: KM   # Comoros
+    blocked: true
+  - code: KN   # Saint Kitts and Nevis
+    blocked: true
+  - code: KP   # North Korea
+    blocked: true
+  - code: KR   # South Korea
+    blocked: true
+  - code: KW   # Kuwait
+    blocked: true
+  - code: KY   # Cayman Islands
+    blocked: true
+  - code: KZ   # Kazakhstan
+    blocked: true
+  - code: LA   # Laos
+    blocked: true
+  - code: LB   # Lebanon
+    blocked: true
+  - code: LC   # Saint Lucia
+    blocked: true
+  - code: LI   # Liechtenstein
+    blocked: true
+  - code: LK   # Sri Lanka
+    blocked: true
+  - code: LR   # Liberia
+    blocked: true
+  - code: LS   # Lesotho
+    blocked: true
+  - code: LT   # Lithuania
+    blocked: true
+  - code: LU   # Luxembourg
+    blocked: true
+  - code: LV   # Latvia
+    blocked: true
+  - code: LY   # Libya
+    blocked: true
+  - code: MA   # Morocco
+    blocked: true
+  - code: MC   # Monaco
+    blocked: true
+  - code: MD   # Moldova
+    blocked: true
+  - code: ME   # Montenegro
+    blocked: true
+  - code: MF   # Saint Martin
+    blocked: true
+  - code: MG   # Madagascar
+    blocked: true
+  - code: MH   # Marshall Islands
+    blocked: true
+  - code: MK   # North Macedonia
+    blocked: true
+  - code: ML   # Mali
+    blocked: true
+  - code: MM   # Myanmar
+    blocked: true
+  - code: MN   # Mongolia
+    blocked: true
+  - code: MO   # Macao
+    blocked: true
+  - code: MP   # Northern Mariana Islands
+    blocked: true
+  - code: MQ   # Martinique
+    blocked: true
+  - code: MR   # Mauritania
+    blocked: true
+  - code: MS   # Montserrat
+    blocked: true
+  - code: MT   # Malta
+    blocked: true
+  - code: MU   # Mauritius
+    blocked: true
+  - code: MV   # Maldives
+    blocked: true
+  - code: MW   # Malawi
+    blocked: true
+  - code: MX   # Mexico
+    blocked: true
+  - code: MY   # Malaysia
+    blocked: true
+  - code: MZ   # Mozambique
+    blocked: true
+  - code: NA   # Namibia
+    blocked: true
+  - code: NC   # New Caledonia
+    blocked: true
+  - code: NE   # Niger
+    blocked: true
+  - code: NF   # Norfolk Island
+    blocked: true
+  - code: NG   # Nigeria
+    blocked: true
+  - code: NI   # Nicaragua
+    blocked: true
+  - code: NL   # Netherlands
+    blocked: true
+  - code: "NO"  # Norway
+    blocked: true
+  - code: NP   # Nepal
+    blocked: true
+  - code: NR   # Nauru
+    blocked: true
+  - code: NU   # Niue
+    blocked: true
+  - code: NZ   # New Zealand
+    blocked: true
+  - code: OM   # Oman
+    blocked: true
+  - code: PA   # Panama
+    blocked: true
+  - code: PE   # Peru
+    blocked: true
+  - code: PF   # French Polynesia
+    blocked: true
+  - code: PG   # Papua New Guinea
+    blocked: true
+  - code: PH   # Philippines
+    blocked: true
+  - code: PK   # Pakistan
+    blocked: true
+  - code: PL   # Poland
+    blocked: true
+  - code: PM   # Saint Pierre and Miquelon
+    blocked: true
+  - code: PN   # Pitcairn — no ipdeny zone file
+    blocked: false
+  - code: PR   # Puerto Rico
+    blocked: true
+  - code: PS   # Palestine
+    blocked: true
+  - code: PT   # Portugal
+    blocked: true
+  - code: PW   # Palau
+    blocked: true
+  - code: PY   # Paraguay
+    blocked: true
+  - code: QA   # Qatar
+    blocked: true
+  - code: RE   # Reunion
+    blocked: true
+  - code: RO   # Romania
+    blocked: true
+  - code: RS   # Serbia
+    blocked: true
+  - code: RU   # Russia
+    blocked: true
+  - code: RW   # Rwanda
+    blocked: true
+  - code: SA   # Saudi Arabia
+    blocked: true
+  - code: SB   # Solomon Islands
+    blocked: true
+  - code: SC   # Seychelles
+    blocked: true
+  - code: SD   # Sudan
+    blocked: true
+  - code: SE   # Sweden
+    blocked: true
+  - code: SG   # Singapore
+    blocked: true
+  - code: SH   # Saint Helena — no ipdeny zone file
+    blocked: false
+  - code: SI   # Slovenia
+    blocked: true
+  - code: SJ   # Svalbard and Jan Mayen — no ipdeny zone file
+    blocked: false
+  - code: SK   # Slovakia
+    blocked: true
+  - code: SL   # Sierra Leone
+    blocked: true
+  - code: SM   # San Marino
+    blocked: true
+  - code: SN   # Senegal
+    blocked: true
+  - code: SO   # Somalia
+    blocked: true
+  - code: SR   # Suriname
+    blocked: true
+  - code: SS   # South Sudan
+    blocked: true
+  - code: ST   # Sao Tome and Principe
+    blocked: true
+  - code: SV   # El Salvador
+    blocked: true
+  - code: SX   # Sint Maarten
+    blocked: true
+  - code: SY   # Syria
+    blocked: true
+  - code: SZ   # Eswatini
+    blocked: true
+  - code: TC   # Turks and Caicos Islands
+    blocked: true
+  - code: TD   # Chad
+    blocked: true
+  - code: TF   # French Southern Territories — no ipdeny zone file
+    blocked: false
+  - code: TG   # Togo
+    blocked: true
+  - code: TH   # Thailand
+    blocked: true
+  - code: TJ   # Tajikistan
+    blocked: true
+  - code: TK   # Tokelau
+    blocked: true
+  - code: TL   # Timor-Leste
+    blocked: true
+  - code: TM   # Turkmenistan
+    blocked: true
+  - code: TN   # Tunisia
+    blocked: true
+  - code: TO   # Tonga
+    blocked: true
+  - code: TR   # Turkey
+    blocked: true
+  - code: TT   # Trinidad and Tobago
+    blocked: true
+  - code: TV   # Tuvalu
+    blocked: true
+  - code: TW   # Taiwan
+    blocked: true
+  - code: TZ   # Tanzania
+    blocked: true
+  - code: UA   # Ukraine
+    blocked: true
+  - code: UG   # Uganda
+    blocked: true
+  - code: UM   # US Minor Outlying Islands
+    blocked: true
+  - code: US   # United States
+    blocked: false
+  - code: UY   # Uruguay
+    blocked: true
+  - code: UZ   # Uzbekistan
+    blocked: true
+  - code: VA   # Vatican City
+    blocked: true
+  - code: VC   # Saint Vincent and the Grenadines
+    blocked: true
+  - code: VE   # Venezuela
+    blocked: true
+  - code: VG   # British Virgin Islands
+    blocked: true
+  - code: VI   # US Virgin Islands
+    blocked: true
+  - code: VN   # Vietnam
+    blocked: true
+  - code: VU   # Vanuatu
+    blocked: true
+  - code: WF   # Wallis and Futuna
+    blocked: true
+  - code: WS   # Samoa
+    blocked: true
+  - code: XK   # Kosovo — no ipdeny zone file
+    blocked: false
+  - code: YE   # Yemen
+    blocked: true
+  - code: YT   # Mayotte
+    blocked: true
+  - code: ZA   # South Africa
+    blocked: true
+  - code: ZM   # Zambia
+    blocked: true
+  - code: ZW   # Zimbabwe
+    blocked: true

+ 4 - 0
tftsr_nginx-hardening/roles/geo_blocking/handlers/main.yml

@@ -0,0 +1,4 @@
+---
+- name: reload nftables
+  ansible.builtin.command: nft -f {{ geo_nft_file }}
+  changed_when: true

+ 103 - 0
tftsr_nginx-hardening/roles/geo_blocking/tasks/main.yml

@@ -0,0 +1,103 @@
+---
+- name: Ensure nftables.d directory exists
+  ansible.builtin.file:
+    path: "{{ geo_nft_table_dir }}"
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+
+- name: Create temp directory for zone files
+  ansible.builtin.tempfile:
+    state: directory
+    suffix: geo_zones
+  register: geo_temp_dir
+
+# --- Source: live download ---
+
+- name: Test connectivity to ipdeny.com (fast pre-check)
+  ansible.builtin.uri:
+    url: "{{ geo_ipdeny_base_url }}/us-aggregated.zone"
+    method: HEAD
+    timeout: 8
+  register: geo_connectivity_check
+  ignore_errors: yes
+  when: geo_zone_files_dir | length == 0
+
+- name: Fail fast if ipdeny.com is unreachable and no local cache configured
+  ansible.builtin.fail:
+    msg: >-
+      Cannot reach ipdeny.com (connection timed out or refused) and
+      geo_zone_files_dir is not set. Pre-download zone files on a machine
+      with internet access using scripts/download-geo-zones.sh, copy them
+      to this host, then set geo_zone_files_dir in inventory or with -e.
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is failed
+
+- name: Download zone files for blocked countries
+  ansible.builtin.get_url:
+    url: "{{ geo_ipdeny_base_url }}/{{ item.code | lower }}-aggregated.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    timeout: 30
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is succeeded
+
+# --- Source: local pre-downloaded cache ---
+
+- name: Copy zone files from local cache directory
+  ansible.builtin.copy:
+    src: "{{ geo_zone_files_dir }}/{{ item.code | lower }}.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    remote_src: yes
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when: geo_zone_files_dir | length > 0
+
+# --- Assemble and deploy ---
+
+- name: Assemble all CIDRs from downloaded zone files
+  ansible.builtin.shell: >
+    cat {{ geo_temp_dir.path }}/*.zone 2>/dev/null |
+    grep -v '^#' | grep -v '^$' | sort -u
+  register: geo_cidrs_raw
+  changed_when: false
+
+- name: Set geo_blocked_cidrs fact
+  ansible.builtin.set_fact:
+    geo_blocked_cidrs: "{{ geo_cidrs_raw.stdout_lines }}"
+
+- name: Deploy geo-block nftables ruleset
+  ansible.builtin.template:
+    src: geo-block.nft.j2
+    dest: "{{ geo_nft_file }}"
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nftables
+
+- name: Ensure nftables.conf includes geo-block.nft
+  ansible.builtin.lineinfile:
+    path: /etc/sysconfig/nftables.conf
+    line: 'include "{{ geo_nft_file }}"'
+    state: present
+    backup: yes
+
+- name: Enable and start nftables service
+  ansible.builtin.service:
+    name: nftables
+    state: started
+    enabled: yes
+
+- name: Clean up temp directory
+  ansible.builtin.file:
+    path: "{{ geo_temp_dir.path }}"
+    state: absent

+ 26 - 0
tftsr_nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2

@@ -0,0 +1,26 @@
+#!/usr/sbin/nft -f
+# Managed by Ansible — do not edit manually
+
+# Ensure table exists, then flush for idempotency
+add table inet geo_block
+flush table inet geo_block
+
+table inet geo_block {
+    set blocked_countries {
+        type ipv4_addr
+        flags interval
+{% if geo_blocked_cidrs | length > 0 %}
+        elements = {
+{% for cidr in geo_blocked_cidrs %}
+            {{ cidr }}{% if not loop.last %},{% endif %}
+
+{% endfor %}
+        }
+{% endif %}
+    }
+
+    chain prerouting {
+        type filter hook prerouting priority -100; policy accept;
+        ip saddr @blocked_countries drop
+    }
+}

+ 31 - 0
tftsr_nginx-hardening/roles/nginx_hardening/defaults/main.yml

@@ -0,0 +1,31 @@
+---
+nginx_ssl_protocols: "TLSv1.2 TLSv1.3"
+nginx_ssl_ciphers: "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256"
+nginx_hsts_max_age: 31536000
+nginx_rate_limit_req_zone: "$binary_remote_addr zone=general:10m rate=30r/m"
+nginx_client_max_body_size: "10m"
+nginx_proxy_read_timeout: 60
+
+nginx_redirect_services:
+  - name: gogs
+    server_name: gogs.tftsr.com
+  - name: homeassist
+    server_name: homeassist.tftsr.com
+  - name: kimai
+    server_name: kimai.tftsr.com
+  - name: ollama-ui
+    server_name: ollama-ui.tftsr.com
+  - name: overseerr
+    server_name: overseerr.tftsr.com
+  - name: plex
+    server_name: plex.tftsr.com
+  - name: portainer
+    server_name: portainer.tftsr.com
+  - name: radarr
+    server_name: radarr.tftsr.com
+  - name: retro
+    server_name: retro.tftsr.com
+  - name: sonarr
+    server_name: sonarr.tftsr.com
+  - name: trilium
+    server_name: trilium.tftsr.com

+ 5 - 0
tftsr_nginx-hardening/roles/nginx_hardening/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: reload nginx
+  ansible.builtin.service:
+    name: nginx
+    state: reloaded

+ 44 - 0
tftsr_nginx-hardening/roles/nginx_hardening/tasks/main.yml

@@ -0,0 +1,44 @@
+---
+- name: Deploy security headers configuration
+  ansible.builtin.template:
+    src: security_headers.conf.j2
+    dest: /etc/nginx/conf.d/00-security-headers.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy SSL parameters configuration
+  ansible.builtin.template:
+    src: ssl_params.conf.j2
+    dest: /etc/nginx/conf.d/00-ssl-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy proxy parameters configuration
+  ansible.builtin.template:
+    src: proxy_params.conf.j2
+    dest: /etc/nginx/conf.d/00-proxy-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy HTTP to HTTPS redirect configuration
+  ansible.builtin.template:
+    src: http_redirect.conf.j2
+    dest: /etc/nginx/conf.d/00-http-redirects.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Validate NGINX configuration
+  ansible.builtin.command: nginx -t
+  changed_when: false

+ 8 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+{% for svc in nginx_redirect_services %}
+server {
+    listen 80;
+    server_name {{ svc.server_name }};
+    return 301 https://$host$request_uri;
+}
+{% endfor %}

+ 8 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+
+proxy_hide_header X-Powered-By;
+proxy_hide_header Server;
+proxy_set_header X-Real-IP $remote_addr;
+proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+proxy_set_header X-Forwarded-Proto $scheme;
+proxy_read_timeout {{ nginx_proxy_read_timeout }};

+ 17 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2

@@ -0,0 +1,17 @@
+# Managed by Ansible — do not edit manually
+
+server_tokens off;
+
+# Rate limiting zone definition
+limit_req_zone {{ nginx_rate_limit_req_zone }};
+
+# Client body size limit
+client_max_body_size {{ nginx_client_max_body_size }};
+
+# Security headers
+add_header Strict-Transport-Security "max-age={{ nginx_hsts_max_age }}; includeSubDomains; preload" always;
+add_header X-Frame-Options SAMEORIGIN always;
+add_header X-Content-Type-Options nosniff always;
+add_header Referrer-Policy strict-origin-when-cross-origin always;
+add_header Permissions-Policy "geolocation=(), microphone=(), camera=()" always;
+add_header X-XSS-Protection "1; mode=block" always;

+ 10 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2

@@ -0,0 +1,10 @@
+# Managed by Ansible — do not edit manually
+
+ssl_protocols {{ nginx_ssl_protocols }};
+ssl_ciphers {{ nginx_ssl_ciphers }};
+ssl_prefer_server_ciphers off;
+ssl_session_timeout 1d;
+ssl_stapling on;
+ssl_stapling_verify on;
+resolver 8.8.8.8 8.8.4.4 valid=300s;
+resolver_timeout 5s;

+ 71 - 0
tftsr_nginx-hardening/scripts/download-geo-zones.sh

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Download ipdeny.com aggregated zone files for all blocked countries.
+# Run this on a machine WITH internet access, then rsync the output
+# directory to the DMZ host and set geo_zone_files_dir in your inventory.
+#
+# Usage:
+#   ./scripts/download-geo-zones.sh [output-dir]
+#
+# Example workflow:
+#   # On your workstation:
+#   ./scripts/download-geo-zones.sh /tmp/geo_zones
+#   rsync -av /tmp/geo_zones/ sarman@dmz-host:/opt/geo_zones/
+#
+#   # Then run the playbook pointing at the cache:
+#   ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+
+set -euo pipefail
+
+BASE_URL="https://www.ipdeny.com/ipblocks/data/aggregated"
+OUT_DIR="${1:-/tmp/geo_zones}"
+
+# All blocked country codes (excludes US and ipdeny-absent territories)
+COUNTRIES=(
+  AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ
+  BA BB BD BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BW BY BZ
+  CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CY CZ
+  DE DJ DK DM DO DZ
+  EC EE EG ER ES ET
+  FI FJ FK FM FO FR
+  GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GT GU GW GY
+  HK HN HR HT HU
+  ID IE IL IM IN IO IQ IR IS IT
+  JE JM JO JP
+  KE KG KH KI KM KN KP KR KW KY KZ
+  LA LB LC LI LK LR LS LT LU LV LY
+  MA MC MD ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ
+  NA NC NE NF NG NI NL NO NP NR NU NZ
+  OM
+  PA PE PF PG PH PK PL PM PR PS PT PW PY
+  QA
+  RE RO RS RU RW
+  SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ
+  TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ
+  UA UG UM UY UZ
+  VA VC VE VG VI VN VU
+  WF WS
+  YE YT
+  ZA ZM ZW
+)
+
+mkdir -p "$OUT_DIR"
+echo "Downloading ${#COUNTRIES[@]} zone files to $OUT_DIR ..."
+
+ok=0; fail=0
+for cc in "${COUNTRIES[@]}"; do
+  url="${BASE_URL}/${cc,,}-aggregated.zone"
+  dest="${OUT_DIR}/${cc,,}.zone"
+  if curl -fsSL --connect-timeout 10 --max-time 30 -o "$dest" "$url"; then
+    (( ++ok ))
+  else
+    echo "  SKIP $cc (no zone file at ipdeny.com)"
+    rm -f "$dest"
+    (( ++fail ))
+  fi
+done
+
+echo "Done: $ok downloaded, $fail skipped."
+echo ""
+echo "Next steps:"
+echo "  rsync -av ${OUT_DIR}/ USER@DMZ_HOST:/opt/geo_zones/"
+echo "  ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones"

+ 8 - 0
tftsr_nginx-hardening/site.yml

@@ -0,0 +1,8 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - nginx_hardening
+    - fail2ban
+    - geo_blocking