Переглянути джерело

Merge branch 'feature/three-pass-benchmark' of sarman/tftsr_ai into master

Shaun Arman 1 день тому
батько
коміт
342cbd123d
90 змінених файлів з 4903 додано та 410 видалено
  1. 22 15
      CLAUDE.md
  2. 45 15
      README.md
  3. 117 66
      benchmarks/README.md
  4. 17 6
      benchmarks/results/benchmark_20260307T170059.md
  5. 92 0
      benchmarks/results/benchmark_20260307T184212.md
  6. 147 0
      benchmarks/results/benchmark_20260308T003605.md
  7. 70 0
      benchmarks/results/benchmark_20260308T145246.md
  8. 57 0
      benchmarks/results/benchmark_20260308T215747.md
  9. 54 0
      benchmarks/results/benchmark_20260309T080551.md
  10. 47 0
      benchmarks/results/benchmark_20260309T174604.md
  11. 67 0
      benchmarks/results/benchmark_20260310T094843.md
  12. 117 0
      benchmarks/results/benchmark_20260310T102149.md
  13. 94 0
      benchmarks/results/benchmark_20260310T110632.md
  14. 107 0
      benchmarks/results/benchmark_20260310T122818.md
  15. 107 0
      benchmarks/results/benchmark_20260310T160815.md
  16. 78 0
      benchmarks/results/benchmark_20260310T170013.md
  17. 433 0
      benchmarks/results/benchmark_review_20260310.md
  18. 151 71
      benchmarks/results/model_selection.json
  19. 26 10
      inventory/group_vars/all.yml
  20. 49 0
      playbooks/01_vault.yml
  21. 44 2
      playbooks/02_infrastructure.yml
  22. 165 37
      playbooks/03_benchmark.yml
  23. 74 7
      playbooks/04_models.yml
  24. 1 1
      playbooks/07_openwebui.yml
  25. 21 0
      playbooks/08_openclaw.yml
  26. 171 0
      playbooks/_bench_tier_batch.yml
  27. 75 45
      roles/models/README.md
  28. 52 41
      roles/ollama/README.md
  29. 31 25
      roles/openclaw/README.md
  30. 36 39
      roles/openwebui/README.md
  31. 26 0
      templates/ollama/ollama-node0.service.j2
  32. 16 15
      templates/ollama/override.conf.j2
  33. 28 0
      templates/ollama/warmup-node0.sh.j2
  34. 4 4
      templates/ollama/warmup.sh.j2
  35. 14 0
      templates/systemd/ollama-warmup-node0.service.j2
  36. 15 0
      templates/vault/vault-unseal.service.j2
  37. 11 11
      templates/vault/vault-unseal.sh.j2
  38. 74 0
      tftsr_nginx-hardening/CLAUDE.md
  39. 4 0
      tftsr_nginx-hardening/ansible.cfg
  40. 4 0
      tftsr_nginx-hardening/inventory/hosts.yml
  41. 73 0
      tftsr_nginx-hardening/nginx-hardening/CLAUDE.md
  42. 179 0
      tftsr_nginx-hardening/nginx-hardening/README.md
  43. 4 0
      tftsr_nginx-hardening/nginx-hardening/ansible.cfg
  44. 7 0
      tftsr_nginx-hardening/nginx-hardening/inventory/hosts.yml
  45. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/fail2ban.yml
  46. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/geo_blocking.yml
  47. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/nginx_hardening.yml
  48. 5 0
      tftsr_nginx-hardening/nginx-hardening/playbooks/update_geo_blocks.yml
  49. 7 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/defaults/main.yml
  50. 5 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/handlers/main.yml
  51. 41 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/tasks/main.yml
  52. 22 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/jail.local.j2
  53. 3 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2
  54. 3 0
      tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2
  55. 509 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/defaults/main.yml
  56. 4 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/handlers/main.yml
  57. 103 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/tasks/main.yml
  58. 26 0
      tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2
  59. 15 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/defaults/main.yml
  60. 5 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/handlers/main.yml
  61. 44 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/tasks/main.yml
  62. 8 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2
  63. 8 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2
  64. 17 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2
  65. 10 0
      tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2
  66. 71 0
      tftsr_nginx-hardening/nginx-hardening/scripts/download-geo-zones.sh
  67. 7 0
      tftsr_nginx-hardening/nginx-hardening/site.yml
  68. 6 0
      tftsr_nginx-hardening/playbooks/fail2ban.yml
  69. 6 0
      tftsr_nginx-hardening/playbooks/geo_blocking.yml
  70. 6 0
      tftsr_nginx-hardening/playbooks/nginx_hardening.yml
  71. 6 0
      tftsr_nginx-hardening/playbooks/update_geo_blocks.yml
  72. 7 0
      tftsr_nginx-hardening/roles/fail2ban/defaults/main.yml
  73. 5 0
      tftsr_nginx-hardening/roles/fail2ban/handlers/main.yml
  74. 41 0
      tftsr_nginx-hardening/roles/fail2ban/tasks/main.yml
  75. 22 0
      tftsr_nginx-hardening/roles/fail2ban/templates/jail.local.j2
  76. 3 0
      tftsr_nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2
  77. 3 0
      tftsr_nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2
  78. 509 0
      tftsr_nginx-hardening/roles/geo_blocking/defaults/main.yml
  79. 4 0
      tftsr_nginx-hardening/roles/geo_blocking/handlers/main.yml
  80. 103 0
      tftsr_nginx-hardening/roles/geo_blocking/tasks/main.yml
  81. 26 0
      tftsr_nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2
  82. 31 0
      tftsr_nginx-hardening/roles/nginx_hardening/defaults/main.yml
  83. 5 0
      tftsr_nginx-hardening/roles/nginx_hardening/handlers/main.yml
  84. 44 0
      tftsr_nginx-hardening/roles/nginx_hardening/tasks/main.yml
  85. 8 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2
  86. 8 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2
  87. 17 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2
  88. 10 0
      tftsr_nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2
  89. 71 0
      tftsr_nginx-hardening/scripts/download-geo-zones.sh
  90. 8 0
      tftsr_nginx-hardening/site.yml

+ 22 - 15
CLAUDE.md

@@ -6,22 +6,26 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ```bash
 # Full deployment
-ansible-playbook playbooks/site.yml
+ansible-playbook playbooks/site.yml -K -e @local.yml
 
 # Run a single playbook
-ansible-playbook playbooks/03_benchmark.yml
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml
 
 # Run with tags (each playbook defines granular tags)
-ansible-playbook playbooks/site.yml --tags ollama,docker
+ansible-playbook playbooks/site.yml --tags ollama,docker -K -e @local.yml
 
 # Benchmark and update warm-up slots in one shot
-ansible-playbook playbooks/03_benchmark.yml && ansible-playbook playbooks/04_models.yml
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
 
-# Override slot 4 with a specific model
-ansible-playbook playbooks/04_models.yml -e "slot4_model=qwen2.5-coder:7b"
+# Rotate general slot (Node 1, port 11434)
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest"
+
+# Rotate coding slot (Node 0, port 11435)
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot6_model=llama3.1:70b"
 
 # Run against a subset of hosts
-ansible-playbook playbooks/09_nginx.yml --limit nginx_proxy
+ansible-playbook playbooks/09_nginx.yml --limit nginx_proxy -K -e @local.yml
 
 # Lint playbooks
 ansible-lint playbooks/
@@ -30,7 +34,7 @@ ansible-lint playbooks/
 ansible-galaxy collection install -r requirements.yml
 
 # Check mode (dry run)
-ansible-playbook playbooks/site.yml --check --diff
+ansible-playbook playbooks/site.yml --check --diff -K -e @local.yml
 ```
 
 ## Required Local Configuration
@@ -87,17 +91,20 @@ All credentials live exclusively in Vault under `secret/data/{{ vault_project_sl
 
 **Composite score formula:**
 ```
-composite = (quality × 0.45) + (tokens_per_sec / 30, capped at 1.0) × 0.30 + (1 - ttft_ms/5000, floored at 0) × 0.25
+composite = (quality × 0.45) + (tokens_per_sec / ceiling, capped at 1.0) × 0.30 + (1 - ttft_ms/5000, floored at 0) × 0.25
 ```
+`benchmark_toks_norm_ceiling` defaults to 40 (dual-socket target).
+
+**Slot classification:** if `coding_composite - general_composite >= 0.10` (configurable via `benchmark_coding_threshold`), model goes to a coding slot; otherwise general.
 
-**Slot classification:** if `coding_composite - general_composite >= 0.15` (configurable via `benchmark_coding_threshold`), model goes to a coding slot; otherwise general.
+**6 warm-up slots across two NUMA instances:**
+- Node 1 (port 11434): slots 1–2 locked general + slot 5 rotatable general
+- Node 0 (port 11435): slots 3–4 locked coding + slot 6 rotatable coding
+- Slots 5/6 rotatable via `-e slot5_model=<name>` / `-e slot6_model=<name>` without re-benchmarking
 
-**4 warm-up slots always hot in RAM:**
-- Slots 1–2: top general-purpose models by composite score
-- Slots 3–4: top coding models by composite score
-- Slot 4 is user-rotatable via `-e slot4_model=<name>` without re-benchmarking
+`04_models.yml` creates Modelfiles (`coder-128k`, `coder-32k`, `coder-rotate`, `llama-family`, `gemma-family`) and two warmup services: `ollama-warmup.service` (Node 1) and `ollama-warmup-node0.service` (Node 0).
 
-`04_models.yml` creates named Ollama Modelfiles (`coder-128k`, `coder-32k`, `llama-family`, `gemma-family`) and a `ollama-warmup.service` systemd one-shot that pre-loads all 4 slots after Ollama starts.
+**Benchmark alias filter:** `benchmark_skip_aliases` in `group_vars/all.yml` lists the Modelfile aliases — the benchmark playbook excludes these from the test loop to prevent 32k-token KV-cache allocations from stalling the run.
 
 ### Key Variables
 

+ 45 - 15
README.md

@@ -23,7 +23,7 @@ bot access -- all driven by a single `ansible-playbook deploy_ai.yml` command.
           ┌───────────────▼┐    ┌────▼──────────────────────┐
           │ coredns_host   │    │ ai_server                 │
           │ 192.168.1.29   │    │ 192.168.1.100             │
-          │                │    │                            
+          │                │    │                           │
           │ - CoreDNS      │    │ - Ollama (LLM inference)  │
           └────────────────┘    │ - Open WebUI              │
                                 │ - Keycloak (SSO/OIDC)     │
@@ -292,11 +292,13 @@ The benchmark playbook automatically selects the best coding models and keeps th
 Check the current slot assignments in `benchmarks/results/model_selection.json`:
 
 ```bash
-cat benchmarks/results/model_selection.json | python3 -m json.tool | grep slot
+python3 -m json.tool benchmarks/results/model_selection.json | grep slot
 ```
 
-Slots 3 and 4 are always coding-classified models. Use the `slot3_coding` model for
-primary work and `slot4_coding` for a lighter/faster alternative.
+Slots 3–6 are coding-classified models, all running on the Node 0 instance at port 11435.
+Use `slot3_coding` (the highest-scoring coding model) as your primary model. Connect coding
+tools directly to `https://ollama-api.<domain>` (proxied from port 11434, Node 1) or to
+Open WebUI which load-balances across both instances.
 
 ## Day-2 Operations
 
@@ -343,6 +345,13 @@ ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
   -e "benchmark_models=qwen2.5-coder:14b-instruct-q4_K_M,codestral:22b-v0.1-q4_K_M"
 ```
 
+**Override tier boundaries or timeouts (see [benchmarks/README.md](benchmarks/README.md#three-pass-execution)):**
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_small_max_gb=8 benchmark_medium_max_gb=20"
+```
+
 **Pull recommended models if scores are below threshold:**
 
 ```bash
@@ -355,10 +364,20 @@ ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml -e "pull_if_better=
 ansible-playbook playbooks/04_models.yml -K -e @local.yml
 ```
 
-**Rotate slot 4 to a specific model:**
+**Rotate slot 5 (general) or slot 6 (coding) to a specific model:**
 
 ```bash
-ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot4_model=deepseek-r1:14b"
+# Swap general rotate slot
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest"
+
+# Swap coding rotate slot
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot6_model=llama3.1:70b"
+
+# Both at once
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest" -e "slot6_model=command-r:35b"
+
+# Reset both rotate slots back to benchmark recommendations
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
 ```
 
 **Redeploy Keycloak only:**
@@ -393,16 +412,25 @@ ansible-playbook playbooks/11_vault_oidc.yml -K -e @local.yml
 
 ## Model Slot System
 
-Four models are kept warm in RAM at all times (`OLLAMA_MAX_LOADED_MODELS=4`, `OLLAMA_KEEP_ALIVE=-1`). Slots are filled by the benchmark playbook — no model names are hardcoded.
+Six models are kept warm across two Ollama instances (`OLLAMA_MAX_LOADED_MODELS=3` each, `OLLAMA_KEEP_ALIVE=-1`). Slots are filled automatically by the benchmark playbook — no model names are hardcoded.
+
+```
+NUMA Node 1 — ollama.service     — port 11434  (general models)
+NUMA Node 0 — ollama-node0.service — port 11435 (coding models)
+```
+
+| Slot | Instance      | Port  | Role                    | Selection                     | Rotation                                    |
+|------|---------------|-------|-------------------------|-------------------------------|---------------------------------------------|
+| 1    | Node 1        | 11434 | General primary (locked) | Top general composite score  | Replaced only by re-benchmark               |
+| 2    | Node 1        | 11434 | General secondary (locked)| 2nd general composite score | Replaced only by re-benchmark               |
+| 5    | Node 1        | 11434 | General rotate           | 3rd general composite score   | `-e slot5_model=<name>`                     |
+| 3    | Node 0        | 11435 | Coding primary (locked)  | Top coding composite score    | Replaced only by re-benchmark               |
+| 4    | Node 0        | 11435 | Coding secondary (locked)| 2nd coding composite score    | Replaced only by re-benchmark               |
+| 6    | Node 0        | 11435 | Coding rotate            | 3rd coding composite score    | `-e slot6_model=<name>`                     |
 
-| Slot | Role                      | Selection                     | Rotation                              |
-|------|---------------------------|-------------------------------|---------------------------------------|
-| 1    | General-purpose primary   | Top general composite score   | Replaced if score < threshold         |
-| 2    | General-purpose secondary | 2nd general composite score   | Replaced if score < threshold         |
-| 3    | Coding primary            | Top coding composite score    | Locked; replaced only by re-benchmark |
-| 4    | Coding secondary          | 2nd coding composite score    | Rotatable: `-e slot4_model=<name>`    |
+**Classification rule:** a model is classified `coding` if its coding composite score exceeds its general composite score by ≥ 0.10; otherwise `general`.
 
-**Classification rule:** a model is classified `coding` if its coding composite score exceeds its general composite score by ≥ 0.15; otherwise `general`.
+**Modelfile aliases** (`coder-128k`, `coder-32k`, `coder-rotate`, `llama-family`, `gemma-family`) are excluded from benchmarking to prevent KV-cache allocation stalls.
 
 ## Verification Steps
 
@@ -416,8 +444,10 @@ After a full `deploy_ai.yml` run, verify the deployment (substitute your actual
 6. **Qdrant health** -- `curl -s http://<ai_server_ip>:6333/healthz` returns OK
 7. **CoreDNS resolution** -- `dig @<coredns_host_ip> vault.example.com` returns `<nginx_proxy_ip>`
 8. **NGINX configs** -- `ssh <nginx_proxy_ip> 'sudo nginx -t'` passes
-9. **OpenClaw** -- send a message to the Telegram bot, confirm response
+9. **OpenClaw** -- send a message to the Telegram bot, confirm response using slot1_general model
 10. **Benchmark report** -- check `benchmarks/results/benchmark_<timestamp>.md` for latest results
+11. **Node 0 Ollama** -- `curl -s -H "Authorization: Bearer <key>" http://<ai_server_ip>:11435/api/tags` returns model list
+12. **Both warmup services** -- `systemctl status ollama-warmup ollama-warmup-node0` both show `active (exited)`
 
 ## Role Reference
 

+ 117 - 66
benchmarks/README.md

@@ -3,133 +3,184 @@
 ## Overview
 
 Dynamic benchmark system for all installed Ollama models. Runs a suite of coding and
-general-purpose tests against every model currently available on the Ollama server,
-scores each model on a composite metric, and assigns models to the 4-slot system
-based on results.
+general-purpose tests against every model on the Ollama server, scores each model on a
+composite metric, and assigns models to the 6-slot dual-socket system based on results.
+
+Modelfile aliases (`coder-128k`, `coder-32k`, `coder-rotate`, `llama-family`,
+`gemma-family`) are automatically excluded from benchmarking — they share weights with
+real models and their large context window parameters would stall every run with
+285-second KV-cache allocations.
 
 ## How to Run
 
 **Benchmark all installed models:**
 
 ```bash
-ansible-playbook playbooks/05_benchmark.yml
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml
 ```
 
 **Benchmark specific models only:**
 
 ```bash
-ansible-playbook playbooks/05_benchmark.yml -e '{"benchmark_specific_models":["qwen2.5-coder:14b","deepseek-coder-v2:16b"]}'
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_models=qwen2.5-coder:14b,deepseek-coder-v2:16b"
 ```
 
-**Benchmark with automatic model pulling if a better model is found:**
+**Benchmark and immediately push 6-slot warm-up selections:**
 
 ```bash
-ansible-playbook playbooks/05_benchmark.yml -e pull_if_better=true
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+## Three-Pass Execution
+
+Models are split into three size tiers before benchmarking. Each tier gets its own
+per-request timeout to avoid small models waiting behind 70 B giants:
+
+| Tier   | RAM threshold | Timeout | Description                       |
+|--------|---------------|---------|-----------------------------------|
+| Small  | < 10 GB       | 300 s   | 7 B and under — fast path         |
+| Medium | 10–15 GB      | 900 s   | 16 B lite / 12 B — standard wait  |
+| Large  | > 15 GB       | 1200 s  | 34 B+ — 20-minute ceiling         |
+
+**Size source vs runtime RAM:** `ollama list` reports on-disk (compressed) sizes, which
+are smaller than actual runtime RAM usage (model weights + KV cache + overhead). A
+`benchmark_size_overhead_factor` (default `1.2`) is applied when computing tier
+boundaries: the disk-size cutoffs are divided by the factor before comparison. For
+example, with default settings a 9 GB on-disk model is treated as ~10.8 GB at runtime
+and falls in the medium tier rather than small.
+
+**Override tier boundaries:**
+
+```bash
+# Adjust where small/medium boundary sits
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_small_max_gb=8 benchmark_medium_max_gb=20"
+
+# Tune the overhead factor if your models load larger/smaller than expected
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_size_overhead_factor=1.25"
+
+# Override timeouts only
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml \
+  -e "benchmark_medium_timeout=600 benchmark_large_timeout=1800"
 ```
 
 ## Test Suites
 
 ### Coding Tests
 
-| Test       | Prompt                                                         | What Is Measured              |
-|------------|----------------------------------------------------------------|-------------------------------|
-| `code_gen` | "Write a Python function that implements binary search on a sorted list. Include type hints and docstring." | Correctness (def + return present), code structure, tokens/sec |
-| `debug`    | "Find and fix the bug in this Python code: `def factorial(n): return n * factorial(n)`. Explain the issue." | Identifies base case bug, explanation quality, tokens/sec |
-| `refactor` | "Refactor this code to use list comprehension: `result = []; for i in range(10): if i % 2 == 0: result.append(i*i)`" | Produces list comprehension, conciseness, tokens/sec |
+| Test       | Prompt                                                                     | What Is Measured                                   |
+|------------|----------------------------------------------------------------------------|----------------------------------------------------|
+| `code_gen` | Write a Python merge sort with type hints, docstring, and 3 unit tests     | `def`, `return`, `"""`, `->`, `assert`, `def test_`, `import` |
+| `debug`    | Find and fix 3 bugs in a given Python function                             | `def`, `return`, code block, `assert`              |
+| `refactor` | Refactor a loop for readability and performance                            | `def`, `return`, code block, type hint, `import`   |
 
 ### General Tests
 
-| Test        | Prompt                                                        | What Is Measured              |
-|-------------|---------------------------------------------------------------|-------------------------------|
-| `explain`   | "Explain the concept of recursion to a beginner programmer. Use a simple analogy." | Clarity, analogy presence, length adequacy, tokens/sec |
-| `creative`  | "Write a short poem about artificial intelligence."           | Creativity (line count, poetic structure), tokens/sec |
-| `reasoning` | "A farmer has 17 sheep. All but 9 die. How many are left? Explain your reasoning step by step." | Correct answer (9), step-by-step reasoning, tokens/sec |
+| Test        | Prompt                                                     | What Is Measured                                     |
+|-------------|------------------------------------------------------------|------------------------------------------------------|
+| `explain`   | Explain how Python's GIL works and when it matters         | Response length, paragraph structure, list formatting |
+| `creative`  | Suggest 5 fun family activities for a rainy weekend        | Response length, paragraph structure, list formatting |
+| `reasoning` | Apple arithmetic word problem                              | Response length, paragraph structure, list formatting |
 
 ### Latency Test
 
-| Test      | Prompt | What Is Measured           |
-|-----------|--------|----------------------------|
-| `latency` | "Hi"   | Time to first token (TTFT) |
+| Test      | Prompt | What Is Measured                                   |
+|-----------|--------|----------------------------------------------------|
+| `latency` | "Hi"   | Total response time (eval + prompt eval), used as TTFT proxy |
 
 ## Scoring
 
-### Metrics Collected from Ollama API
-
-- **tokens/sec** -- generation throughput from `/api/generate` response
-- **TTFT** (time to first token) -- measured from request start to first streamed token
-- **Quality heuristics** -- regex and length checks specific to each test type
-
 ### Composite Score Formula
 
 For each category (coding, general), a composite score is calculated:
 
 ```
-composite = (quality * 0.45) + (tokens_per_sec_normalized * 0.30) + (latency_score * 0.25)
+composite = (quality * 0.45) + (tokens_per_sec / ceiling, capped 1.0) * 0.30
+          + (1 - ttft_ms / 5000, floored 0) * 0.25
 ```
 
 Where:
-- `quality` is 0.0-1.0 based on heuristic checks for the test type
-- `tokens_per_sec_normalized` is the model's tokens/sec divided by the fastest model's tokens/sec
-- `latency_score` is 1.0 - (model_ttft / slowest_ttft)
+- `quality` — 0.0–1.0 from heuristic checks per test type (see CLAUDE.md for weights)
+- `tokens_per_sec` — averaged across all test responses; normalized against `benchmark_toks_norm_ceiling` (default 40)
+- `ttft_ms` — latency test response time in milliseconds
 
 ### Classification Rule
 
-A model is classified as a **coding** model if:
+A model is classified as **coding** if:
 
 ```
-coding_composite - general_composite >= 0.15
+coding_composite - general_composite >= benchmark_coding_threshold   # default 0.10
 ```
 
-Otherwise it is classified as **general**.
+Name-pattern heuristics (`coder`, `codestral`, `codellama`, `starcoder`) apply as a
+tiebreaker. Category can also be forced with `model_category_overrides` in `group_vars/all.yml`.
 
 ## Thresholds and Configuration
 
-All thresholds are configurable via `group_vars/all.yml`:
-
-| Key                            | Default | Description                                    |
-|--------------------------------|---------|------------------------------------------------|
-| `benchmark_min_tokens_per_sec` | 10      | Minimum tokens/sec to pass a model             |
-| `benchmark_max_ttft_ms`        | 5000    | Maximum time to first token in milliseconds    |
-| `benchmark_quality_weight`     | 0.45    | Weight of quality score in composite            |
-| `benchmark_speed_weight`       | 0.30    | Weight of tokens/sec in composite               |
-| `benchmark_latency_weight`     | 0.25    | Weight of latency score in composite            |
-| `benchmark_coding_threshold`   | 0.15    | Minimum coding-general delta for coding classification |
+All thresholds are configurable in `inventory/group_vars/all.yml`:
+
+| Key                               | Default | Description                                            |
+|-----------------------------------|---------|--------------------------------------------------------|
+| `benchmark_thresholds.min_tokens_per_sec`  | 5.0  | Minimum tok/sec to be slot-eligible          |
+| `benchmark_thresholds.min_quality_score`   | 0.6  | Minimum quality score to be slot-eligible    |
+| `benchmark_thresholds.min_composite_score` | 0.55 | Minimum composite to avoid threshold warning |
+| `benchmark_toks_norm_ceiling`     | 40      | tok/sec ceiling for normalization (dual-socket target) |
+| `benchmark_coding_threshold`      | 0.10    | coding-general composite delta for classification      |
+| `benchmark_small_max_gb`          | 10      | Runtime RAM upper bound for small pass (GB)            |
+| `benchmark_medium_max_gb`         | 15      | Runtime RAM upper bound for medium pass (GB)           |
+| `benchmark_size_overhead_factor`  | 1.2     | Multiplier applied to `ollama list` disk sizes to estimate runtime RAM |
+| `benchmark_small_timeout`         | 300     | Per-request timeout for small models (seconds)         |
+| `benchmark_medium_timeout`        | 900     | Per-request timeout for medium models (seconds)        |
+| `benchmark_large_timeout`         | 1200    | Per-request timeout for large models (seconds)         |
+| `benchmark_skip_aliases`          | see below| Modelfile aliases excluded from benchmark loop        |
+
+Default `benchmark_skip_aliases`:
+```yaml
+- coder-128k
+- coder-32k
+- coder-rotate
+- llama-family
+- gemma-family
+```
 
 ## Output Format
 
 ### Benchmark Report
 
-Each run produces `benchmarks/benchmark_<timestamp>.md` with a results table:
+Each run produces `benchmarks/results/benchmark_<timestamp>.md`. The slot table now
+covers all 6 slots across both NUMA instances:
 
 ```
-| Model                  | Coding Composite | General Composite | Classification | Tokens/sec | TTFT (ms) |
-|------------------------|------------------|-------------------|----------------|------------|-----------|
-| qwen2.5-coder:14b      | 0.82             | 0.65              | coding         | 38.2       | 420       |
-| deepseek-coder-v2:16b  | 0.78             | 0.63              | coding         | 35.1       | 510       |
-| llama3.1:8b            | 0.61             | 0.74              | general        | 52.3       | 280       |
-| mistral:7b             | 0.58             | 0.71              | general        | 55.8       | 250       |
+| Slot | Socket              | Role            | Model                     | Composite |
+|------|---------------------|-----------------|---------------------------|-----------|
+| 1    | Node 1 (port 11434) | General (locked)| llama3.1:8b               | 0.74      |
+| 2    | Node 1 (port 11434) | General (locked)| mistral:latest            | 0.71      |
+| 5    | Node 1 (port 11434) | General (rotate)| llama3.2:3b               | 0.63      |
+| 3    | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b     | 0.82      |
+| 4    | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b          | 0.78      |
+| 6    | Node 0 (port 11435) | Coding (rotate) | codegemma:7b              | 0.69      |
 ```
 
-### Model Selection File
+### model_selection.json
 
-Results are also written to `model_selection.json`:
+Results are written to `benchmarks/results/model_selection.json`:
 
 ```json
 {
-  "timestamp": "2025-01-15T10:30:00Z",
-  "slot1_coding": "qwen2.5-coder:14b",
-  "slot2_general": "llama3.1:8b",
-  "slot3_backup": "deepseek-coder-v2:16b",
-  "slot4_experimental": null,
-  "results": { ... }
+  "slot1_general": "llama3.1:8b",
+  "slot2_general": "mistral:latest",
+  "slot5_general_rotate": "llama3.2:3b",
+  "slot3_coding": "deepseek-coder-v2:16b",
+  "slot4_coding": "qwen2.5-coder:7b",
+  "slot6_coding_rotate": "codegemma:7b",
+  "general_ranking": [...],
+  "coding_ranking": [...],
+  "all_metrics": { ... }
 }
 ```
 
-## Slot Selection
-
-Slots are assigned from benchmark results as follows:
-
-1. **Slot 1 (Primary Coding)** -- model with the highest `coding_composite` score
-2. **Slot 2 (Primary General)** -- model with the highest `general_composite` score
-3. **Slot 3 (Secondary / Backup)** -- next-best model by overall average composite
-4. **Slot 4 (Experimental)** -- not assigned by benchmarks; set manually via `-e slot4_model=<name>`
+This file is read by `04_models.yml` to decide what to pull and warm up. It is committed
+to the repo so slot selections survive a clean checkout.

+ 17 - 6
benchmarks/results/benchmark_20260307T170059.md

@@ -1,15 +1,20 @@
 # Benchmark Results - 20260307T170059
 
 ## Model Selection
-| Slot | Role | Model | Composite Score |
-|------|------|-------|----------------|
-| 1 | General (Primary) | llama3.2:3b | 0.967 |
-| 2 | General (Secondary) | llama3.2:3b | 0.967 |
-| 3 | Coding (Primary) | deepseek-coder-v2 | 0.738 |
-| 4 | Coding (Secondary) | qwen2.5-coder:7b | 0.63 |
+
+
+| Slot | Role                | Model             | Composite Score |
+| ---- | ------------------- | ----------------- | --------------- |
+| 1    | General (Primary)   | llama3.2:3b       | 0.967           |
+| 2    | General (Secondary) | llama3.2:3b       | 0.967           |
+| 3    | Coding (Primary)    | deepseek-coder-v2 | 0.738           |
+| 4    | Coding (Secondary)  | qwen2.5-coder:7b  | 0.63            |
+
 
 ## Detailed Metrics
+
 ### deepseek-coder-v2
+
 - **Category**: coding
 - **Coding Quality**: 0.667
 - **General Quality**: 0.918
@@ -17,7 +22,9 @@
 - **Latency (ms)**: 1744.5
 - **Coding Composite**: 0.738
 - **General Composite**: 0.852
+
 ### qwen2.5-coder:7b
+
 - **Category**: coding
 - **Coding Quality**: 0.64
 - **General Quality**: 0.922
@@ -25,7 +32,9 @@
 - **Latency (ms)**: 1211.5
 - **Coding Composite**: 0.63
 - **General Composite**: 0.757
+
 ### llama3.2:3b
+
 - **Category**: general
 - **Coding Quality**: 0.607
 - **General Quality**: 0.991
@@ -35,7 +44,9 @@
 - **General Composite**: 0.967
 
 ## Scoring Formula
+
 - Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
 - Speed normalized against 22 tok/sec ceiling (hardware-observed max)
 - Coding quality: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
 - Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 92 - 0
benchmarks/results/benchmark_20260307T184212.md

@@ -0,0 +1,92 @@
+# Benchmark Results - 20260307T184212
+
+## Model Selection
+| Slot | Role | Model | Composite Score |
+|------|------|-------|----------------|
+| 1 | General (Primary) | llama3.2:3b | 0.001 |
+| 2 | General (Secondary) | gemma-family:latest | 0.0 |
+| 3 | Coding (Primary) | coder-128k:latest | 0.001 |
+| 4 | Coding (Secondary) | coder-32k:latest | 0.001 |
+
+## Detailed Metrics
+### gemma-family:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama-family:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### coder-128k:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 285394.5
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### coder-32k:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 142328.6
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### deepseek-coder-v2:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 143942.9
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 139756.5
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 22 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 147 - 0
benchmarks/results/benchmark_20260308T003605.md

@@ -0,0 +1,147 @@
+# Benchmark Results - 20260308T003605
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                 | Composite Score |
+| ---- | ------------------- | ---------------- | --------------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b           | 0.001           |
+| 2    | Node 1 (port 11434) | General (locked) | command-r:35b         | 0.0             |
+| 5    | Node 1 (port 11434) | General (rotate) | llama3.1:70b          | 0.0             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | codellama:34b         | 0.0             |
+| 4    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b | 0.0             |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:14b     | 0.0             |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0.008
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 221414.9
+- **Coding Composite**: 0.0
+- **General Composite**: 0.004
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### qwen2.5-coder:14b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 239690.0
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### command-r:35b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 169971.8
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### llama3.1:70b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### mistral-nemo:latest
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### mistral:latest
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 130127.2
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 70 - 0
benchmarks/results/benchmark_20260308T145246.md

@@ -0,0 +1,70 @@
+# Benchmark Results - 20260308T145246
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.001 |
+| 2 | Node 1 (port 11434) | General (locked) | mistral-nemo:latest | 0.0 |
+| 5 | Node 1 (port 11434) | General (rotate) | mistral:latest | 0.0 |
+| 3 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.0 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.0 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | none | N/A |
+
+## Detailed Metrics
+### mistral-nemo:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### mistral:latest
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 109301.3
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 57 - 0
benchmarks/results/benchmark_20260308T215747.md

@@ -0,0 +1,57 @@
+# Benchmark Results - 20260308T215747
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model               | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b         | 0.45            |
+| 2    | Node 1 (port 11434) | General (locked) | mistral-nemo:latest | 0.45            |
+| 5    | Node 1 (port 11434) | General (rotate) | none                | N/A             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b    | 0.371           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b    | 0.371           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | none                | N/A             |
+
+
+## Detailed Metrics
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.917
+- **General Quality**: 1.0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.413
+- **General Composite**: 0.45
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.823
+- **General Quality**: 0.85
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.371
+- **General Composite**: 0.383
+
+### mistral-nemo:latest
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 1.0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 9999
+- **Coding Composite**: 0.383
+- **General Composite**: 0.45
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 54 - 0
benchmarks/results/benchmark_20260309T080551.md

@@ -0,0 +1,54 @@
+# Benchmark Results - 20260309T080551
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.001 |
+| 2 | Node 1 (port 11434) | General (locked) | gemma3:12b-it-q4_K_M | 0.0 |
+| 5 | Node 1 (port 11434) | General (rotate) | none | N/A |
+| 3 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.316 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.0 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | none | N/A |
+
+## Detailed Metrics
+### deepseek-coder-v2:latest
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 676104.3
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 154480.0
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.0
+- **Latency (ms)**: 722357.3
+- **Coding Composite**: 0.0
+- **General Composite**: 0.0
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0.7
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 145493.5
+- **Coding Composite**: 0.316
+- **General Composite**: 0.001
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 47 - 0
benchmarks/results/benchmark_20260309T174604.md

@@ -0,0 +1,47 @@
+# Benchmark Results - 20260309T174604
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model            | Composite Score |
+| ---- | ------------------- | ---------------- | ---------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b      | 0.001           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.2:3b      | 0.001           |
+| 5    | Node 1 (port 11434) | General (rotate) | none             | N/A             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b | 0.001           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b | 0.001           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | none             | N/A             |
+
+
+## Detailed Metrics
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 108021.2
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0
+- **General Quality**: 0
+- **Avg Tokens/sec**: 0.1
+- **Latency (ms)**: 146781.6
+- **Coding Composite**: 0.001
+- **General Composite**: 0.001
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 67 - 0
benchmarks/results/benchmark_20260310T094843.md

@@ -0,0 +1,67 @@
+# Benchmark Results - 20260310T094843
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                    | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------------ | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b              | 0.814           |
+| 2    | Node 1 (port 11434) | General (locked) | gemma3:12b-it-q4_K_M     | 0.484           |
+| 5    | Node 1 (port 11434) | General (rotate) | none                     | N/A             |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:latest | 0.693           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b         | 0.638           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | none                     | N/A             |
+
+
+## Detailed Metrics
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 22.8
+- **Latency (ms)**: 1612.6
+- **Coding Composite**: 0.693
+- **General Composite**: 0.739
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 22.4
+- **Latency (ms)**: 661.8
+- **Coding Composite**: 0.767
+- **General Composite**: 0.814
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.5
+- **Latency (ms)**: 5730.8
+- **Coding Composite**: 0.431
+- **General Composite**: 0.484
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.8
+- **Latency (ms)**: 1359.5
+- **Coding Composite**: 0.638
+- **General Composite**: 0.687
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 117 - 0
benchmarks/results/benchmark_20260310T102149.md

@@ -0,0 +1,117 @@
+# Benchmark Results - 20260310T102149
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                    | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------------ | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b              | 0.819           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.1:8b              | 0.621           |
+| 5    | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M     | 0.484           |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b    | 0.707           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:latest | 0.681           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:latest     | 0.644           |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4350.0
+- **Coding Composite**: 0.409
+- **General Composite**: 0.32
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.6
+- **Latency (ms)**: 1586.8
+- **Coding Composite**: 0.707
+- **General Composite**: 0.753
+
+### qwen2.5-coder:14B
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2223.7
+- **Coding Composite**: 0.549
+- **General Composite**: 0.608
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.783
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 22.2
+- **Latency (ms)**: 1759.1
+- **Coding Composite**: 0.681
+- **General Composite**: 0.727
+
+### qwen2.5-coder:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.8
+- **Latency (ms)**: 1239.2
+- **Coding Composite**: 0.644
+- **General Composite**: 0.694
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0.8
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2251.2
+- **Coding Composite**: 0.586
+- **General Composite**: 0.621
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.8
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.3
+- **Latency (ms)**: 1258.3
+- **Coding Composite**: 0.639
+- **General Composite**: 0.689
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 5701.3
+- **Coding Composite**: 0.432
+- **General Composite**: 0.484
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.85
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 22.7
+- **Latency (ms)**: 613.5
+- **Coding Composite**: 0.772
+- **General Composite**: 0.819
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 94 - 0
benchmarks/results/benchmark_20260310T110632.md

@@ -0,0 +1,94 @@
+# Benchmark Results - 20260310T110632
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.621 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.483 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.738 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.735 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:latest | 0.667 |
+
+## Detailed Metrics
+### codellama:34b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4244.1
+- **Coding Composite**: 0.437
+- **General Composite**: 0.326
+### deepseek-coder-v2:latest
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 25.0
+- **Latency (ms)**: 1543.2
+- **Coding Composite**: 0.735
+- **General Composite**: 0.758
+### deepseek-coder-v2:16b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.5
+- **Latency (ms)**: 1415.1
+- **Coding Composite**: 0.738
+- **General Composite**: 0.762
+### qwen2.5-coder:14B
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2195.9
+- **Coding Composite**: 0.572
+- **General Composite**: 0.609
+### qwen2.5-coder:latest
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.8
+- **Latency (ms)**: 1228.2
+- **Coding Composite**: 0.667
+- **General Composite**: 0.694
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2249.3
+- **Coding Composite**: 0.596
+- **General Composite**: 0.621
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.7
+- **Latency (ms)**: 1231.9
+- **Coding Composite**: 0.666
+- **General Composite**: 0.693
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.4
+- **Latency (ms)**: 6355.8
+- **Coding Composite**: 0.441
+- **General Composite**: 0.483
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 22.3
+- **Latency (ms)**: 644.2
+- **Coding Composite**: 0.785
+- **General Composite**: 0.814
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 107 - 0
benchmarks/results/benchmark_20260310T122818.md

@@ -0,0 +1,107 @@
+# Benchmark Results - 20260310T122818
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                 | Composite Score |
+| ---- | ------------------- | ---------------- | --------------------- | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b           | 0.835           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.1:8b           | 0.624           |
+| 5    | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M  | 0.481           |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b | 0.727           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | qwen2.5-coder:7b      | 0.674           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:latest  | 0.671           |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4261.3
+- **Coding Composite**: 0.436
+- **General Composite**: 0.325
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.1
+- **Latency (ms)**: 1583.1
+- **Coding Composite**: 0.727
+- **General Composite**: 0.75
+
+### qwen2.5-coder:14B
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2172.1
+- **Coding Composite**: 0.573
+- **General Composite**: 0.61
+
+### qwen2.5-coder:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.4
+- **Latency (ms)**: 1102.0
+- **Coding Composite**: 0.671
+- **General Composite**: 0.698
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.9
+- **Latency (ms)**: 2186.7
+- **Coding Composite**: 0.6
+- **General Composite**: 0.624
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.6
+- **Latency (ms)**: 1073.7
+- **Coding Composite**: 0.674
+- **General Composite**: 0.701
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.2
+- **Latency (ms)**: 6142.8
+- **Coding Composite**: 0.439
+- **General Composite**: 0.481
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 24.5
+- **Latency (ms)**: 568.5
+- **Coding Composite**: 0.806
+- **General Composite**: 0.835
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 107 - 0
benchmarks/results/benchmark_20260310T160815.md

@@ -0,0 +1,107 @@
+# Benchmark Results - 20260310T160815
+
+## Model Selection (6-slot / 2-socket)
+
+
+| Slot | Socket              | Role             | Model                    | Composite Score |
+| ---- | ------------------- | ---------------- | ------------------------ | --------------- |
+| 1    | Node 1 (port 11434) | General (locked) | llama3.2:3b              | 0.832           |
+| 2    | Node 1 (port 11434) | General (locked) | llama3.1:8b              | 0.624           |
+| 5    | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M     | 0.482           |
+| 3    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:16b    | 0.737           |
+| 4    | Node 0 (port 11435) | Coding (locked)  | deepseek-coder-v2:latest | 0.735           |
+| 6    | Node 0 (port 11435) | Coding (rotate)  | qwen2.5-coder:7b         | 0.666           |
+
+
+## Detailed Metrics
+
+### codellama:34b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4336.2
+- **Coding Composite**: 0.432
+- **General Composite**: 0.321
+
+### deepseek-coder-v2:latest
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.1
+- **Latency (ms)**: 1411.4
+- **Coding Composite**: 0.735
+- **General Composite**: 0.759
+
+### deepseek-coder-v2:16b
+
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 24.2
+- **Latency (ms)**: 1383.8
+- **Coding Composite**: 0.737
+- **General Composite**: 0.76
+
+### qwen2.5-coder:14B
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2181.0
+- **Coding Composite**: 0.573
+- **General Composite**: 0.609
+
+### llama3.1:8b
+
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2183.4
+- **Coding Composite**: 0.6
+- **General Composite**: 0.624
+
+### qwen2.5-coder:7b
+
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.6
+- **Latency (ms)**: 1210.0
+- **Coding Composite**: 0.666
+- **General Composite**: 0.693
+
+### gemma3:12b-it-q4_K_M
+
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.2
+- **Latency (ms)**: 5540.1
+- **Coding Composite**: 0.44
+- **General Composite**: 0.482
+
+### llama3.2:3b
+
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 24.2
+- **Latency (ms)**: 581.0
+- **Coding Composite**: 0.803
+- **General Composite**: 0.832
+
+## Scoring Formula
+
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general
+

+ 78 - 0
benchmarks/results/benchmark_20260310T170013.md

@@ -0,0 +1,78 @@
+# Benchmark Results - 20260310T170013
+
+## Model Selection (6-slot / 2-socket)
+| Slot | Socket | Role | Model | Composite Score |
+|------|--------|------|-------|----------------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.623 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.481 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.723 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.655 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:14B | 0.57 |
+
+## Detailed Metrics
+### codellama:34b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.586
+- **Avg Tokens/sec**: 3.2
+- **Latency (ms)**: 4235.4
+- **Coding Composite**: 0.437
+- **General Composite**: 0.326
+### deepseek-coder-v2:16b
+- **Category**: coding
+- **Coding Quality**: 0.833
+- **General Quality**: 0.885
+- **Avg Tokens/sec**: 23.5
+- **Latency (ms)**: 1568.5
+- **Coding Composite**: 0.723
+- **General Composite**: 0.746
+### qwen2.5-coder:14B
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.931
+- **Avg Tokens/sec**: 6.6
+- **Latency (ms)**: 2229.7
+- **Coding Composite**: 0.57
+- **General Composite**: 0.607
+### llama3.1:8b
+- **Category**: general
+- **Coding Quality**: 0.823
+- **General Quality**: 0.877
+- **Avg Tokens/sec**: 11.8
+- **Latency (ms)**: 2202.0
+- **Coding Composite**: 0.599
+- **General Composite**: 0.623
+### qwen2.5-coder:7b
+- **Category**: coding
+- **Coding Quality**: 0.85
+- **General Quality**: 0.91
+- **Avg Tokens/sec**: 12.5
+- **Latency (ms)**: 1431.0
+- **Coding Composite**: 0.655
+- **General Composite**: 0.682
+### gemma3:12b-it-q4_K_M
+- **Category**: general
+- **Coding Quality**: 0.873
+- **General Quality**: 0.966
+- **Avg Tokens/sec**: 6.1
+- **Latency (ms)**: 5941.9
+- **Coding Composite**: 0.439
+- **General Composite**: 0.481
+### llama3.2:3b
+- **Category**: general
+- **Coding Quality**: 0.89
+- **General Quality**: 0.954
+- **Avg Tokens/sec**: 23.0
+- **Latency (ms)**: 754.8
+- **Coding Composite**: 0.786
+- **General Composite**: 0.814
+
+## Scoring Formula
+- Composite = quality * 0.45 + token_speed_normalized * 0.30 + latency_score * 0.25
+- Speed normalized against 40 tok/sec ceiling (hardware-observed max)
+- Coding quality (per-prompt):
+  code_gen: has_def×0.20 + has_return×0.20 + has_docstring×0.15 + has_type_hint×0.15 + has_code_block×0.10 + has_assert×0.08 + has_test_def×0.07 + has_import×0.05
+  debug:    has_def×0.30 + has_return×0.30 + has_code_block×0.25 + has_assert×0.15
+  refactor: has_def×0.25 + has_return×0.25 + has_code_block×0.20 + has_type_hint×0.15 + has_import×0.15
+- Category: override dict → quality delta (coding_avg - general_avg >= 0.1) → name pattern (coder/codestral/codellama/starcoder) → general

+ 433 - 0
benchmarks/results/benchmark_review_20260310.md

@@ -0,0 +1,433 @@
+# Ticket Summary — Post-Change Benchmark Review: num_predict 300 → 500
+
+## Description
+
+After resolving the dual NUMA/CPUAffinity performance regression (2026-03-10), two
+post-fix benchmark runs were executed to validate the effect of raising
+`benchmark_num_predict` from 300 to 500. This document captures the four-run history,
+before/after comparison, full Run 4 model results, and findings on system tuning state.
+
+---
+
+## Acceptance Criteria
+
+- [x] Run 3 (num_predict=300) and Run 4 (num_predict=500) compared on common models
+- [x] All tuning variables reviewed and declared optimal or requiring action
+- [x] Any model-identity anomalies flagged for follow-up
+- [x] MEMORY.md updated with current variable values
+- [x] This ticket summary written to `benchmarks/results/`
+
+---
+
+## Work Implemented
+
+### Run History
+
+| Run | Timestamp | Condition | Result |
+|-----|-----------|-----------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300, 4 models | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | Post-NUMA-fix, num_predict=500, 9 models | quality=0.83–0.97, tok/sec=3.2–25.0 |
+
+### Before vs. After (Runs 3 → 4, common models)
+
+| Model | coding_quality @ 300 | coding_quality @ 500 | Delta |
+|-------|---------------------|---------------------|-------|
+| deepseek-coder-v2:latest | 0.783 | 0.833 | +0.050 |
+| qwen2.5-coder:7b | 0.800 | 0.850 | +0.050 |
+| llama3.2:3b | 0.850 | 0.890 | +0.040 |
+| gemma3:12b-it-q4_K_M | 0.850 | 0.873 | +0.023 |
+
+### Full Run 4 Results (num_predict=500, 9 models)
+
+| Model | tok/sec | coding_q | general_q | latency_ms | coding_composite | general_composite | category |
+|-------|---------|----------|-----------|------------|-----------------|------------------|----------|
+| deepseek-coder-v2:16b | 24.5 | 0.833 | 0.885 | 1415.1 | 0.738 | 0.762 | coding |
+| deepseek-coder-v2:latest | 25.0 | 0.833 | 0.885 | 1543.2 | 0.735 | 0.758 | coding |
+| qwen2.5-coder:latest | 12.8 | 0.850 | 0.910 | 1228.2 | 0.667 | 0.694 | coding |
+| qwen2.5-coder:7b | 12.7 | 0.850 | 0.910 | 1231.9 | 0.666 | 0.693 | coding |
+| qwen2.5-coder:14B | 6.6 | 0.850 | 0.931 | 2195.9 | 0.572 | 0.609 | coding |
+| codellama:34b | 3.2 | 0.833 | 0.586 | 4244.1 | 0.437 | 0.326 | coding |
+| llama3.2:3b | 22.3 | 0.890 | 0.954 | 644.2 | 0.785 | 0.814 | general |
+| llama3.1:8b | 11.8 | 0.823 | 0.877 | 2249.3 | 0.596 | 0.621 | general |
+| gemma3:12b-it-q4_K_M | 6.4 | 0.873 | 0.966 | 6355.8 | 0.441 | 0.483 | general |
+
+### Current Slot Assignments (model_selection.json)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.621 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.738 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.735 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.483 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:latest | 0.667 |
+
+### Tuning Variable Status
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal — rubric ceiling is now the binding constraint |
+| `benchmark_large_timeout` | 480s | Adequate — ~3–24x margin (500 tokens at 3.2–25 tok/sec ≈ 20–156 s vs 480 s) |
+| `benchmark_toks_norm_ceiling` | 40 | Correct — fastest model at 62.5% of ceiling |
+| `benchmark_coding_threshold` | 0.10 | Correct — name-pattern fallback handling remaining cases |
+| Scoring weights | 0.45/0.30/0.25 | Appropriate for interactive serving platform |
+
+### Findings
+
+**Finding 1 — num_predict=500 confirmed correct.** Every model improved on coding_quality
+(+0.023 to +0.050). No timeouts observed. The rubric ceiling is now the binding constraint;
+further increases (700+) would yield at most +0.02 additional improvement.
+
+**Finding 2 — Coding quality inversion narrowed (expected, not a bug).** Coding specialists
+score lower on coding than general quality because general prompts don't require `assert`,
+`test_def`, or `type_hint` (the hardest scoring markers). The gap halved from ~−0.110 to
+~−0.052 vs. Run 3, confirming truncation was part of the cause. Name-pattern fallback
+continues to correctly classify these models.
+
+**Finding 3 — deepseek-coder-v2:16b and :latest may be the same weights (ACTION REQUIRED).**
+Both share identical quality scores (0.833/0.885) and nearly identical throughput (24.5 vs.
+25.0 tok/sec). In Ollama, `:latest` typically resolves to the same weights as the default
+variant. If confirmed identical, slots 3 and 4 hold duplicate models — zero benefit, wasted
+VRAM. See Testing Needed for verification steps.
+
+**Finding 4 — qwen2.5-coder:latest and :7b are near-identical (informational).** Composites
+of 0.667 vs. 0.666. Lower impact since only one is active in slot 6 at a time.
+
+**Finding 5 — llama3.2:3b outperforms coding specialists on coding composite (informational).**
+coding_composite=0.785 beats all dedicated coding models. Mathematically correct: speed
+(22.3 tok/sec) and latency (644ms) dominate. Correctly classified general because
+general_composite (0.814) > coding_composite (0.785), delta < 0.10 threshold.
+
+**Finding 6 — codellama:34b correctly excluded.** 3.2 tok/sec, general_quality=0.586 falls
+below min_quality_score=0.6. Scoring system worked as designed.
+
+---
+
+## Testing Needed
+
+### Finding 3 — Verify deepseek-coder-v2:16b vs :latest digest
+
+Run on `ai_server`:
+
+```bash
+ollama show deepseek-coder-v2:16b --modelfile | grep FROM
+ollama show deepseek-coder-v2:latest --modelfile | grep FROM
+```
+
+**If digests match (same weights):** update `model_selection.json` slot4_coding manually
+(or remove one deepseek variant and re-run `03_benchmark.yml`) to redirect slot 4 to
+`qwen2.5-coder:14B` (composite=0.572) or another diverse candidate for model diversity.
+
+**If digests differ (different weights):** no action — the pipeline is working as designed.
+
+### Regression check after any slot4 change
+
+If slot4 is redirected, run:
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+Confirm both warmup services start cleanly:
+
+```bash
+systemctl status ollama-warmup.service ollama-warmup-node0.service
+```
+
+---
+
+# Addendum — Run 5 Review (post deepseek:latest removal)
+
+## Run History (all five runs)
+
+| Run | Timestamp | Condition | Models | Result |
+|-----|-----------|-----------|--------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | — | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | — | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300 | 4 | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | num_predict=500, deepseek:latest present | 9 | quality=0.83–0.97, tok/sec=3.2–25.0 |
+| 5 | 20260310T122818 | num_predict=500, deepseek:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.5 |
+
+## Run 4 → Run 5 Comparison (all common models)
+
+| Model | R4 tok/sec | R5 tok/sec | R4 coding_comp | R5 coding_comp | Delta |
+|-------|-----------|-----------|----------------|----------------|-------|
+| deepseek-coder-v2:16b | 24.5 | 24.1 | 0.738 | 0.727 | −0.011 (noise) |
+| qwen2.5-coder:latest | 12.8 | 12.4 | 0.667 | 0.671 | +0.004 (noise) |
+| qwen2.5-coder:7b | 12.7 | 12.6 | 0.666 | 0.674 | +0.008 (noise) |
+| qwen2.5-coder:14B | 6.6 | 6.6 | 0.572 | 0.573 | +0.001 (noise) |
+| llama3.2:3b | 22.3 | 24.5 | 0.785 | 0.806 | +0.021 (notable) |
+| llama3.1:8b | 11.8 | 11.9 | 0.596 | 0.600 | +0.004 (noise) |
+| gemma3:12b-it-q4_K_M | 6.4 | 6.2 | 0.441 | 0.439 | −0.002 (noise) |
+| codellama:34b | 3.2 | 3.2 | 0.437 | 0.436 | −0.001 (noise) |
+
+Quality scores (coding_quality, general_quality) are **identical** across both runs —
+confirming rubric scores are stable and deterministic at num_predict=500.
+
+## Run 5 Slot Assignments (model_selection.json)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.835 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.624 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.481 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.727 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.674 |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:latest | 0.671 |
+
+Note: slot4 is `qwen2.5-coder:7b` — the pipeline correctly ranked it #2 coding (0.674),
+superseding the manual `qwen2.5-coder:14B` edit made earlier this session.
+
+## Findings
+
+**Finding 1 — System is stable; tuning parameters remain optimal (no action).** All quality
+scores are identical between Run 4 and Run 5. Speed and latency deltas are within normal
+run-to-run variance (±0.4 tok/sec, ±200ms TTFT) for every model except llama3.2:3b, whose
++2.2 tok/sec gain is covered in Finding 2. No tuning changes needed.
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal — rubric ceiling is binding constraint |
+| `benchmark_large_timeout` | 480s | Adequate — ~3–24x margin at 3–25 tok/sec |
+| `benchmark_toks_norm_ceiling` | 40 | Correct — fastest model at 61% of ceiling |
+| `benchmark_coding_threshold` | 0.10 | Correct — name-pattern fallback working |
+| Scoring weights | 0.45/0.30/0.25 | Appropriate for interactive serving |
+
+**Finding 2 — llama3.2:3b improved after deepseek:latest removal (informational).**
+tok/sec: 22.3 → 24.5 (+2.2), general_composite: 0.814 → 0.835 (+0.021). Likely cause:
+removing one large model reduced memory pressure / NUMA contention during warmup. The 3b
+model benefits most as it runs fastest and competes most for memory bandwidth.
+
+**Finding 3 — qwen2.5-coder:7b and :latest confirmed duplicate weights (RESOLVED).**
+Run 5 slot4=`:7b` (0.674) and slot6=`:latest` (0.671) showed identical quality scores
+(coding=0.850, general=0.910) and nearly identical throughput (~12.4–12.8 tok/sec) across
+both runs — same pattern as the deepseek duplicate. Verified on ai_server:
+
+```
+qwen2.5-coder:7b    → sha256-60e05f2100071479f596b964f89f510f057ce397ea22f2833a0cfe029bfc2463
+qwen2.5-coder:latest → sha256-60e05f2100071479f596b964f89f510f057ce397ea22f2833a0cfe029bfc2463
+```
+
+Digests match. `qwen2.5-coder:latest` removed. Next step: re-run `03_benchmark.yml` (Run 6)
+to promote `qwen2.5-coder:14B` to slot6_rotate, achieving genuine speed/quality diversity
+on Node 0:
+- slot3: deepseek-coder-v2:16b — fast+deep (24 tok/sec, 16B)
+- slot4: qwen2.5-coder:7b — fast+light (12.6 tok/sec, 7B)
+- slot6: qwen2.5-coder:14B — slower+richer quality (6.6 tok/sec, 14B)
+
+**Finding 4 — gemma3:12b latency_score=0 is persistent (informational, no action).**
+TTFT consistently 6.1–6.4 seconds, above the 5000ms floor → latency_score=0 every run.
+Hardware-limited (large quant loading time on Node 1), not a tuning issue. The model
+correctly holds slot5_general_rotate on the strength of general_quality=0.966. The latency
+penalty is working as intended.
+
+**Finding 5 — codellama:34b remains correctly excluded (informational, no action).**
+composite=0.436, general_quality=0.586 — below both min_composite_score=0.55 and
+min_quality_score=0.6 every run. Pipeline working as designed.
+
+## Next Action
+
+Run 6: re-benchmark after `qwen2.5-coder:latest` removal to promote `qwen2.5-coder:14B`
+to slot6_rotate and achieve model diversity on Node 0.
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+---
+
+# Addendum — Run 6 Review (post qwen2.5-coder:latest removal)
+
+## Run History (all six runs)
+
+| Run | Timestamp | Condition | Models | Result |
+|-----|-----------|-----------|--------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | — | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | — | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300 | 4 | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | num_predict=500, deepseek:latest present | 9 | quality=0.83–0.97, tok/sec=3.2–25.0 |
+| 5 | 20260310T122818 | num_predict=500, deepseek:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.5 |
+| 6 | 20260310T160815 | num_predict=500, qwen2.5-coder:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.2 |
+
+## Full Run 6 Results
+
+| Model | tok/sec | coding_q | general_q | latency_ms | coding_comp | general_comp | category |
+|-------|---------|----------|-----------|------------|-------------|--------------|----------|
+| deepseek-coder-v2:16b | 24.2 | 0.833 | 0.885 | 1383.8 | 0.737 | 0.760 | coding |
+| deepseek-coder-v2:latest | 24.1 | 0.833 | 0.885 | 1411.4 | 0.735 | 0.759 | coding |
+| qwen2.5-coder:7b | 12.6 | 0.850 | 0.910 | 1210.0 | 0.666 | 0.693 | coding |
+| qwen2.5-coder:14B | 6.6 | 0.850 | 0.931 | 2181.0 | 0.573 | 0.609 | coding |
+| codellama:34b | 3.2 | 0.833 | 0.586 | 4336.2 | 0.432 | 0.321 | coding |
+| llama3.2:3b | 24.2 | 0.890 | 0.954 | 581.0 | 0.803 | 0.832 | general |
+| llama3.1:8b | 11.8 | 0.823 | 0.877 | 2183.4 | 0.600 | 0.624 | general |
+| gemma3:12b-it-q4_K_M | 6.2 | 0.873 | 0.966 | 5540.1 | 0.440 | 0.482 | general |
+
+## Run 5 → Run 6 Comparison (all common models)
+
+| Model | R5 tok/sec | R6 tok/sec | R5 coding_comp | R6 coding_comp | Delta |
+|-------|-----------|-----------|----------------|----------------|-------|
+| deepseek-coder-v2:16b | 24.1 | 24.2 | 0.727 | 0.737 | +0.010 (noise) |
+| qwen2.5-coder:7b | 12.6 | 12.6 | 0.674 | 0.666 | −0.008 (noise) |
+| qwen2.5-coder:14B | 6.6 | 6.6 | 0.573 | 0.573 | 0.000 |
+| llama3.2:3b | 24.5 | 24.2 | 0.806 | 0.803 | −0.003 (noise) |
+| llama3.1:8b | 11.9 | 11.8 | 0.600 | 0.600 | 0.000 |
+| gemma3:12b-it-q4_K_M | 6.2 | 6.2 | 0.439 | 0.440 | +0.001 (noise) |
+| codellama:34b | 3.2 | 3.2 | 0.436 | 0.432 | −0.004 (noise) |
+
+Quality scores are **identical** across all common models. All composites within run-to-run
+noise (≤ ±0.010). Rubric confirmed deterministic across 6 runs.
+
+## Run 6 Slot Assignments (model_selection.json — current state)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.832 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.624 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.482 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.737 |
+| 4 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:latest | 0.735 ← REGRESSION |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:7b | 0.666 |
+
+## Findings
+
+**Finding 1 — deepseek-coder-v2:latest re-appeared in slot4 (REGRESSION, now fixed).**
+Previously confirmed duplicate of `:16b` and removed after Run 4. Re-appeared in Run 6
+because `group_vars/all.yml` contained two pull sources:
+
+1. `baseline_models` (line 121): `"deepseek-coder-v2"` — untagged, Ollama resolves to
+   `:latest`, re-pulling the duplicate on every benchmark run.
+2. `candidate_models`: explicit `"deepseek-coder-v2:latest"` entry — unconditionally pulls
+   `:latest` as a testable model.
+
+**Fix applied to `inventory/group_vars/all.yml`:**
+- `baseline_models`: changed `"deepseek-coder-v2"` → `"deepseek-coder-v2:16b"` (explicit tag)
+- `candidate_models`: removed the `deepseek-coder-v2:latest` entry entirely
+
+**Also required on ai_server:** `ollama rm deepseek-coder-v2:latest`
+
+**Finding 2 — All scores and tuning variables remain stable (no action).** Every delta vs
+Run 5 is within noise (≤ ±0.010 composite, quality scores identical). The rubric is
+confirmed deterministic across 6 runs.
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal |
+| `benchmark_large_timeout` | 480s | Adequate |
+| `benchmark_toks_norm_ceiling` | 40 | Correct |
+| `benchmark_coding_threshold` | 0.10 | Correct |
+
+**Finding 3 — qwen2.5-coder:14B not yet in slot6 (consequence of Finding 1).** With
+deepseek:latest occupying slot4, the coding rank yields:
+  #1 deepseek:16b (0.737) → slot3, #2 deepseek:latest (0.735) → slot4,
+  #3 qwen:7b (0.666) → slot6, #4 qwen:14B (0.573) → excluded.
+After deepseek:latest is permanently removed, Run 7 expected layout:
+  slot3=deepseek:16b, slot4=qwen:7b, slot6=qwen:14B.
+
+**Finding 4 — gemma3:12b TTFT=5540ms (informational, no action).** Persistently above
+5000ms floor → latency_score=0 every run. Hardware-limited, not a tuning issue.
+Correctly holds slot5_general_rotate on general_quality=0.966.
+
+**Finding 5 — codellama:34b correctly excluded again (informational, no action).**
+composite=0.432, general_quality=0.586 — below both thresholds. Pipeline working as designed.
+
+## Next Action
+
+1. Remove duplicate from ai_server: `ollama rm deepseek-coder-v2:latest`
+2. Run 7 (clean benchmark):
+
+```bash
+ansible-playbook playbooks/03_benchmark.yml -K -e @local.yml && \
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
+
+Expected Run 7: slot4=`qwen2.5-coder:7b`, slot6=`qwen2.5-coder:14B`,
+`deepseek-coder-v2:latest` absent from `all_metrics`.
+
+---
+
+# Addendum — Run 7 Review (target Node 0 layout achieved, session closed)
+
+## Run History (all seven runs)
+
+| Run | Timestamp | Condition | Models | Result |
+|-----|-----------|-----------|--------|--------|
+| 1 | 20260309T080551 | Broken NUMA (membind + CPUAffinity) | — | quality=0, tok/sec≈0.0–0.1 |
+| 2 | 20260309T174604 | Broken NUMA (same bug) | — | quality=0, tok/sec=0.1 |
+| 3 | 20260310T094843 | Post-NUMA-fix, num_predict=300 | 4 | quality=0.78–0.97, tok/sec=6.5–22.8 |
+| 4 | 20260310T110632 | num_predict=500, deepseek:latest present | 9 | quality=0.83–0.97, tok/sec=3.2–25.0 |
+| 5 | 20260310T122818 | num_predict=500, deepseek:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.5 |
+| 6 | 20260310T160815 | num_predict=500, qwen2.5-coder:latest removed | 8 | quality=0.83–0.97, tok/sec=3.2–24.2 |
+| 7 | 20260310T170013 | group_vars fix applied, deepseek:latest absent | 7 | quality=0.83–0.97, tok/sec=3.2–23.5 |
+
+## Full Run 7 Results
+
+| Model | tok/sec | coding_q | general_q | latency_ms | coding_comp | general_comp | category |
+|-------|---------|----------|-----------|------------|-------------|--------------|----------|
+| deepseek-coder-v2:16b | 23.5 | 0.833 | 0.885 | 1568.5 | 0.723 | 0.746 | coding |
+| qwen2.5-coder:7b | 12.5 | 0.850 | 0.910 | 1431.0 | 0.655 | 0.682 | coding |
+| qwen2.5-coder:14B | 6.6 | 0.850 | 0.931 | 2229.7 | 0.570 | 0.607 | coding |
+| codellama:34b | 3.2 | 0.833 | 0.586 | 4235.4 | 0.437 | 0.326 | coding |
+| llama3.2:3b | 23.0 | 0.890 | 0.954 | 754.8 | 0.786 | 0.814 | general |
+| llama3.1:8b | 11.8 | 0.823 | 0.877 | 2202.0 | 0.599 | 0.623 | general |
+| gemma3:12b-it-q4_K_M | 6.1 | 0.873 | 0.966 | 5941.9 | 0.439 | 0.481 | general |
+
+`deepseek-coder-v2:latest` **absent** from `all_metrics` — group_vars fix verified working.
+
+## Run 6 → Run 7 Comparison (all common models)
+
+| Model | R6 tok/sec | R7 tok/sec | R6 coding_comp | R7 coding_comp | Delta |
+|-------|-----------|-----------|----------------|----------------|-------|
+| deepseek-coder-v2:16b | 24.2 | 23.5 | 0.737 | 0.723 | −0.014 (noise) |
+| qwen2.5-coder:7b | 12.6 | 12.5 | 0.666 | 0.655 | −0.011 (noise) |
+| qwen2.5-coder:14B | 6.6 | 6.6 | 0.573 | 0.570 | −0.003 (noise) |
+| llama3.2:3b | 24.2 | 23.0 | 0.803 | 0.786 | −0.017 (noise) |
+| llama3.1:8b | 11.8 | 11.8 | 0.600 | 0.599 | −0.001 (noise) |
+| gemma3:12b-it-q4_K_M | 6.2 | 6.1 | 0.440 | 0.439 | −0.001 (noise) |
+| codellama:34b | 3.2 | 3.2 | 0.432 | 0.437 | +0.005 (noise) |
+
+Quality scores are **identical** across all common models. All composites within run-to-run
+noise (≤ ±0.017). Rubric confirmed deterministic across 7 runs.
+
+## Run 7 Slot Assignments (final, confirmed clean)
+
+| Slot | Socket | Role | Model | Composite |
+|------|--------|------|-------|-----------|
+| 1 | Node 1 (port 11434) | General (locked) | llama3.2:3b | 0.814 |
+| 2 | Node 1 (port 11434) | General (locked) | llama3.1:8b | 0.623 |
+| 5 | Node 1 (port 11434) | General (rotate) | gemma3:12b-it-q4_K_M | 0.481 |
+| 3 | Node 0 (port 11435) | Coding (locked) | deepseek-coder-v2:16b | 0.723 |
+| 4 | Node 0 (port 11435) | Coding (locked) | qwen2.5-coder:7b | 0.655 ✅ |
+| 6 | Node 0 (port 11435) | Coding (rotate) | qwen2.5-coder:14B | 0.570 ✅ |
+
+## Findings
+
+**Finding 1 — Target Node 0 diversity layout achieved (RESOLVED).** Run 7 confirms the
+intended three-tier Node 0 layout:
+- slot3: deepseek-coder-v2:16b — deep specialist (23.5 tok/sec, 16B params)
+- slot4: qwen2.5-coder:7b — fast+light (12.5 tok/sec, 7B params)
+- slot6: qwen2.5-coder:14B — slower+richer (6.6 tok/sec, 14B params)
+
+All three are genuinely distinct models with different speed/quality tradeoffs.
+
+**Finding 2 — group_vars fix verified working (RESOLVED).** `deepseek-coder-v2:latest` is
+absent from `all_metrics`. Explicit `:16b` tag in `baseline_models` prevents Ollama from
+resolving to `:latest` on subsequent runs. The fix is durable — re-running `03_benchmark.yml`
+will not re-introduce the duplicate.
+
+**Finding 3 — All scores and tuning variables stable (no action).** Every delta vs Run 6 is
+within noise (≤ ±0.017 composite, quality scores identical). The pipeline is confirmed
+deterministic and stable.
+
+| Variable | Value | Status |
+|----------|-------|--------|
+| `benchmark_num_predict` | 500 | Optimal |
+| `benchmark_large_timeout` | 480s | Adequate |
+| `benchmark_toks_norm_ceiling` | 40 | Correct |
+| `benchmark_coding_threshold` | 0.10 | Correct |
+
+**Finding 4 — Benchmark pipeline declared stable. Session closed.** Seven runs over two
+days confirmed: NUMA fix correct, scoring rubric deterministic, duplicate-model detection
+pattern documented, group_vars idempotent. No further benchmark runs or tuning changes are
+needed unless new models are added to `candidate_models`.

+ 151 - 71
benchmarks/results/model_selection.json

@@ -1,116 +1,196 @@
 {
     "all_metrics": {
-        "deepseek-coder-v2:latest": {
-            "avg_tok_per_sec": 21.6,
+        "codellama:34b": {
+            "avg_tok_per_sec": 3.2,
             "category": "coding",
-            "coding_composite": 0.764,
-            "coding_quality": 0.657,
-            "general_composite": 0.867,
-            "general_quality": 0.886,
-            "latency_ms": 1510.5,
-            "latency_score": 0.698,
-            "toks_norm": 0.982
+            "coding_composite": 0.437,
+            "coding_quality": 0.833,
+            "general_composite": 0.326,
+            "general_quality": 0.586,
+            "latency_ms": 4235.4,
+            "latency_score": 0.153,
+            "toks_norm": 0.08
+        },
+        "deepseek-coder-v2:16b": {
+            "avg_tok_per_sec": 23.5,
+            "category": "coding",
+            "coding_composite": 0.723,
+            "coding_quality": 0.833,
+            "general_composite": 0.746,
+            "general_quality": 0.885,
+            "latency_ms": 1568.5,
+            "latency_score": 0.686,
+            "toks_norm": 0.586
         },
         "gemma3:12b-it-q4_K_M": {
-            "avg_tok_per_sec": 5.6,
+            "avg_tok_per_sec": 6.1,
             "category": "general",
-            "coding_composite": 0.416,
-            "coding_quality": 0.757,
-            "general_composite": 0.495,
-            "general_quality": 0.931,
-            "latency_ms": 5975.8,
+            "coding_composite": 0.439,
+            "coding_quality": 0.873,
+            "general_composite": 0.481,
+            "general_quality": 0.966,
+            "latency_ms": 5941.9,
             "latency_score": 0,
-            "toks_norm": 0.253
+            "toks_norm": 0.153
+        },
+        "llama3.1:8b": {
+            "avg_tok_per_sec": 11.8,
+            "category": "general",
+            "coding_composite": 0.599,
+            "coding_quality": 0.823,
+            "general_composite": 0.623,
+            "general_quality": 0.877,
+            "latency_ms": 2202.0,
+            "latency_score": 0.56,
+            "toks_norm": 0.294
         },
         "llama3.2:3b": {
-            "avg_tok_per_sec": 22.5,
+            "avg_tok_per_sec": 23.0,
             "category": "general",
-            "coding_composite": 0.846,
-            "coding_quality": 0.723,
-            "general_composite": 0.961,
-            "general_quality": 0.979,
-            "latency_ms": 580.7,
-            "latency_score": 0.884,
-            "toks_norm": 1.0
+            "coding_composite": 0.786,
+            "coding_quality": 0.89,
+            "general_composite": 0.814,
+            "general_quality": 0.954,
+            "latency_ms": 754.8,
+            "latency_score": 0.849,
+            "toks_norm": 0.576
+        },
+        "qwen2.5-coder:14B": {
+            "avg_tok_per_sec": 6.6,
+            "category": "coding",
+            "coding_composite": 0.57,
+            "coding_quality": 0.85,
+            "general_composite": 0.607,
+            "general_quality": 0.931,
+            "latency_ms": 2229.7,
+            "latency_score": 0.554,
+            "toks_norm": 0.164
         },
         "qwen2.5-coder:7b": {
-            "avg_tok_per_sec": 12.3,
+            "avg_tok_per_sec": 12.5,
             "category": "coding",
-            "coding_composite": 0.664,
-            "coding_quality": 0.683,
-            "general_composite": 0.756,
-            "general_quality": 0.888,
-            "latency_ms": 1222.4,
-            "latency_score": 0.756,
-            "toks_norm": 0.56
+            "coding_composite": 0.655,
+            "coding_quality": 0.85,
+            "general_composite": 0.682,
+            "general_quality": 0.91,
+            "latency_ms": 1431.0,
+            "latency_score": 0.714,
+            "toks_norm": 0.312
         }
     },
     "coding_ranking": [
         {
-            "composite": 0.764,
+            "composite": 0.723,
             "metrics": {
-                "avg_tok_per_sec": 21.6,
+                "avg_tok_per_sec": 23.5,
                 "category": "coding",
-                "coding_composite": 0.764,
-                "coding_quality": 0.657,
-                "general_composite": 0.867,
-                "general_quality": 0.886,
-                "latency_ms": 1510.5,
-                "latency_score": 0.698,
-                "toks_norm": 0.982
+                "coding_composite": 0.723,
+                "coding_quality": 0.833,
+                "general_composite": 0.746,
+                "general_quality": 0.885,
+                "latency_ms": 1568.5,
+                "latency_score": 0.686,
+                "toks_norm": 0.586
             },
-            "name": "deepseek-coder-v2:latest"
+            "name": "deepseek-coder-v2:16b"
         },
         {
-            "composite": 0.664,
+            "composite": 0.655,
             "metrics": {
-                "avg_tok_per_sec": 12.3,
+                "avg_tok_per_sec": 12.5,
                 "category": "coding",
-                "coding_composite": 0.664,
-                "coding_quality": 0.683,
-                "general_composite": 0.756,
-                "general_quality": 0.888,
-                "latency_ms": 1222.4,
-                "latency_score": 0.756,
-                "toks_norm": 0.56
+                "coding_composite": 0.655,
+                "coding_quality": 0.85,
+                "general_composite": 0.682,
+                "general_quality": 0.91,
+                "latency_ms": 1431.0,
+                "latency_score": 0.714,
+                "toks_norm": 0.312
             },
             "name": "qwen2.5-coder:7b"
+        },
+        {
+            "composite": 0.57,
+            "metrics": {
+                "avg_tok_per_sec": 6.6,
+                "category": "coding",
+                "coding_composite": 0.57,
+                "coding_quality": 0.85,
+                "general_composite": 0.607,
+                "general_quality": 0.931,
+                "latency_ms": 2229.7,
+                "latency_score": 0.554,
+                "toks_norm": 0.164
+            },
+            "name": "qwen2.5-coder:14B"
+        },
+        {
+            "composite": 0.437,
+            "metrics": {
+                "avg_tok_per_sec": 3.2,
+                "category": "coding",
+                "coding_composite": 0.437,
+                "coding_quality": 0.833,
+                "general_composite": 0.326,
+                "general_quality": 0.586,
+                "latency_ms": 4235.4,
+                "latency_score": 0.153,
+                "toks_norm": 0.08
+            },
+            "name": "codellama:34b"
         }
     ],
     "general_ranking": [
         {
-            "composite": 0.961,
+            "composite": 0.814,
             "metrics": {
-                "avg_tok_per_sec": 22.5,
+                "avg_tok_per_sec": 23.0,
                 "category": "general",
-                "coding_composite": 0.846,
-                "coding_quality": 0.723,
-                "general_composite": 0.961,
-                "general_quality": 0.979,
-                "latency_ms": 580.7,
-                "latency_score": 0.884,
-                "toks_norm": 1.0
+                "coding_composite": 0.786,
+                "coding_quality": 0.89,
+                "general_composite": 0.814,
+                "general_quality": 0.954,
+                "latency_ms": 754.8,
+                "latency_score": 0.849,
+                "toks_norm": 0.576
             },
             "name": "llama3.2:3b"
         },
         {
-            "composite": 0.495,
+            "composite": 0.623,
             "metrics": {
-                "avg_tok_per_sec": 5.6,
+                "avg_tok_per_sec": 11.8,
                 "category": "general",
-                "coding_composite": 0.416,
-                "coding_quality": 0.757,
-                "general_composite": 0.495,
-                "general_quality": 0.931,
-                "latency_ms": 5975.8,
+                "coding_composite": 0.599,
+                "coding_quality": 0.823,
+                "general_composite": 0.623,
+                "general_quality": 0.877,
+                "latency_ms": 2202.0,
+                "latency_score": 0.56,
+                "toks_norm": 0.294
+            },
+            "name": "llama3.1:8b"
+        },
+        {
+            "composite": 0.481,
+            "metrics": {
+                "avg_tok_per_sec": 6.1,
+                "category": "general",
+                "coding_composite": 0.439,
+                "coding_quality": 0.873,
+                "general_composite": 0.481,
+                "general_quality": 0.966,
+                "latency_ms": 5941.9,
                 "latency_score": 0,
-                "toks_norm": 0.253
+                "toks_norm": 0.153
             },
             "name": "gemma3:12b-it-q4_K_M"
         }
     ],
     "slot1_general": "llama3.2:3b",
-    "slot2_general": "gemma3:12b-it-q4_K_M",
-    "slot3_coding": "deepseek-coder-v2:latest",
-    "slot4_coding": "qwen2.5-coder:7b"
+    "slot2_general": "llama3.1:8b",
+    "slot3_coding": "deepseek-coder-v2:16b",
+    "slot4_coding": "qwen2.5-coder:7b",
+    "slot5_general_rotate": "gemma3:12b-it-q4_K_M",
+    "slot6_coding_rotate": "qwen2.5-coder:14B"
 }

+ 26 - 10
inventory/group_vars/all.yml

@@ -51,6 +51,7 @@ vault_approle_name: "ai-services"
 # Service ports
 keycloak_port: 8180
 ollama_port: 11434
+ollama_node0_port: 11435
 qdrant_http_port: 6333
 qdrant_grpc_port: 6334
 
@@ -58,17 +59,19 @@ qdrant_grpc_port: 6334
 ollama_host: "0.0.0.0:11434"
 ollama_num_threads: 14
 ollama_num_parallel: 2
-ollama_max_loaded_models: 4
+ollama_max_loaded_models: 3   # 3 per socket (6 total across both NUMA instances)
 ollama_keep_alive: "-1"
 ollama_flash_attention: "1"
 
 # NUMA/CPU affinity - Dell M630, 2x E5-2690v4
 # CPUs are interleaved: odd = socket 1 (NUMA node 1), even = socket 0.
 # Physical cores on node 1: 1,3,...,27 (14 cores). HT siblings: 29,31,...,55.
+# Physical cores on node 0: 0,2,...,26 (14 cores). HT siblings: 28,30,...,54.
 # Pinning to physical cores only eliminates HT contention on the memory bus.
 # NUMA node 1 has ~120 GB free RAM vs node 0's ~75 GB.
 ollama_numa_node: "1"
 ollama_cpu_affinity: "1 3 5 7 9 11 13 15 17 19 21 23 25 27"
+ollama_node0_cpu_affinity: "0 2 4 6 8 10 12 14 16 18 20 22 24 26"
 ollama_binary_path: /usr/bin/ollama
 
 # Keycloak configuration
@@ -85,9 +88,27 @@ benchmark_thresholds:
   min_quality_score: 0.6
   min_composite_score: 0.55
 
-benchmark_toks_norm_ceiling: 22     # Observed hardware max on Dell M630 (22.5 tok/sec measured)
+benchmark_toks_norm_ceiling: 40     # Conservative dual-socket estimate (was 22 single-socket)
 benchmark_coding_threshold: 0.10    # Delta to classify a model as coding-specialized
 
+# Modelfile aliases created by 04_models.yml — excluded from benchmark to prevent
+# 32k-token KV cache allocations stalling the run with 285-second response times.
+benchmark_skip_aliases:
+  - "coder-128k"
+  - "coder-32k"
+  - "coder-rotate"
+  - "llama-family"
+  - "gemma-family"
+
+benchmark_small_max_gb: 10    # upper size boundary for small pass (< 10 GB), based on runtime RAM
+benchmark_medium_max_gb: 15   # upper size boundary for medium pass (10–15 GB), based on runtime RAM
+benchmark_size_overhead_factor: 1.2  # ollama list shows disk size; multiply by this to estimate runtime RAM
+benchmark_load_timeout: 180      # seconds — warm-up "Hi" prompt per model before benchmarking
+benchmark_small_timeout: 90      # seconds per request, small models (<10 GB)
+benchmark_medium_timeout: 240    # seconds per request, medium models (10–15 GB)
+benchmark_large_timeout: 480     # seconds per request, large models (>15 GB)
+benchmark_num_predict: 500       # cap output tokens; allows full coding responses (def+return+docstring+assert); worst-case: 6.5 tok/s→77s, 22 tok/s→23s
+
 # Explicit category overrides applied before heuristics. Keys are model names as
 # returned by `ollama list`. Valid values: 'coding' or 'general'.
 # Example: { "deepseek-coder-v2": "coding", "qwen2.5-coder:7b": "coding" }
@@ -97,7 +118,7 @@ model_category_overrides: {}
 # These are the minimum set needed to populate all 4 slots with meaningful candidates.
 baseline_models:
   - "llama3.2:3b"
-  - "deepseek-coder-v2"
+  - "deepseek-coder-v2:16b"
   - "qwen2.5-coder:7b"
   - "llama3.1:8b"
 
@@ -108,11 +129,6 @@ candidate_models:
     expected_tokens_sec: 4.5
     reason: "Larger qwen2.5-coder for higher quality"
     category: coding
-  - name: "deepseek-coder-v2:latest"
-    size_gb: 9
-    expected_tokens_sec: 8.0
-    reason: "DeepSeek Coder V2 full model"
-    category: coding
   - name: "codegemma:7b-instruct-q5_K_M"
     size_gb: 5.5
     expected_tokens_sec: 12.0
@@ -124,8 +140,8 @@ candidate_models:
     reason: "StarCoder2 coding specialist"
     category: coding
 
-# OpenClaw default model
-openclaw_model: "llama3.2:3b"
+# OpenClaw default model — overridden dynamically by 08_openclaw.yml from slot1_general
+openclaw_model: "deepseek-coder-v2:16b-lite-instruct-q4_K_M"
 
 # AWS Bedrock (OpenAI-compatible API via Open WebUI)
 # Pass bearer_token on first run: -e "bedrock_bearer_token=<value>"

+ 49 - 0
playbooks/01_vault.yml

@@ -132,12 +132,14 @@
       register: vault_init_check
       tags:
         - vault-init
+        - vault-unseal
 
     - name: "Vault | Set initialization status fact"
       ansible.builtin.set_fact:
         vault_is_initialized: "{{ vault_init_check.status != 501 }}"
       tags:
         - vault-init
+        - vault-unseal
 
     - name: "Vault | Initialize Vault"
       ansible.builtin.command:
@@ -235,6 +237,47 @@
       tags:
         - vault-unseal
 
+    # ── Auto-unseal on reboot ─────────────────────────────────────────
+    - name: "Vault | Deploy unseal key to server"
+      ansible.builtin.copy:
+        content: "{{ vault_init_data.unseal_keys_b64[0] }}"
+        dest: /etc/vault.d/unseal.key
+        owner: root
+        group: root
+        mode: "0400"
+      tags:
+        - vault-unseal
+        - vault-autounseal
+
+    - name: "Vault | Deploy vault-unseal.sh"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/vault/vault-unseal.sh.j2"
+        dest: /usr/local/bin/vault-unseal.sh
+        owner: root
+        group: root
+        mode: "0750"
+      tags:
+        - vault-autounseal
+
+    - name: "Vault | Deploy vault-unseal.service"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/vault/vault-unseal.service.j2"
+        dest: /etc/systemd/system/vault-unseal.service
+        owner: root
+        group: root
+        mode: "0644"
+      notify: Reload systemd and restart vault-unseal
+      tags:
+        - vault-autounseal
+
+    - name: "Vault | Enable vault-unseal.service"
+      ansible.builtin.systemd:
+        name: vault-unseal.service
+        enabled: true
+        daemon_reload: true
+      tags:
+        - vault-autounseal
+
     - name: "Vault | Set root token fact"
       ansible.builtin.set_fact:
         vault_root_token: "{{ vault_init_data.root_token }}"
@@ -516,3 +559,9 @@
         name: vault
         state: restarted
         daemon_reload: true
+
+    - name: Reload systemd and restart vault-unseal
+      ansible.builtin.systemd:
+        name: vault-unseal.service
+        state: restarted
+        daemon_reload: true

+ 44 - 2
playbooks/02_infrastructure.yml

@@ -155,6 +155,42 @@
       tags:
         - ollama
 
+    - name: "Ollama | Deploy ollama-node0 systemd unit"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/ollama/ollama-node0.service.j2"
+        dest: /etc/systemd/system/ollama-node0.service
+        mode: "0644"
+        owner: root
+        group: root
+      notify:
+        - Reload systemd and start ollama-node0
+      tags:
+        - ollama
+
+    - name: "Ollama | Enable and start ollama-node0"
+      ansible.builtin.systemd:
+        name: ollama-node0
+        enabled: true
+        state: started
+        daemon_reload: true
+      tags:
+        - ollama
+
+    - name: "Ollama | Wait for ollama-node0 API to be ready"
+      ansible.builtin.uri:
+        url: "http://localhost:{{ ollama_node0_port }}/api/tags"
+        method: GET
+        headers:
+          Authorization: "Bearer {{ ollama_api_key }}"
+        status_code: 200
+        timeout: 10
+      register: ollama_node0_ready
+      retries: 24
+      delay: 5
+      until: ollama_node0_ready.status == 200
+      tags:
+        - ollama
+
     # ── OS-level kernel tuning for dedicated inference server ────────────────
     - name: "OS Tune | Apply sysctl settings for inference workload"
       ansible.posix.sysctl:
@@ -164,8 +200,8 @@
         reload: true
         state: present
       loop:
-        # Disable auto-NUMA migration — fights explicit numactl --membind=1 by
-        # moving KV-cache pages mid-inference to a different NUMA node.
+        # Disable auto-NUMA migration — CPUAffinity pins Ollama to node 1/0
+        # physical cores; NUMA balancing could migrate pages mid-inference.
         - { name: kernel.numa_balancing, value: "0" }
         # Near-zero swappiness: prevents model weights being paged out under
         # memory pressure (complements LimitMEMLOCK=infinity in the unit file).
@@ -261,6 +297,12 @@
         state: restarted
         daemon_reload: true
 
+    - name: Reload systemd and start ollama-node0
+      ansible.builtin.systemd:
+        name: ollama-node0
+        state: started
+        daemon_reload: true
+
     - name: Reload systemd daemon
       ansible.builtin.systemd:
         daemon_reload: true

+ 165 - 37
playbooks/03_benchmark.yml

@@ -84,6 +84,31 @@
       tags:
         - benchmark-discover
 
+    - name: "Benchmark | Stop warmup services for clean benchmark run"
+      ansible.builtin.systemd:
+        name: "{{ item }}"
+        state: stopped
+      loop:
+        - ollama-warmup.service
+        - ollama-warmup-node0.service
+      failed_when: false
+      become: true
+      tags:
+        - benchmark-setup
+
+    - name: "Benchmark | Wait for node0 Ollama API to be ready"
+      ansible.builtin.uri:
+        url: "http://localhost:{{ ollama_node0_port }}/api/tags"
+        method: GET
+        status_code: 200
+        timeout: 10
+      register: ollama_node0_ready
+      retries: 24
+      delay: 5
+      until: ollama_node0_ready.status == 200
+      tags:
+        - benchmark-setup
+
     - name: "Benchmark | Discover installed models"
       ansible.builtin.command: ollama list
       changed_when: false
@@ -100,44 +125,124 @@
       tags:
         - benchmark-discover
 
-    - name: "Benchmark | Set models_to_benchmark to all installed models"
+    - name: "Benchmark | Parse model sizes from ollama list"
       ansible.builtin.set_fact:
-        models_to_benchmark: "{{ installed_models }}"
+        _benchmark_sizes_json: |
+          {% set ns = namespace(d={}) %}
+          {% for line in ollama_list_output.stdout_lines[1:] %}
+          {%   set p = line.split() %}
+          {%   if p | length >= 4 %}
+          {%     set gb = (p[2] | float) if (p[3] | upper == 'GB') else ((p[2] | float) / 1024) %}
+          {%     set _ = ns.d.update({p[0]: gb}) %}
+          {%   endif %}
+          {% endfor %}
+          {{ ns.d | to_json }}
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Partition models into small, medium, and large passes"
+      ansible.builtin.set_fact:
+        _small_models:  "{{ _alias_filtered | select('in', _small_ok)  | list }}"
+        _medium_models: "{{ _alias_filtered | select('in', _medium_ok) | list }}"
+        _large_models:  "{{ _alias_filtered | reject('in', _small_ok)  | reject('in', _medium_ok) | list }}"
+        models_to_benchmark: "{{ _alias_filtered | list }}"
+      vars:
+        _sizes:     "{{ _benchmark_sizes_json | from_json }}"
+        _small_cut:  "{{ (benchmark_small_max_gb  | float) / (benchmark_size_overhead_factor | float) }}"
+        _medium_cut: "{{ (benchmark_medium_max_gb | float) / (benchmark_size_overhead_factor | float) }}"
+        _small_ok:  "{{ _sizes | dict2items | selectattr('value', 'le', _small_cut  | float) | map(attribute='key') | list }}"
+        _medium_ok: "{{ _sizes | dict2items | selectattr('value', 'gt', _small_cut  | float)
+                                            | selectattr('value', 'le', _medium_cut | float)
+                                            | map(attribute='key') | list }}"
+        _alias_filtered: "{{ installed_models | reject('match', '^(' ~ benchmark_skip_aliases | join('|') ~ ')(:|$)') | list }}"
       when: benchmark_models | default('') | length == 0
       tags:
         - benchmark-discover
 
     - name: "Benchmark | Set models_to_benchmark to specified subset"
       ansible.builtin.set_fact:
-        models_to_benchmark: "{{ benchmark_models.split(',') | map('trim') | list }}"
+        models_to_benchmark: "{{ _specified }}"
+        _small_models:  "{{ _specified | select('in', _small_ok)  | list }}"
+        _medium_models: "{{ _specified | select('in', _medium_ok) | list }}"
+        _large_models:  "{{ _specified | reject('in', _small_ok)  | reject('in', _medium_ok) | list }}"
+      vars:
+        _specified: "{{ benchmark_models.split(',') | map('trim') | list }}"
+        _sizes:     "{{ _benchmark_sizes_json | from_json }}"
+        _small_cut:  "{{ (benchmark_small_max_gb  | float) / (benchmark_size_overhead_factor | float) }}"
+        _medium_cut: "{{ (benchmark_medium_max_gb | float) / (benchmark_size_overhead_factor | float) }}"
+        _small_ok:  "{{ _sizes | dict2items | selectattr('value', 'le', _small_cut  | float) | map(attribute='key') | list }}"
+        _medium_ok: "{{ _sizes | dict2items | selectattr('value', 'gt', _small_cut  | float)
+                                            | selectattr('value', 'le', _medium_cut | float)
+                                            | map(attribute='key') | list }}"
       when: benchmark_models | default('') | length > 0
       tags:
         - benchmark-discover
 
+    - name: "Benchmark | Initialize batch accumulator facts"
+      ansible.builtin.set_fact:
+        bench_all_results: []
+        all_eligible_models: []
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Build per-model benchmark timeout map"
+      ansible.builtin.set_fact:
+        _benchmark_timeout_map_json: |
+          {% set ns = namespace(d={}) %}
+          {% for m in models_to_benchmark %}
+          {%   if m in _small_models %}
+          {%     set _ = ns.d.update({m: benchmark_small_timeout | int}) %}
+          {%   elif m in _medium_models %}
+          {%     set _ = ns.d.update({m: benchmark_medium_timeout | int}) %}
+          {%   else %}
+          {%     set _ = ns.d.update({m: benchmark_large_timeout | int}) %}
+          {%   endif %}
+          {% endfor %}
+          {{ ns.d | to_json }}
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Parse benchmark timeout map"
+      ansible.builtin.set_fact:
+        _benchmark_timeout_map: "{{ _benchmark_timeout_map_json | from_json }}"
+      tags:
+        - benchmark-discover
+
+    - name: "Benchmark | Sort models largest-first so heaviest models land on node1 (120 GB)"
+      ansible.builtin.set_fact:
+        models_to_benchmark: >-
+          {{ (_large_models + _medium_models + _small_models)
+             | select('in', models_to_benchmark) | list }}
+      tags:
+        - benchmark-discover
+
     - name: "Benchmark | Display models to benchmark"
       ansible.builtin.debug:
-        msg: "Will benchmark the following models: {{ models_to_benchmark }}"
+        msg:
+          - "Small  pass (timeout {{ benchmark_small_timeout }}s,  ≤{{ benchmark_small_max_gb }}GB):  {{ _small_models }}"
+          - "Medium pass (timeout {{ benchmark_medium_timeout }}s, {{ benchmark_small_max_gb }}–{{ benchmark_medium_max_gb }}GB): {{ _medium_models }}"
+          - "Large  pass (timeout {{ benchmark_large_timeout }}s, >{{ benchmark_medium_max_gb }}GB): {{ _large_models }}"
+          - "Load timeout (warm-up 'Hi' prompt): {{ benchmark_load_timeout }}s"
+          - "Total: {{ models_to_benchmark | length }} models, {{ (models_to_benchmark | batch(6) | list) | length }} batch(es) of ≤6"
       tags:
         - benchmark-discover
 
-    - name: "Benchmark | Run test prompts against each model"
-      ansible.builtin.uri:
-        url: "{{ ollama_api_url }}/api/generate"
-        method: POST
-        body_format: json
-        body:
-          model: "{{ item.0 }}"
-          prompt: "{{ test_prompts[item.1].prompt }}"
-          stream: false
-        headers:
-          Authorization: "Bearer {{ ollama_api_key }}"
-        timeout: 300
-        status_code: 200
-      loop: "{{ models_to_benchmark | product(test_prompts.keys() | list) | list }}"
+    - name: "Benchmark | Process batch {{ _loop_idx + 1 }} of {{ models_to_benchmark | batch(6) | list | length }}"
+      ansible.builtin.include_tasks: _bench_tier_batch.yml
+      vars:
+        _batch_node1: "{{ _batch[:3] }}"
+        _batch_node0: "{{ _batch[3:] }}"
+      loop: "{{ models_to_benchmark | batch(6) | list }}"
       loop_control:
-        label: "{{ item.0 }} / {{ item.1 }}"
-      register: benchmark_raw_results
-      failed_when: false
+        loop_var: _batch
+        label: "batch {{ _loop_idx + 1 }}: node1={{ _batch[:3] }} node0={{ _batch[3:] }}"
+        index_var: _loop_idx
+      tags:
+        - benchmark-run
+
+    - name: "Benchmark | Display models that failed to load"
+      ansible.builtin.debug:
+        msg: "Load failures (excluded from scoring): {{ models_to_benchmark | reject('in', all_eligible_models) | list }}"
       tags:
         - benchmark-run
 
@@ -145,9 +250,9 @@
       ansible.builtin.set_fact:
         model_metrics: |
           {% set ns = namespace(results={}) %}
-          {% for model in models_to_benchmark %}
+          {% for model in all_eligible_models %}
           {%   set ns2 = namespace(coding_quality=0, coding_count=0, general_quality=0, general_count=0, total_toks=0, total_eval_time=0, ttft_sum=0, ttft_count=0, latency_ns=0) %}
-          {%   for result in benchmark_raw_results.results %}
+          {%   for result in bench_all_results %}
           {%     if result.item[0] == model and result.status == 200 %}
           {%       set test_name = result.item[1] %}
           {%       set resp = result.json | default({}) %}
@@ -160,7 +265,7 @@
           {%       set ns2.ttft_sum = ns2.ttft_sum + prompt_eval_duration %}
           {%       set ns2.ttft_count = ns2.ttft_count + 1 %}
           {%       if test_name == 'latency' %}
-          {%         set ns2.latency_ns = eval_duration + prompt_eval_duration %}
+          {%         set ns2.latency_ns = ((resp.total_duration | default(0) | int) - (resp.load_duration | default(0) | int)) | abs %}
           {%       endif %}
           {%       set resp_len = response_text | length %}
           {%       if test_name in ['code_gen', 'debug', 'refactor'] %}
@@ -239,9 +344,14 @@
           {% set coding_sorted = coding_models | sort(attribute='composite', reverse=true) %}
           {% set slot1 = general_sorted[0].name if general_sorted | length > 0 else 'none' %}
           {% set slot2 = general_sorted[1].name if general_sorted | length > 1 else (general_sorted[0].name if general_sorted | length > 0 else 'none') %}
+          {% set slot5 = general_sorted[2].name if general_sorted | length > 2 else 'none' %}
           {% set slot3 = coding_sorted[0].name if coding_sorted | length > 0 else (general_sorted[0].name if general_sorted | length > 0 else 'none') %}
           {% set slot4 = coding_sorted[1].name if coding_sorted | length > 1 else (coding_sorted[0].name if coding_sorted | length > 0 else 'none') %}
-          {{ {'slot1_general': slot1, 'slot2_general': slot2, 'slot3_coding': slot3, 'slot4_coding': slot4, 'all_metrics': parsed_metrics, 'general_ranking': general_sorted, 'coding_ranking': coding_sorted} | to_json }}
+          {% set slot6 = coding_sorted[2].name if coding_sorted | length > 2 else 'none' %}
+          {{ {'slot1_general': slot1, 'slot2_general': slot2, 'slot5_general_rotate': slot5,
+              'slot3_coding': slot3, 'slot4_coding': slot4, 'slot6_coding_rotate': slot6,
+              'all_metrics': parsed_metrics, 'general_ranking': general_sorted,
+              'coding_ranking': coding_sorted} | to_json }}
       tags:
         - benchmark-select
 
@@ -255,12 +365,16 @@
       ansible.builtin.debug:
         msg:
           - "============================================="
-          - "  MODEL SELECTION RESULTS"
+          - "  MODEL SELECTION RESULTS  (6-slot / 2-socket)"
           - "============================================="
-          - "  Slot 1 (General Primary):  {{ selection.slot1_general }}"
-          - "  Slot 2 (General Secondary): {{ selection.slot2_general }}"
-          - "  Slot 3 (Coding Primary):   {{ selection.slot3_coding }}"
-          - "  Slot 4 (Coding Secondary): {{ selection.slot4_coding }}"
+          - "  Node 1 — General (port 11434)"
+          - "  Slot 1 (locked):   {{ selection.slot1_general }}"
+          - "  Slot 2 (locked):   {{ selection.slot2_general }}"
+          - "  Slot 5 (rotate):   {{ selection.slot5_general_rotate }}"
+          - "  Node 0 — Coding (port 11435)"
+          - "  Slot 3 (locked):   {{ selection.slot3_coding }}"
+          - "  Slot 4 (locked):   {{ selection.slot4_coding }}"
+          - "  Slot 6 (rotate):   {{ selection.slot6_coding_rotate }}"
           - "============================================="
       tags:
         - benchmark-select
@@ -276,13 +390,15 @@
         content: |
           # Benchmark Results - {{ benchmark_timestamp }}
 
-          ## Model Selection
-          | Slot | Role | Model | Composite Score |
-          |------|------|-------|----------------|
-          | 1 | General (Primary) | {{ selection.slot1_general }} | {{ parsed_metrics[selection.slot1_general].general_composite | default('N/A') }} |
-          | 2 | General (Secondary) | {{ selection.slot2_general }} | {{ parsed_metrics[selection.slot2_general].general_composite | default('N/A') }} |
-          | 3 | Coding (Primary) | {{ selection.slot3_coding }} | {{ parsed_metrics[selection.slot3_coding].coding_composite | default('N/A') }} |
-          | 4 | Coding (Secondary) | {{ selection.slot4_coding }} | {{ parsed_metrics[selection.slot4_coding].coding_composite | default('N/A') }} |
+          ## Model Selection (6-slot / 2-socket)
+          | Slot | Socket | Role | Model | Composite Score |
+          |------|--------|------|-------|----------------|
+          | 1 | Node 1 (port 11434) | General (locked) | {{ selection.slot1_general }} | {{ (parsed_metrics[selection.slot1_general].general_composite if selection.slot1_general in parsed_metrics else 'N/A') }} |
+          | 2 | Node 1 (port 11434) | General (locked) | {{ selection.slot2_general }} | {{ (parsed_metrics[selection.slot2_general].general_composite if selection.slot2_general in parsed_metrics else 'N/A') }} |
+          | 5 | Node 1 (port 11434) | General (rotate) | {{ selection.slot5_general_rotate }} | {{ (parsed_metrics[selection.slot5_general_rotate].general_composite if selection.slot5_general_rotate in parsed_metrics else 'N/A') }} |
+          | 3 | Node 0 (port 11435) | Coding (locked) | {{ selection.slot3_coding }} | {{ (parsed_metrics[selection.slot3_coding].coding_composite if selection.slot3_coding in parsed_metrics else 'N/A') }} |
+          | 4 | Node 0 (port 11435) | Coding (locked) | {{ selection.slot4_coding }} | {{ (parsed_metrics[selection.slot4_coding].coding_composite if selection.slot4_coding in parsed_metrics else 'N/A') }} |
+          | 6 | Node 0 (port 11435) | Coding (rotate) | {{ selection.slot6_coding_rotate }} | {{ (parsed_metrics[selection.slot6_coding_rotate].coding_composite if selection.slot6_coding_rotate in parsed_metrics else 'N/A') }} |
 
           ## Detailed Metrics
           {% for model, metrics in parsed_metrics.items() %}
@@ -342,3 +458,15 @@
       changed_when: true
       tags:
         - benchmark-pull
+
+    - name: "Benchmark | Restart warmup services after benchmark"
+      ansible.builtin.systemd:
+        name: "{{ item }}"
+        state: restarted
+      loop:
+        - ollama-warmup.service
+        - ollama-warmup-node0.service
+      failed_when: false
+      become: true
+      tags:
+        - benchmark-cleanup

+ 74 - 7
playbooks/04_models.yml

@@ -11,7 +11,9 @@
   vars:
     model_selection_file: "{{ playbook_dir }}/../benchmarks/results/model_selection.json"
     modelfiles_dir: /mnt/ai_data/ollama_models/modelfiles
-    slot4_model: ""
+    slot4_model: ""   # legacy override kept for backwards compatibility
+    slot5_model: ""   # overrides slot5_general_rotate
+    slot6_model: ""   # overrides slot6_coding_rotate
     ollama_api_key: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/ollama:api_key token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
 
   tasks:
@@ -38,13 +40,31 @@
       tags:
         - models-load
 
+    - name: "Models | Apply slot5 override if provided"
+      ansible.builtin.set_fact:
+        model_selection: "{{ model_selection | combine({'slot5_general_rotate': slot5_model}) }}"
+      when: slot5_model | length > 0
+      tags:
+        - models-load
+
+    - name: "Models | Apply slot6 override if provided"
+      ansible.builtin.set_fact:
+        model_selection: "{{ model_selection | combine({'slot6_coding_rotate': slot6_model}) }}"
+      when: slot6_model | length > 0
+      tags:
+        - models-load
+
     - name: "Models | Display selected models"
       ansible.builtin.debug:
         msg:
-          - "Slot 1 (General Primary):   {{ model_selection.slot1_general }}"
-          - "Slot 2 (General Secondary):  {{ model_selection.slot2_general }}"
-          - "Slot 3 (Coding Primary):    {{ model_selection.slot3_coding }}"
-          - "Slot 4 (Coding Secondary):  {{ model_selection.slot4_coding }}"
+          - "=== Node 1 — General (port 11434) ==="
+          - "Slot 1 (locked):  {{ model_selection.slot1_general }}"
+          - "Slot 2 (locked):  {{ model_selection.slot2_general }}"
+          - "Slot 5 (rotate):  {{ model_selection.slot5_general_rotate | default('none') }}"
+          - "=== Node 0 — Coding (port 11435) ==="
+          - "Slot 3 (locked):  {{ model_selection.slot3_coding }}"
+          - "Slot 4 (locked):  {{ model_selection.slot4_coding }}"
+          - "Slot 6 (rotate):  {{ model_selection.slot6_coding_rotate | default('none') }}"
       tags:
         - models-load
 
@@ -72,8 +92,10 @@
       loop:
         - "{{ model_selection.slot1_general }}"
         - "{{ model_selection.slot2_general }}"
+        - "{{ model_selection.slot5_general_rotate | default('none') }}"
         - "{{ model_selection.slot3_coding }}"
         - "{{ model_selection.slot4_coding }}"
+        - "{{ model_selection.slot6_coding_rotate | default('none') }}"
       when:
         - item | length > 0
         - item != 'none'
@@ -130,6 +152,20 @@
       tags:
         - models-modelfile
 
+    - name: "Models | Template coder-rotate Modelfile"
+      ansible.builtin.copy:
+        content: |
+          FROM {{ model_selection.slot6_coding_rotate }}
+          PARAMETER num_ctx 32768
+          SYSTEM You are an expert coding assistant. You write clean, efficient, well-documented code. Always include type hints and follow best practices.
+        dest: "{{ modelfiles_dir }}/Modelfile.coder-rotate"
+        mode: "0644"
+      when:
+        - model_selection.slot6_coding_rotate | default('') | length > 0
+        - model_selection.slot6_coding_rotate | default('none') != 'none'
+      tags:
+        - models-modelfile
+
     - name: "Models | Template llama-family Modelfile"
       ansible.builtin.copy:
         content: |
@@ -156,8 +192,9 @@
     - name: "Models | Register custom models with Ollama"
       ansible.builtin.command: "ollama create {{ item.name }} -f {{ modelfiles_dir }}/{{ item.file }}"
       loop:
-        - { name: "coder-128k", file: "Modelfile.coder-128k" }
-        - { name: "coder-32k",  file: "Modelfile.coder-32k",  slot: "{{ model_selection.slot4_coding }}" }
+        - { name: "coder-128k",   file: "Modelfile.coder-128k" }
+        - { name: "coder-32k",    file: "Modelfile.coder-32k",    slot: "{{ model_selection.slot4_coding }}" }
+        - { name: "coder-rotate", file: "Modelfile.coder-rotate", slot: "{{ model_selection.slot6_coding_rotate | default('none') }}" }
         - { name: "llama-family", file: "Modelfile.llama-family" }
         - { name: "gemma-family", file: "Modelfile.gemma-family" }
       when: item.slot is not defined or (item.slot | length > 0 and item.slot != 'none')
@@ -201,3 +238,33 @@
         state: started
       tags:
         - models-warmup
+
+    # ── Node0 warmup service ─────────────────────────────────────────
+    - name: "Models | Template node0 warmup script"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/ollama/warmup-node0.sh.j2"
+        dest: /usr/local/bin/ollama-warmup-node0.sh
+        mode: "0755"
+        owner: root
+        group: root
+      tags:
+        - models-warmup
+
+    - name: "Models | Template node0 warmup systemd service"
+      ansible.builtin.template:
+        src: "{{ playbook_dir }}/../templates/systemd/ollama-warmup-node0.service.j2"
+        dest: /etc/systemd/system/ollama-warmup-node0.service
+        mode: "0644"
+        owner: root
+        group: root
+      tags:
+        - models-warmup
+
+    - name: "Models | Enable and start node0 warmup service"
+      ansible.builtin.systemd:
+        name: ollama-warmup-node0
+        enabled: true
+        state: started
+        daemon_reload: true
+      tags:
+        - models-warmup

+ 1 - 1
playbooks/07_openwebui.yml

@@ -90,7 +90,7 @@
         _openwebui_env: >-
           {{
             {
-              'OLLAMA_BASE_URL': 'http://host.docker.internal:11434',
+              'OLLAMA_BASE_URLS': 'http://host.docker.internal:11434;http://host.docker.internal:11435',
               'OLLAMA_API_KEY': ollama_api_key,
               'WEBUI_SECRET_KEY': openwebui_secret_key,
               'WEBUI_AUTH': 'true',

+ 21 - 0
playbooks/08_openclaw.yml

@@ -77,6 +77,27 @@
       tags:
         - openclaw-config
 
+    - name: "OpenClaw | Load model selection for model assignment"
+      ansible.builtin.slurp:
+        src: "{{ playbook_dir }}/../benchmarks/results/model_selection.json"
+      delegate_to: localhost
+      become: false
+      register: _model_sel_raw
+      ignore_errors: true
+      when: not skip_openclaw
+      tags:
+        - openclaw-config
+
+    - name: "OpenClaw | Set openclaw_model from benchmark slot 1 (best general)"
+      ansible.builtin.set_fact:
+        openclaw_model: "{{ (_model_sel_raw.content | b64decode | from_json).slot1_general }}"
+      when:
+        - not skip_openclaw
+        - _model_sel_raw is not failed
+        - _model_sel_raw.content is defined
+      tags:
+        - openclaw-config
+
     # ── Install Python dependencies ───────────────────────────────────
     - name: "OpenClaw | Install Python dependencies"
       ansible.builtin.pip:

+ 171 - 0
playbooks/_bench_tier_batch.yml

@@ -0,0 +1,171 @@
+---
+# playbooks/_bench_tier_batch.yml
+# Included by 03_benchmark.yml once per batch of up to 6 models.
+#
+# Expected vars (passed via include_tasks vars:):
+#   _batch_node1  — list of 0–3 model names for port 11434
+#   _batch_node0  — list of 0–3 model names for port 11435
+#
+# Mutates host facts (accumulated across batches):
+#   bench_all_results    — list of uri result dicts
+#   all_eligible_models  — list of model names that passed load
+#
+# Concurrency design:
+#   Load:      node1 and node0 warm-up "Hi" prompts fire simultaneously (async).
+#              Within each node Ollama still loads one model at a time,
+#              but both nodes drain their queues in parallel.
+#   Benchmark: sequential (synchronous uri), one request at a time per node.
+#              Node1 drains fully, then node0. No queue contamination; each
+#              request gets a full idle inference slot and clean eval_duration.
+
+# ── Load models into RAM (both nodes concurrently) ────────────────────────────
+# 3 models per node, sequential within each node → last model waits for 2
+# ahead: max load wait ≤ 2 × load_timeout. Use load_timeout × 4 for margin.
+
+- name: "Benchmark | Load node1 models into RAM (async)"
+  ansible.builtin.uri:
+    url: "http://localhost:11434/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item }}"
+      prompt: "Hi"
+      stream: false
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_load_timeout | int) * 4 }}"
+    status_code: 200
+  loop: "{{ _batch_node1 }}"
+  loop_control:
+    label: "node1 load: {{ item }}"
+  async: "{{ (benchmark_load_timeout | int) * 5 }}"
+  poll: 0
+  register: _load_node1_jobs
+  failed_when: false
+
+- name: "Benchmark | Load node0 models into RAM (async)"
+  ansible.builtin.uri:
+    url: "http://localhost:{{ ollama_node0_port }}/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item }}"
+      prompt: "Hi"
+      stream: false
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_load_timeout | int) * 4 }}"
+    status_code: 200
+  loop: "{{ _batch_node0 }}"
+  loop_control:
+    label: "node0 load: {{ item }}"
+  async: "{{ (benchmark_load_timeout | int) * 5 }}"
+  poll: 0
+  register: _load_node0_jobs
+  failed_when: false
+
+- name: "Benchmark | Collect node1 load results"
+  ansible.builtin.async_status:
+    jid: "{{ _async_job.ansible_job_id }}"
+  loop: "{{ _load_node1_jobs.results | default([]) }}"
+  loop_control:
+    loop_var: _async_job
+    label: "node1 load: {{ _async_job.item | default('?') }}"
+  register: _load_node1
+  until: _load_node1.finished
+  retries: "{{ ((benchmark_load_timeout | int) * 5 / 15) | int + 5 }}"
+  delay: 15
+  failed_when: false
+
+- name: "Benchmark | Collect node0 load results"
+  ansible.builtin.async_status:
+    jid: "{{ _async_job.ansible_job_id }}"
+  loop: "{{ _load_node0_jobs.results | default([]) }}"
+  loop_control:
+    loop_var: _async_job
+    label: "node0 load: {{ _async_job.item | default('?') }}"
+  register: _load_node0
+  until: _load_node0.finished
+  retries: "{{ ((benchmark_load_timeout | int) * 5 / 15) | int + 5 }}"
+  delay: 15
+  failed_when: false
+
+# ── Identify successfully loaded models ───────────────────────────────────────
+
+- name: "Benchmark | Identify loaded models in batch"
+  ansible.builtin.set_fact:
+    _eligible_node1: "{{ _load_node1.results | selectattr('status', 'equalto', 200) | map(attribute='_async_job') | map(attribute='item') | list }}"
+    _eligible_node0: "{{ _load_node0.results | selectattr('status', 'equalto', 200) | map(attribute='_async_job') | map(attribute='item') | list }}"
+
+# ── Fire benchmark prompts sequentially (one request at a time per node) ──────
+# Sequential firing ensures each request hits an idle Ollama inference slot:
+# no queue contamination, full CPU budget per request, clean eval_duration.
+# Node1 then node0 run back-to-back; concurrent load phase above is unchanged.
+
+- name: "Benchmark | Fire test prompts at node1"
+  ansible.builtin.uri:
+    url: "http://localhost:11434/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item.0 }}"
+      prompt: "{{ test_prompts[item.1].prompt }}"
+      stream: false
+      options:
+        num_predict: "{{ benchmark_num_predict | int }}"
+        temperature: 0
+        seed: 42
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_large_timeout | int) }}"
+    status_code: 200
+  loop: "{{ _eligible_node1 | product(test_prompts.keys() | list) | list }}"
+  loop_control:
+    label: "{{ item.0 }} / {{ item.1 }}"
+  register: _bench_node1
+  failed_when: false
+
+- name: "Benchmark | Fire test prompts at node0"
+  ansible.builtin.uri:
+    url: "http://localhost:{{ ollama_node0_port }}/api/generate"
+    method: POST
+    body_format: json
+    body:
+      model: "{{ item.0 }}"
+      prompt: "{{ test_prompts[item.1].prompt }}"
+      stream: false
+      options:
+        num_predict: "{{ benchmark_num_predict | int }}"
+        temperature: 0
+        seed: 42
+    headers:
+      Authorization: "Bearer {{ ollama_api_key }}"
+    timeout: "{{ (benchmark_large_timeout | int) }}"
+    status_code: 200
+  loop: "{{ _eligible_node0 | product(test_prompts.keys() | list) | list }}"
+  loop_control:
+    label: "{{ item.0 }} / {{ item.1 }}"
+  register: _bench_node0
+  failed_when: false
+
+# ── Accumulate results into play-scoped facts ─────────────────────────────────
+# Synchronous uri populates result.item = [model, prompt_key] at top level —
+# no _async_job indirection needed; compute task in 03_benchmark.yml unchanged.
+
+- name: "Benchmark | Accumulate node1 results"
+  ansible.builtin.set_fact:
+    bench_all_results: "{{ bench_all_results + [item] }}"
+  loop: "{{ _bench_node1.results | default([]) }}"
+  loop_control:
+    label: "{{ (item.item | default(['?', '?']))[0] }} / {{ (item.item | default(['?', '?']))[1] }}"
+
+- name: "Benchmark | Accumulate node0 results"
+  ansible.builtin.set_fact:
+    bench_all_results: "{{ bench_all_results + [item] }}"
+  loop: "{{ _bench_node0.results | default([]) }}"
+  loop_control:
+    label: "{{ (item.item | default(['?', '?']))[0] }} / {{ (item.item | default(['?', '?']))[1] }}"
+
+- name: "Benchmark | Accumulate eligible models"
+  ansible.builtin.set_fact:
+    all_eligible_models: "{{ all_eligible_models + _eligible_node1 + _eligible_node0 }}"

+ 75 - 45
roles/models/README.md

@@ -2,79 +2,109 @@
 
 ## Purpose
 
-Manage the Ollama model lifecycle -- pulling models, creating custom Modelfile
-configurations, and running a warm-up service to ensure models are loaded into GPU
-memory at boot time.
+Manage the Ollama model lifecycle — pulling models, creating custom Modelfile
+configurations, and running warm-up services to ensure models are loaded into RAM
+at boot time across both NUMA instances.
 
-## Slot System
+## 6-Slot System
 
-| Slot | Role               | Selection Method                         |
-|------|--------------------|------------------------------------------|
-| 1    | Primary Coding     | Highest coding composite from benchmarks |
-| 2    | Primary General    | Highest general composite from benchmarks|
-| 3    | Secondary / Backup | Next-best overall average composite      |
-| 4    | Experimental       | Manual override via `-e slot4_model=<name>` |
+| Slot | Instance      | Port  | Role             | Selection                      | Rotation                    |
+|------|---------------|-------|------------------|--------------------------------|-----------------------------|
+| 1    | Node 1        | 11434 | General (locked) | Top general composite          | Re-benchmark only           |
+| 2    | Node 1        | 11434 | General (locked) | 2nd general composite          | Re-benchmark only           |
+| 5    | Node 1        | 11434 | General (rotate) | 3rd general composite          | `-e slot5_model=<name>`     |
+| 3    | Node 0        | 11435 | Coding (locked)  | Top coding composite           | Re-benchmark only           |
+| 4    | Node 0        | 11435 | Coding (locked)  | 2nd coding composite           | Re-benchmark only           |
+| 6    | Node 0        | 11435 | Coding (rotate)  | 3rd coding composite           | `-e slot6_model=<name>`     |
 
 ## Slot Rotation
 
-To override slot 4 with a specific model at runtime:
+Rotate the general slot on Node 1 (port 11434):
 
 ```bash
-ansible-playbook playbooks/03_ollama.yml -e slot4_model=mistral:7b
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot5_model=mistral:latest"
 ```
 
-Slots 1-3 are automatically assigned based on the latest benchmark results in
-`model_selection.json`. Slot 4 is always user-controlled.
+Rotate the coding slot on Node 0 (port 11435):
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml -e "slot6_model=llama3.1:70b"
+```
+
+Both at once:
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml \
+  -e "slot5_model=mistral:latest" -e "slot6_model=command-r:35b"
+```
+
+Reset both rotate slots back to benchmark recommendations:
+
+```bash
+ansible-playbook playbooks/04_models.yml -K -e @local.yml
+```
 
 ## Modelfile Configurations
 
-Custom Modelfile variants are created for fine-tuned context windows and use cases:
+Custom Modelfile variants are created for fine-tuned context windows:
+
+| Custom Model    | Base Slot    | Context | Port  | Use Case                         |
+|-----------------|--------------|---------|-------|----------------------------------|
+| `coder-128k`    | slot3_coding | 32768   | 11435 | Primary coding (large context)   |
+| `coder-32k`     | slot4_coding | 32768   | 11435 | Secondary coding                 |
+| `coder-rotate`  | slot6_coding_rotate | 32768 | 11435 | Rotatable coding model      |
+| `llama-family`  | llama3.2:3b  | 8192    | 11434 | Family-safe general assistant    |
+| `gemma-family`  | llama3.1:8b  | 8192    | 11434 | Family-safe general assistant    |
+
+**These aliases are excluded from benchmarking** via `benchmark_skip_aliases` — their
+32k-token `num_ctx` context allocations stall the benchmark loop with 285-second responses.
+
+## Warm-up Services
+
+Two oneshot systemd services pre-load models after their respective Ollama instances start:
 
-| Custom Model          | Base Model           | Context Window | Use Case                    |
-|-----------------------|----------------------|----------------|-----------------------------|
-| `coding-primary`     | (slot 1 model)       | 32768          | Code generation and debugging |
-| `general-primary`    | (slot 2 model)       | 16384          | General conversation and reasoning |
-| `backup`             | (slot 3 model)       | 16384          | Fallback for either category |
-| `experimental`       | (slot 4 model)       | 8192           | Testing new models           |
+| Service                      | Warms               | Instance            |
+|------------------------------|---------------------|---------------------|
+| `ollama-warmup.service`      | slots 1, 2, 5       | Node 1 (port 11434) |
+| `ollama-warmup-node0.service`| slots 3, 4, 6       | Node 0 (port 11435) |
 
-## Warm-up Service
+`OLLAMA_KEEP_ALIVE=-1` keeps models pinned once loaded. The warmup services only
+need to run once after boot; subsequent requests hit already-loaded models immediately.
 
-The role deploys `ollama-warmup.service`, a oneshot systemd service that runs after
-`ollama.service` starts.
+Check warmup status:
 
-**Why it is needed:** Even though `OLLAMA_KEEP_ALIVE=-1` keeps models loaded in GPU
-memory indefinitely once loaded, Ollama does not automatically load models on
-startup. The warm-up service sends a minimal inference request to each slot model,
-triggering the initial load into GPU memory. Without this, the first user request
-to each model would experience a long delay while the model is loaded.
+```bash
+systemctl status ollama-warmup ollama-warmup-node0
+```
 
-The warm-up service:
+Re-run warmup manually (e.g. after rotating a slot):
 
-1. Waits for Ollama API to be healthy
-2. Sends a short prompt to each configured slot model
-3. Exits after all models are loaded
+```bash
+systemctl restart ollama-warmup          # Node 1 general models
+systemctl restart ollama-warmup-node0    # Node 0 coding models
+```
 
 ## model_selection.json
 
-The model selection file is read by this role to determine which models to assign to
-each slot. Schema:
+`playbooks/04_models.yml` reads `benchmarks/results/model_selection.json`:
 
 ```json
 {
-  "timestamp": "2025-01-15T10:30:00Z",
-  "slot1_coding": "qwen2.5-coder:14b",
-  "slot2_general": "llama3.1:8b",
-  "slot3_backup": "deepseek-coder-v2:16b",
-  "slot4_experimental": null
+  "slot1_general": "llama3.1:8b",
+  "slot2_general": "mistral:latest",
+  "slot5_general_rotate": "llama3.2:3b",
+  "slot3_coding": "deepseek-coder-v2:16b",
+  "slot4_coding": "qwen2.5-coder:7b",
+  "slot6_coding_rotate": "codegemma:7b",
+  "general_ranking": [...],
+  "coding_ranking": [...],
+  "all_metrics": { ... }
 }
 ```
 
-If `model_selection.json` does not exist (first run before benchmarks), the role
-falls back to default models defined in `group_vars/all.yml`.
-
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags models
-ansible-playbook playbooks/site.yml --tags warmup
+ansible-playbook playbooks/site.yml --tags models -K -e @local.yml
+ansible-playbook playbooks/site.yml --tags models-warmup -K -e @local.yml
 ```

+ 52 - 41
roles/ollama/README.md

@@ -2,69 +2,80 @@
 
 ## Purpose
 
-Install, configure, and maintain the Ollama inference server on the AI server host.
+Install, configure, and maintain Ollama inference server(s) on the AI server host.
+Two instances run simultaneously — one per NUMA socket — to utilize both CPU sockets
+on the Dell M630 (2× E5-2690v4).
 
-## Installation
+## Instances
 
-Ollama is installed using the official install script, which places the binary at
-`/usr/local/bin/ollama` and creates a systemd service. The script handles both fresh
-installs and upgrades.
+| Service                | Port  | NUMA Node | CPUs (physical only) | RAM binding | Purpose          |
+|------------------------|-------|-----------|----------------------|-------------|------------------|
+| `ollama.service`       | 11434 | Node 1    | 1 3 5 … 27 (odd)     | `--membind=1` | General models |
+| `ollama-node0.service` | 11435 | Node 0    | 0 2 4 … 26 (even)    | `--membind=0` | Coding models  |
 
-## Environment Variables
+Both instances share the same model storage directory (`/mnt/ai_data/ollama_models`)
+and Ollama API key. Weights are loaded once into the NUMA node's memory; they are not
+duplicated between instances.
 
-Configuration is applied via a systemd drop-in override file at
-`/etc/systemd/system/ollama.service.d/override.conf`.
+## Configuration
 
-| Variable                  | Value              | Description                                      |
-|---------------------------|--------------------|--------------------------------------------------|
-| `OLLAMA_HOST`             | `0.0.0.0:11434`   | Listen on all interfaces, port 11434             |
-| `OLLAMA_MODELS`           | `/mnt/ai_data/ollama/models` | Model storage directory                |
-| `OLLAMA_KEEP_ALIVE`       | `-1`               | Keep models loaded in GPU memory indefinitely    |
-| `OLLAMA_NUM_PARALLEL`     | `4`                | Number of parallel inference requests            |
-| `OLLAMA_MAX_LOADED_MODELS`| `4`                | Maximum models loaded in GPU memory at once      |
-| `OLLAMA_API_KEY`          | (from Vault)       | API key for authentication                       |
-| `OLLAMA_FLASH_ATTENTION`  | `1`                | Enable Flash Attention for performance           |
-| `OLLAMA_CONTEXT_LENGTH`   | `32768`            | Default context window size                      |
+### Node 1 — systemd override
 
-## Override.conf Approach
+Applied via `/etc/systemd/system/ollama.service.d/override.conf` (templated from
+`templates/ollama/override.conf.j2`):
 
-Rather than modifying the upstream systemd unit file (which would be overwritten on
-upgrades), this role uses a systemd drop-in directory:
+| Variable                   | Value                        | Description                                      |
+|----------------------------|------------------------------|--------------------------------------------------|
+| `OLLAMA_API_KEY`           | (from Vault)                 | Shared key for all API requests                  |
+| `OLLAMA_HOST`              | `0.0.0.0:11434`              | Listen on all interfaces, port 11434             |
+| `OLLAMA_MODELS`            | `/mnt/ai_data/ollama_models` | Shared model storage                             |
+| `OLLAMA_KEEP_ALIVE`        | `-1`                         | Never unload models from RAM                     |
+| `OLLAMA_FLASH_ATTENTION`   | `1`                          | Fused attention kernel — ~20% less memory bandwidth |
+| `OLLAMA_NUM_THREADS`       | `14`                         | Physical cores on NUMA node 1 only               |
+| `OLLAMA_NUM_PARALLEL`      | `2`                          | Concurrent inference streams per instance        |
+| `OLLAMA_MAX_LOADED_MODELS` | `3`                          | 3 models warm per instance (6 total)             |
+| `CPUAffinity`              | `1 3 5 … 27`                 | Odd CPUs = socket 1 physical cores               |
+| `ExecStart`                | `numactl --membind=1 ollama serve` | Pin memory allocations to Node 1 RAM        |
 
-```
-/etc/systemd/system/ollama.service.d/override.conf
-```
+### Node 0 — standalone systemd unit
+
+Deployed to `/etc/systemd/system/ollama-node0.service` (from
+`templates/ollama/ollama-node0.service.j2`). Uses the same variables but with:
 
-This ensures environment variables survive Ollama upgrades while keeping the
-upstream service file intact.
+| Variable   | Value           |
+|------------|-----------------|
+| `OLLAMA_HOST` | `0.0.0.0:11435` |
+| `CPUAffinity` | `0 2 4 … 26` |
+| `ExecStart`   | `numactl --membind=0 ollama serve` |
 
-## Why OLLAMA_API_KEY
+## NUMA Rationale
 
-Without an API key, anyone with network access to port 11434 can use the Ollama API
-to run inference, pull models, or delete models. Setting `OLLAMA_API_KEY` requires
-all API requests to include an `Authorization: Bearer <key>` header, preventing
-unauthenticated access.
+On the M630 with dual E5-2690v4:
+- **Node 1** (odd CPUs) has ~120 GB free RAM — assigned general models (larger)
+- **Node 0** (even CPUs) has ~75 GB free RAM — assigned coding models
+
+Without `numactl --membind`, the OS allocates model weights and KV cache across both
+nodes, causing cross-socket memory traffic (~40 GB/s vs ~68–75 GB/s local).
+`CPUAffinity` alone sets the scheduler; `numactl` sets the memory policy.
 
 ## OLLAMA_FLASH_ATTENTION
 
-Flash Attention is a GPU memory optimization that reduces memory usage and increases
-throughput for transformer inference. Setting `OLLAMA_FLASH_ATTENTION=1` enables
-this optimization for all models. This is a newer addition to Ollama and provides
-measurable performance improvements.
+Enables the fused (Flash) attention kernel — reduces attention memory bandwidth by ~20%
+and improves throughput at all context lengths on AVX2 (E5-2690v4). Note:
+`OLLAMA_KV_CACHE_TYPE` is intentionally **not** set — q8_0 dequantization overhead
+regressed throughput on this CPU despite the bandwidth savings.
 
 ## Upgrade Procedure
 
-To upgrade Ollama to the latest version:
-
 ```bash
-ansible-playbook playbooks/03_ollama.yml
+ansible-playbook playbooks/02_infrastructure.yml -K -e @local.yml --tags ollama
 ```
 
-The official install script detects the existing installation and performs an
-in-place upgrade. The service is restarted after the upgrade.
+The official install script detects the existing installation and performs an in-place
+upgrade. Both `ollama.service` and `ollama-node0.service` are restarted.
 
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags ollama
+ansible-playbook playbooks/site.yml --tags ollama -K -e @local.yml
 ```

+ 31 - 25
roles/openclaw/README.md

@@ -3,56 +3,62 @@
 ## Purpose
 
 Deploy OpenClaw, a Telegram bot that provides access to Ollama models via Telegram
-messaging.
+messaging. Always uses the best warm general-purpose model (`slot1_general` from the
+last benchmark run).
 
 ## Prerequisites
 
 - A Telegram bot token obtained from [@BotFather](https://t.me/BotFather)
 - The token must be stored in Vault at `{{ vault_secret_prefix }}/openclaw:telegram_token`
+- `benchmarks/results/model_selection.json` must exist (produced by `03_benchmark.yml`)
 
-## Installation
+## Model Selection
 
-1. Node.js 20 is installed on the target host
-2. OpenClaw is installed globally via `npm install -g openclaw`
-3. A systemd service (`openclaw.service`) is created for process management
+`08_openclaw.yml` reads `benchmarks/results/model_selection.json` at deploy time and
+sets `openclaw_model` to `slot1_general` — the highest-scoring general model that is
+always warm on the Node 1 instance (port 11434). This ensures the bot always uses the
+best available model without requiring manual updates after a benchmark run.
 
-## Configuration
+The fallback value (used when `model_selection.json` is absent) is set in
+`inventory/group_vars/all.yml` under `openclaw_model`.
 
-Config file location: `/mnt/ai_data/openclaw/config.yml`
+## Ollama Endpoint
 
-The configuration includes:
+OpenClaw connects to `localhost:11434` — the Node 1 general instance. Coding models on
+port 11435 are not accessible to the bot; they are reserved for IDE and API integrations.
 
-- Ollama API endpoint and authentication
-- Telegram bot token (read from Vault)
-- Default model selection
-- Allowed user IDs (if access control is needed)
+## Installation
 
-## Service
+1. Python 3 dependencies (`python-telegram-bot`, `requests`, `pyyaml`) are installed via `pip3`
+2. The bot script is deployed to `/mnt/ai_data/openclaw/bot.py`
+3. Config is templated to `/mnt/ai_data/openclaw/config.yml`
+4. A systemd service (`openclaw.service`) manages the process
 
-```
-/etc/systemd/system/openclaw.service
-```
+## Configuration
+
+Config file location: `/mnt/ai_data/openclaw/config.yml`
 
-The service runs as a systemd unit, automatically starting on boot and restarting
-on failure.
+The configuration includes:
+- Ollama API endpoint (`http://localhost:11434`) and API key (from Vault)
+- Telegram bot token (from Vault)
+- Model name (from `slot1_general`)
 
 ## Vault Integration
 
-The Telegram bot token is stored in Vault:
-
 - **Path:** `{{ vault_secret_prefix }}/openclaw`
 - **Key:** `telegram_token`
 
-The role reads the token from Vault at deploy time and writes it to the config file.
+The Telegram token is read from Vault at deploy time and written to the config file.
 
 ## Skipping Installation
 
-If no Telegram bot token is configured (the Vault secret is empty or absent),
-the OpenClaw installation is skipped entirely during `site.yml`. This allows
-running the full playbook without a Telegram bot token if the feature is not needed.
+If no Telegram bot token is configured (Vault secret absent or empty), the entire
+OpenClaw installation is skipped. This allows running `site.yml` without a Telegram
+bot token.
 
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags openclaw
+ansible-playbook playbooks/site.yml --tags openclaw -K -e @local.yml
+ansible-playbook playbooks/08_openclaw.yml -K -e @local.yml
 ```

+ 36 - 39
roles/openwebui/README.md

@@ -2,30 +2,41 @@
 
 ## Purpose
 
-Deploy Open WebUI with full Ollama integration, RAG support via Qdrant, and SSO via
-Keycloak OIDC.
+Deploy Open WebUI with full Ollama integration across both NUMA instances, RAG support
+via Qdrant, and SSO via Keycloak OIDC.
+
+## Ollama Backend Configuration
+
+Open WebUI connects to **both** Ollama instances simultaneously via `OLLAMA_BASE_URLS`.
+It load-balances requests across them and presents models from both as a single unified
+list.
+
+| Instance      | Port  | Models              |
+|---------------|-------|---------------------|
+| Node 1        | 11434 | General (slots 1, 2, 5) |
+| Node 0        | 11435 | Coding (slots 3, 4, 6)  |
 
 ## Environment Variables
 
-| Variable                      | Value                                                        | Source      |
-|-------------------------------|--------------------------------------------------------------|-------------|
-| `OLLAMA_BASE_URL`             | `http://host.docker.internal:11434`                         | Hardcoded   |
-| `OLLAMA_API_KEY`              | (Ollama API key)                                             | Vault       |
-| `WEBUI_SECRET_KEY`            | (session signing key)                                        | Vault       |
-| `VECTOR_DB`                   | `qdrant`                                                     | Hardcoded   |
-| `QDRANT_URI`                  | `http://host.docker.internal:6333`                          | Hardcoded   |
-| `ENABLE_RAG_WEB_SEARCH`      | `true`                                                       | Hardcoded   |
-| `OAUTH_CLIENT_ID`            | `open-webui`                                                 | Hardcoded   |
-| `OAUTH_CLIENT_SECRET`        | (OIDC client secret)                                         | Vault       |
-| `OPENID_PROVIDER_URL`        | `https://idm.<domain>/realms/<keycloak_realm>/.well-known/openid-configuration` | Vault (keycloak_oidc_url) |
-| `OAUTH_PROVIDER_NAME`        | `{{ platform_name }}`                                        | group_vars  |
-| `ENABLE_OAUTH_SIGNUP`        | `true`                                                       | Hardcoded   |
-| `DEFAULT_USER_ROLE`          | `user`                                                       | Hardcoded   |
-| `WEBUI_NAME`                 | `{{ platform_name }}`                                        | group_vars  |
-| `ENABLE_OAUTH_ROLE_MANAGEMENT` | `true`                                                     | Hardcoded   |
-| `OAUTH_ROLES_CLAIM`          | `realm_access.roles`                                         | Hardcoded   |
-| `OAUTH_ALLOWED_ROLES`        | `ai-user,ai-admin`                                           | Hardcoded   |
-| `OAUTH_ADMIN_ROLES`          | `ai-admin`                                                   | Hardcoded   |
+| Variable                      | Value                                                                                     | Source      |
+|-------------------------------|-------------------------------------------------------------------------------------------|-------------|
+| `OLLAMA_BASE_URLS`            | `http://host.docker.internal:11434;http://host.docker.internal:11435`                    | Hardcoded   |
+| `OLLAMA_API_KEY`              | (Ollama API key)                                                                          | Vault       |
+| `RAG_OLLAMA_BASE_URL`         | `http://host.docker.internal:11434`                                                       | Hardcoded   |
+| `WEBUI_SECRET_KEY`            | (session signing key)                                                                     | Vault       |
+| `VECTOR_DB`                   | `qdrant`                                                                                  | Hardcoded   |
+| `QDRANT_URI`                  | `http://host.docker.internal:6333`                                                        | Hardcoded   |
+| `OAUTH_CLIENT_ID`             | `open-webui`                                                                              | Hardcoded   |
+| `OAUTH_CLIENT_SECRET`         | (OIDC client secret)                                                                      | Vault       |
+| `OPENID_PROVIDER_URL`         | `https://idm.<domain>/realms/<keycloak_realm>/.well-known/openid-configuration`           | Vault       |
+| `OAUTH_PROVIDER_NAME`         | `{{ platform_name }}`                                                                     | group_vars  |
+| `ENABLE_OAUTH_SIGNUP`         | `true`                                                                                    | Hardcoded   |
+| `ENABLE_OAUTH_ROLE_MANAGEMENT`| `true`                                                                                    | Hardcoded   |
+| `OAUTH_ROLES_CLAIM`           | `realm_access.roles`                                                                      | Hardcoded   |
+| `OAUTH_ALLOWED_ROLES`         | `ai-user,ai-admin`                                                                        | Hardcoded   |
+| `OAUTH_ADMIN_ROLES`           | `ai-admin`                                                                                | Hardcoded   |
+| `DEFAULT_MODELS`              | `llama-family`                                                                            | Hardcoded   |
+| `WEBUI_NAME`                  | `{{ platform_name }}`                                                                     | group_vars  |
 
 ## OIDC Setup
 
@@ -38,22 +49,12 @@ Open WebUI uses Keycloak as its OIDC provider:
 ## RAG
 
 - **Vector DB:** Qdrant at `http://host.docker.internal:6333`
-- **Web search:** enabled via `ENABLE_RAG_WEB_SEARCH=true`
-- Users can upload documents through the Open WebUI interface for RAG-augmented
-  conversations
-
-## Model Access
-
-Open WebUI connects to Ollama at `http://host.docker.internal:11434` (the Docker
-host network). The `OLLAMA_API_KEY` environment variable authenticates API requests
-to the Ollama server.
+- `RAG_OLLAMA_BASE_URL` is pinned to port 11434 (Node 1) for embedding requests —
+  keeping RAG on a single stable endpoint avoids split-brain embedding indices
+- Users can upload documents through the Open WebUI interface for RAG-augmented conversations
 
 ## SSO
 
-Users see a "Sign in with {{ platform_name }}" button on the login page. Clicking it
-redirects to the Keycloak login page for the `{{ keycloak_realm }}` realm. After
-authentication, users are redirected back to Open WebUI.
-
 Access is restricted by Keycloak realm role:
 
 | Keycloak role | Open WebUI access      |
@@ -62,12 +63,8 @@ Access is restricted by Keycloak realm role:
 | `ai-admin`    | ✅ Admin               |
 | *(none)*      | ❌ Login blocked       |
 
-New users who authenticate via SSO are automatically created. Their Open WebUI role
-is set based on `OAUTH_ADMIN_ROLES` — users with `ai-admin` get admin access,
-all others get standard user access.
-
 ## Tags
 
 ```bash
-ansible-playbook playbooks/site.yml --tags openwebui
+ansible-playbook playbooks/site.yml --tags openwebui -K -e @local.yml
 ```

+ 26 - 0
templates/ollama/ollama-node0.service.j2

@@ -0,0 +1,26 @@
+[Unit]
+Description=Ollama Service — NUMA Node 0 (Coding Models)
+# Ordered after the node-1 instance (ollama.service) but no Requires= on it:
+# the two instances start independently; After= only sequences start-up.
+After=network-online.target ollama.service
+Wants=network-online.target
+
+[Service]
+# --cpunodebind pins scheduling to node 0 without setting a memory policy,
+# so allocations land on node 0 naturally and THP promotion still works.
+ExecStart=/usr/bin/numactl --cpunodebind=0 {{ ollama_binary_path }} serve
+Environment="OLLAMA_API_KEY={{ ollama_api_key }}"
+# This second instance listens on its own port (the node-1 instance owns 11434).
+Environment="OLLAMA_HOST=0.0.0.0:{{ ollama_node0_port }}"
+# NOTE(review): presumably the same model store as the node-1 instance — confirm
+# both units point at the same directory so weights are not duplicated on disk.
+Environment="OLLAMA_MODELS=/mnt/ai_data/ollama_models"
+Environment="OLLAMA_KEEP_ALIVE={{ ollama_keep_alive }}"
+Environment="OLLAMA_FLASH_ATTENTION={{ ollama_flash_attention }}"
+# NOTE(review): reuses the same thread/parallel/max-model vars as the node-1
+# unit — confirm node 0 has the same core count, or give it dedicated vars.
+Environment="OLLAMA_NUM_THREADS={{ ollama_num_threads }}"
+Environment="OLLAMA_NUM_PARALLEL={{ ollama_num_parallel }}"
+Environment="OLLAMA_MAX_LOADED_MODELS={{ ollama_max_loaded_models }}"
+# NOTE(review): CPUAffinity alongside --cpunodebind is redundant at best; if
+# ollama_node0_cpu_affinity lists only physical cores it re-creates the
+# thread-oversubscription stall the node-1 override comment warns about —
+# confirm the list includes the HT siblings, or drop this line.
+CPUAffinity={{ ollama_node0_cpu_affinity }}
+# Keep model weights resident: unlimited mlock, shielded from the OOM killer.
+LimitMEMLOCK=infinity
+LimitNOFILE=65535
+OOMScoreAdjust=-500
+Restart=always
+RestartSec=3
+User=ollama
+Group=ollama
+
+[Install]
+WantedBy=multi-user.target

+ 16 - 15
templates/ollama/override.conf.j2

@@ -9,12 +9,6 @@ Environment="OLLAMA_KEEP_ALIVE=-1"
 # Flash attention: fused softmax, ~20% less memory bandwidth, faster on AVX2
 Environment="OLLAMA_FLASH_ATTENTION=1"
 
-# KV cache quantization: q8_0 halves KV cache memory vs fp16.
-# Attention reads dominate memory bandwidth at long contexts; smaller KV =
-# fewer bytes transferred per token generated. q8_0 over q4_0: negligible
-# quality loss vs significant noise at long contexts with q4_0.
-Environment="OLLAMA_KV_CACHE_TYPE=q8_0"
-
 # Threads: 14 physical cores on NUMA node 1 only (no hyperthreads).
 # LLM inference is memory-bandwidth-bound; HT siblings share the same memory
 # pipeline and add scheduling overhead without adding bandwidth.
@@ -24,19 +18,26 @@ Environment="OLLAMA_NUM_THREADS={{ ollama_num_threads }}"
 # Keeps per-request throughput high for interactive/single-user workloads.
 Environment="OLLAMA_NUM_PARALLEL={{ ollama_num_parallel }}"
 
-# Keep 4 models warm in RAM (KEEP_ALIVE=-1 means never unload)
+# Keep 3 models warm in RAM per instance (KEEP_ALIVE=-1 means never unload; 6 total across both sockets)
 Environment="OLLAMA_MAX_LOADED_MODELS={{ ollama_max_loaded_models }}"
 
 # ── NUMA / CPU binding ────────────────────────────────────────────────────
-# ExecStart override: numactl --membind=1 guarantees model weights and KV
-# cache are allocated from NUMA node 1 RAM (120 GB free). CPUAffinity alone
-# does not set the memory policy; numactl makes it explicit.
+# numactl --cpunodebind pins the scheduler to all logical CPUs on node 1
+# (14 physical + 14 HT siblings = 28 CPUs). This avoids two failure modes:
+#
+#  1. numactl --membind=1 (MPOL_BIND) suppresses khugepaged THP promotion
+#     for the model's ~2.75 GB anonymous allocation, causing ~700k 4 KB TLB
+#     entries and near-100% L2-STLB miss rate → 128x throughput loss.
+#
+#  2. CPUAffinity restricted to 14 physical cores only forces ~56 Go runtime
+#     OS threads to compete with 14 GGML compute threads on 14 CPUs (5:1
+#     oversubscription). GGML busy-wait barriers then block waiting threads
+#     from checking in → cascading stall across ~400 ops/token → 128x loss.
+#
+# --cpunodebind (sched_setaffinity only, no set_mempolicy) gives 28 CPUs and
+# MPOL_DEFAULT, so allocations go to node 1 naturally and THP works freely.
 ExecStart=
-ExecStart=/usr/bin/numactl --membind=1 {{ ollama_binary_path }} serve
-
-# Restrict scheduler to physical cores on node 1 only (odd CPUs 1–27).
-# Omitting HT siblings (29–55) prevents cross-HT contention on the memory bus.
-CPUAffinity={{ ollama_cpu_affinity }}
+ExecStart=/usr/bin/numactl --cpunodebind={{ ollama_numa_node }} {{ ollama_binary_path }} serve
 
 # ── Memory hardening ───────────────────────────────────────────────────────
 # Prevent model weights from being paged out under memory pressure

+ 28 - 0
templates/ollama/warmup-node0.sh.j2

@@ -0,0 +1,28 @@
+#!/bin/bash
+# Ollama Node 0 model warm-up script (coding models, port {{ ollama_node0_port }})
+# Sends a 1-token generation to each slot model to pin them in RAM
+
+set -e
+
+OLLAMA_URL="http://localhost:{{ ollama_node0_port }}"
+API_KEY="{{ ollama_api_key }}"
+
+# Warm up one model: a 1-token /api/generate forces the server to load it.
+# Best-effort by design — the `|| echo` fallback keeps a single failed model
+# from aborting the whole run despite `set -e`.
+warmup_model() {
+    local model="$1"
+    echo "[warmup-node0] Loading: $model"
+    curl -sf -X POST "${OLLAMA_URL}/api/generate" \
+        -H "Authorization: Bearer ${API_KEY}" \
+        -H "Content-Type: application/json" \
+        -d "{\"model\":\"${model}\",\"prompt\":\"Hi\",\"stream\":false,\"options\":{\"num_predict\":1}}" \
+        > /dev/null || echo "[warmup-node0] Warning: failed to warm up ${model}"
+    echo "[warmup-node0] Done: $model"
+}
+
+warmup_model "{{ model_selection.slot3_coding }}"
+warmup_model "{{ model_selection.slot4_coding }}"
+{# Slot 6 is an optional rotation slot: skip when unset, empty, or 'none'. #}
+{% if model_selection.slot6_coding_rotate | default('') | length > 0
+      and model_selection.slot6_coding_rotate | default('none') != 'none' %}
+warmup_model "{{ model_selection.slot6_coding_rotate }}"
+{% endif %}
+
+echo "[warmup-node0] All Node 0 coding models warmed up."

+ 4 - 4
templates/ollama/warmup.sh.j2

@@ -20,9 +20,9 @@ warmup_model() {
 
 warmup_model "{{ model_selection.slot1_general }}"
 warmup_model "{{ model_selection.slot2_general }}"
-warmup_model "{{ model_selection.slot3_coding }}"
-{% if model_selection.slot4_coding | length > 0 and model_selection.slot4_coding != 'none' %}
-warmup_model "{{ model_selection.slot4_coding }}"
+{% if model_selection.slot5_general_rotate | default('') | length > 0
+      and model_selection.slot5_general_rotate | default('none') != 'none' %}
+warmup_model "{{ model_selection.slot5_general_rotate }}"
 {% endif %}
 
-echo "[warmup] All models warmed up."
+echo "[warmup] All Node 1 general models warmed up."

+ 14 - 0
templates/systemd/ollama-warmup-node0.service.j2

@@ -0,0 +1,14 @@
+[Unit]
+Description=Ollama Model Warm-Up — Node 0 (Coding)
+# Run once after the node-0 Ollama instance; will not start without it.
+After=ollama-node0.service
+Requires=ollama-node0.service
+
+[Service]
+# oneshot + RemainAfterExit=yes: the unit stays "active" after the script
+# exits, so the warm-up runs once per boot rather than being re-triggered.
+Type=oneshot
+RemainAfterExit=yes
+# NOTE(review): After= orders start-up but does not wait for the HTTP API to
+# accept requests; failed warm-ups are only logged by the script, not retried.
+ExecStart=/usr/local/bin/ollama-warmup-node0.sh
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target

+ 15 - 0
templates/vault/vault-unseal.service.j2

@@ -0,0 +1,15 @@
+[Unit]
+Description=HashiCorp Vault Auto-Unseal
+Documentation=https://developer.hashicorp.com/vault/docs/concepts/seal
+# Start after Vault itself; hard dependency so unseal never runs without it.
+After=vault.service network.target
+Requires=vault.service
+
+[Service]
+# oneshot with RemainAfterExit=no: the unit returns to "inactive" once the
+# script exits, so `systemctl start vault-unseal` can re-run it at any time
+# (e.g. after a manual Vault restart re-seals the server).
+Type=oneshot
+ExecStart=/usr/local/bin/vault-unseal.sh
+RemainAfterExit=no
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target

+ 11 - 11
templates/vault/vault-unseal.sh.j2

@@ -1,25 +1,25 @@
 #!/bin/bash
-# Vault auto-unseal script
-# Reads unseal key from vault-init.json and unseals Vault
+# Vault auto-unseal script — managed by Ansible, do not edit manually
+# Reads unseal key from /etc/vault.d/unseal.key and unseals Vault
 
 set -e
 
-VAULT_ADDR="http://127.0.0.1:8200"
-INIT_FILE="/docker_mounts/vault/vault-init.json"
+VAULT_ADDR="http://127.0.0.1:{{ vault_port }}"
+UNSEAL_KEY_FILE="/etc/vault.d/unseal.key"
 
-if [ ! -f "$INIT_FILE" ]; then
-    echo "ERROR: vault-init.json not found at $INIT_FILE"
+if [ ! -f "$UNSEAL_KEY_FILE" ]; then
+    echo "ERROR: unseal key not found at $UNSEAL_KEY_FILE"
     exit 1
 fi
 
-UNSEAL_KEY=$(jq -r '.unseal_keys_b64[0]' "$INIT_FILE")
+UNSEAL_KEY=$(cat "$UNSEAL_KEY_FILE")
 
 if [ -z "$UNSEAL_KEY" ]; then
-    echo "ERROR: Could not extract unseal key from $INIT_FILE"
+    echo "ERROR: unseal key file is empty"
     exit 1
 fi
 
-# Wait for Vault to be ready
+# Wait for Vault API to become ready (up to 60 s)
 for i in $(seq 1 30); do
     STATUS=$(curl -sf "${VAULT_ADDR}/v1/sys/health" 2>/dev/null || true)
     if [ -n "$STATUS" ]; then
@@ -30,7 +30,7 @@ for i in $(seq 1 30); do
         fi
         break
     fi
-    echo "Waiting for Vault... ($i/30)"
+    echo "Waiting for Vault API... ($i/30)"
     sleep 2
 done
 
@@ -38,5 +38,5 @@ echo "Unsealing Vault..."
 curl -sf -X PUT "${VAULT_ADDR}/v1/sys/unseal" \
     -H "Content-Type: application/json" \
     -d "{\"key\": \"${UNSEAL_KEY}\"}"
-
+echo ""
 echo "Vault unsealed successfully."

+ 74 - 0
tftsr_nginx-hardening/CLAUDE.md

@@ -0,0 +1,74 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Target Environment
+
+- **OS:** RHEL 9.6, **NGINX:** 1.20.1 at `/etc/nginx/`, **Ansible:** `ansible_connection: local`
+- **TLS certs:** `/etc/letsencrypt/live/tftsr.com-0001/{fullchain,privkey}.pem`
+- **Services proxied:** 15 internal services on `*.tftsr.com` / `tftsr.com`
+- `sudo dnf install -y ansible-core` is required before first run (not managed by this project)
+
+## Run Commands
+
+```bash
+# Full hardening (all three roles)
+ansible-playbook -K site.yml
+
+# Individual roles
+ansible-playbook -K playbooks/nginx_hardening.yml
+ansible-playbook -K playbooks/fail2ban.yml
+ansible-playbook -K playbooks/geo_blocking.yml
+
+# Refresh country IP ranges from ipdeny.com (run periodically)
+ansible-playbook -K playbooks/update_geo_blocks.yml
+
+# Dry run — no changes applied
+ansible-playbook -K --check site.yml
+```
+
+## Architecture
+
+Three independent roles, each runnable standalone via `playbooks/`:
+
+### `nginx_hardening`
+Deploys four files to `/etc/nginx/conf.d/` prefixed `00-` so they sort before all service configs:
+- `00-security-headers.conf` — `server_tokens off`, HSTS, X-Frame-Options, rate-limit zone, client body size
+- `00-ssl-params.conf` — TLS 1.2/1.3 only, cipher suite, OCSP stapling, resolver
+- `00-proxy-params.conf` — strips `X-Powered-By`/`Server`, sets `X-Real-IP`/`X-Forwarded-*` headers
+- `00-http-redirects.conf` — port-80 301 redirect server blocks for the 11 services that lack them
+
+**Critical constraint:** Existing service configs in `/etc/nginx/conf.d/` are never modified. The 4 services that already have HTTP→HTTPS redirects (keycloak-proxy, vault, ollama-api, vaultwarden) are not in `nginx_redirect_services`. Do not add `ssl_session_cache` to `00-ssl-params.conf` — all service configs already declare `shared:SSL:1m` in their server blocks and a conflicting http-level declaration will break `nginx -t`.
+
+### `fail2ban`
+Installs fail2ban from EPEL, deploys filter definitions and `jail.local`. Three jails:
+- `sshd` → `/var/log/secure`
+- `nginx-4xx` → `/var/log/nginx/access.log` (regex: any 4xx)
+- `nginx-auth` → `/var/log/nginx/access.log` (regex: 401/403 only)
+
+### `geo_blocking`
+Downloads per-country CIDR files from `ipdeny.com/ipblocks/data/aggregated/{cc}-aggregated.zone` at runtime, assembles them into a single nftables set, and loads a standalone `table inet geo_block` (does not touch any existing nftables rules). The include line is appended to `/etc/sysconfig/nftables.conf`. Downloads use `ignore_errors: yes` — missing zone files are silently skipped.
+
+**To unblock a country:** set `blocked: false` for its entry in `roles/geo_blocking/defaults/main.yml` and re-run `update_geo_blocks.yml`.
+
+**ipdeny-absent territories** (no zone file exists — permanently `blocked: false`, no IPs to block): BV, CX, EH, GS, HM, PN, SH, SJ, TF, XK.
+
+**DMZ host has no outbound internet** — zone files must be pre-downloaded elsewhere and copied over:
+```bash
+# On a machine WITH internet access:
+./scripts/download-geo-zones.sh /tmp/geo_zones
+rsync -av /tmp/geo_zones/ sarman@dmz-host:/opt/geo_zones/
+
+# Then run with the local cache:
+ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+```
+The role does a fast 8-second HEAD check to ipdeny.com first; if it fails and `geo_zone_files_dir` is unset, the play fails immediately rather than timing out on all 238 countries.
+
+**YAML boolean trap:** `code: NO` (Norway) is parsed as boolean `false` by PyYAML (YAML 1.1). It must stay quoted as `code: "NO"`. Watch for this if adding new entries.
+
+## Key Design Decisions
+
+- All `template`/`copy`/`lineinfile` tasks use `backup: yes` — timestamped backups are created automatically on every run alongside the modified file.
+- The nft template opens with `add table inet geo_block` + `flush table inet geo_block` for idempotency (safe to re-run).
+- The `geo_blocking` role downloads zone files to a `tempfile` directory and cleans it up at the end of every run.
+- Handlers fire only when a task reports `changed` — NGINX reload and fail2ban restart are not triggered on idempotent re-runs.

+ 4 - 0
tftsr_nginx-hardening/ansible.cfg

@@ -0,0 +1,4 @@
+[defaults]
+inventory = inventory/hosts.yml
+roles_path = roles
+# NOTE(review): inventory is localhost with ansible_connection: local, so
+# disabling host key checking has no practical effect here; re-evaluate if
+# remote hosts are ever added to this inventory.
+host_key_checking = False

+ 4 - 0
tftsr_nginx-hardening/inventory/hosts.yml

@@ -0,0 +1,4 @@
+# Single-host inventory: everything runs on this machine via the local connection.
+all:
+  hosts:
+    localhost:
+      ansible_connection: local

+ 73 - 0
tftsr_nginx-hardening/nginx-hardening/CLAUDE.md

@@ -0,0 +1,73 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Target Environment
+
+- **OS:** RHEL 9, **NGINX:** 1.20+ at `/etc/nginx/`
+- Playbooks target `hosts: all` — configure the target in `inventory/hosts.yml`
+- `sudo dnf install -y ansible-core` is required on the control node before first run
+
+## Run Commands
+
+```bash
+# Full hardening (all three roles)
+ansible-playbook -K site.yml
+
+# Individual roles
+ansible-playbook -K playbooks/nginx_hardening.yml
+ansible-playbook -K playbooks/fail2ban.yml
+ansible-playbook -K playbooks/geo_blocking.yml
+
+# Refresh country IP ranges from ipdeny.com (run periodically)
+ansible-playbook -K playbooks/update_geo_blocks.yml
+
+# Dry run — no changes applied
+ansible-playbook -K --check site.yml
+```
+
+## Architecture
+
+Three independent roles, each runnable standalone via `playbooks/`:
+
+### `nginx_hardening`
+Deploys four files to `/etc/nginx/conf.d/` prefixed `00-` so they sort before all service configs:
+- `00-security-headers.conf` — `server_tokens off`, HSTS, X-Frame-Options, rate-limit zone, client body size
+- `00-ssl-params.conf` — TLS 1.2/1.3 only, cipher suite, OCSP stapling, resolver
+- `00-proxy-params.conf` — strips `X-Powered-By`/`Server`, sets `X-Real-IP`/`X-Forwarded-*` headers
+- `00-http-redirects.conf` — port-80 301 redirect server blocks for the 11 services that lack them
+
+**Critical constraint:** Existing service configs in `/etc/nginx/conf.d/` are never modified. Only list services in `nginx_redirect_services` that are **missing** a port-80 redirect — services that already have one must be excluded or NGINX will have duplicate `server_name` entries. Do not add `ssl_session_cache` to `00-ssl-params.conf` — if any existing service configs already declare `shared:SSL:Xm` in their server blocks, a conflicting http-level declaration with a different size will break `nginx -t`.
+
+### `fail2ban`
+Installs fail2ban from EPEL, deploys filter definitions and `jail.local`. Three jails:
+- `sshd` → `/var/log/secure`
+- `nginx-4xx` → `/var/log/nginx/access.log` (regex: any 4xx)
+- `nginx-auth` → `/var/log/nginx/access.log` (regex: 401/403 only)
+
+### `geo_blocking`
+Downloads per-country CIDR files from `ipdeny.com/ipblocks/data/aggregated/{cc}-aggregated.zone` at runtime, assembles them into a single nftables set, and loads a standalone `table inet geo_block` (does not touch any existing nftables rules). The include line is appended to `/etc/sysconfig/nftables.conf`. Downloads use `ignore_errors: yes` — missing zone files are silently skipped.
+
+**To unblock a country:** set `blocked: false` for its entry in `roles/geo_blocking/defaults/main.yml` and re-run `update_geo_blocks.yml`.
+
+**ipdeny-absent territories** (no zone file exists — permanently `blocked: false`, no IPs to block): BV, CX, EH, GS, HM, PN, SH, SJ, TF, XK.
+
+**DMZ host has no outbound internet** — zone files must be pre-downloaded elsewhere and copied over:
+```bash
+# On a machine WITH internet access:
+./scripts/download-geo-zones.sh /tmp/geo_zones
+rsync -av --no-group /tmp/geo_zones/ user@your-host:/opt/geo_zones/
+
+# Then run with the local cache:
+ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+```
+The role does a fast 8-second HEAD check to ipdeny.com first; if it fails and `geo_zone_files_dir` is unset, the play fails immediately rather than timing out on all 238 countries.
+
+**YAML boolean trap:** `code: NO` (Norway) is parsed as boolean `false` by PyYAML (YAML 1.1). It must stay quoted as `code: "NO"`. Watch for this if adding new entries.
+
+## Key Design Decisions
+
+- All `template`/`copy`/`lineinfile` tasks use `backup: yes` — timestamped backups are created automatically on every run alongside the modified file.
+- The nft template opens with `add table inet geo_block` + `flush table inet geo_block` for idempotency (safe to re-run).
+- The `geo_blocking` role downloads zone files to a `tempfile` directory and cleans it up at the end of every run.
+- Handlers fire only when a task reports `changed` — NGINX reload and fail2ban restart are not triggered on idempotent re-runs.

+ 179 - 0
tftsr_nginx-hardening/nginx-hardening/README.md

@@ -0,0 +1,179 @@
+# nginx-hardening
+
+Ansible project to harden an NGINX reverse proxy to a production security posture. Applies security headers, TLS hardening, HTTP→HTTPS redirects, fail2ban jails, and nftables-based country geo-blocking — without modifying any existing service configurations.
+
+## Target environment
+
+- **OS:** RHEL 9 / Rocky Linux 9 / AlmaLinux 9
+- **NGINX:** 1.20+ with existing service configs in `/etc/nginx/conf.d/`
+- **EPEL:** Must be installed before running (`dnf install -y epel-release`)
+- **nftables:** Installed but not required to be running (managed by this project)
+- **firewalld:** Should be inactive to avoid nftables coexistence issues
+
+## What it does
+
+### Role: `nginx_hardening`
+Deploys four files to `/etc/nginx/conf.d/` prefixed `00-` so they load before all service configs:
+
+| File | Purpose |
+|------|---------|
+| `00-security-headers.conf` | `server_tokens off`, HSTS, X-Frame-Options, X-Content-Type-Options, CSP, rate-limit zone |
+| `00-ssl-params.conf` | TLS 1.2/1.3 only, hardened cipher suite, OCSP stapling, session timeout |
+| `00-proxy-params.conf` | Strips `X-Powered-By`/`Server`, sets `X-Real-IP` and `X-Forwarded-*` headers |
+| `00-http-redirects.conf` | Port-80 → HTTPS 301 redirects for services listed in `nginx_redirect_services` |
+
+**No existing service configs are modified.**
+
+### Role: `fail2ban`
+Installs fail2ban from EPEL and configures three jails:
+
+| Jail | Log | Trigger |
+|------|-----|---------|
+| `sshd` | `/var/log/secure` | Failed SSH logins |
+| `nginx-4xx` | `/var/log/nginx/access.log` | Repeated 4xx responses |
+| `nginx-auth` | `/var/log/nginx/access.log` | Repeated 401/403 responses |
+
+### Role: `geo_blocking`
+Builds a standalone `table inet geo_block` nftables ruleset populated with CIDRs for every country except the US, downloaded from [ipdeny.com](https://www.ipdeny.com). The table is loaded at boot via `/etc/sysconfig/nftables.conf`.
+
+## Prerequisites
+
+On the **Ansible control node** (the machine you run `ansible-playbook` from):
+```bash
+# Ansible itself
+pip install ansible-core
+# or
+dnf install -y ansible-core
+```
+
+On the **target host** (applied automatically by the playbooks):
+- EPEL repo must already be installed
+- SSH access with a user that can `sudo`
+
+## Setup
+
+### 1. Configure your inventory
+
+Edit `inventory/hosts.yml`:
+```yaml
+all:
+  hosts:
+    nginx-proxy:
+      ansible_host: 192.168.1.10          # your server's IP or hostname
+      ansible_user: your_ssh_user
+      # ansible_ssh_private_key_file: ~/.ssh/id_rsa
+```
+
+### 2. Configure HTTP→HTTPS redirects
+
+Edit `roles/nginx_hardening/defaults/main.yml` and populate `nginx_redirect_services` with any services that are **missing** a port-80 redirect in their existing NGINX config:
+
+```yaml
+nginx_redirect_services:
+  - name: myapp
+    server_name: myapp.example.com
+  - name: dashboard
+    server_name: dashboard.example.com
+```
+
+Services that already have a redirect in their existing `conf.d/` file should **not** be listed here.
+
+### 3. (Optional) Tune defaults
+
+All tunable variables live in each role's `defaults/main.yml`:
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `nginx_hsts_max_age` | `31536000` | HSTS max-age in seconds |
+| `nginx_rate_limit_req_zone` | `30r/m` | Rate limit zone definition |
+| `nginx_client_max_body_size` | `10m` | Max upload body size |
+| `fail2ban_bantime` | `3600` | Ban duration (seconds) |
+| `fail2ban_maxretry_ssh` | `5` | SSH failures before ban |
+| `fail2ban_maxretry_nginx_auth` | `5` | 401/403 failures before ban |
+
+## Running
+
+```bash
+# Full hardening (all roles)
+ansible-playbook -K site.yml
+
+# Individual roles
+ansible-playbook -K playbooks/nginx_hardening.yml
+ansible-playbook -K playbooks/fail2ban.yml
+ansible-playbook -K playbooks/geo_blocking.yml
+
+# Refresh country IP ranges (run periodically — ipdeny.com updates regularly)
+ansible-playbook -K playbooks/update_geo_blocks.yml
+
+# Dry run — no changes applied
+ansible-playbook -K --check site.yml
+```
+
+`-K` prompts for the sudo password. Omit it if your user has passwordless sudo.
+
+## Geo-blocking: servers without direct internet access
+
+If your target server cannot reach `ipdeny.com`, pre-download the zone files on a machine that can and copy them over:
+
+```bash
+# On a machine WITH unrestricted internet access:
+./scripts/download-geo-zones.sh /tmp/geo_zones
+
+# Copy to the target server:
+rsync -av --no-group /tmp/geo_zones/ user@your-server:/opt/geo_zones/
+
+# Run the playbook pointing at the local cache:
+ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+```
+
+To make the cache path permanent, add it to your inventory:
+```yaml
+all:
+  hosts:
+    nginx-proxy:
+      ansible_host: 192.168.1.10
+      ansible_user: your_ssh_user
+      geo_zone_files_dir: /opt/geo_zones
+```
+
+### Unblocking a country
+
+Set `blocked: false` for the desired country code in `roles/geo_blocking/defaults/main.yml`, then re-run `update_geo_blocks.yml`.
+
+## Verification
+
+After a successful run:
+
+```bash
+# NGINX config is valid
+sudo nginx -t
+
+# Security headers are present
+curl -sI https://your-domain.com | grep -i 'strict\|x-frame\|x-content'
+
+# HTTP redirects to HTTPS
+curl -I http://your-domain.com   # expect: 301 Moved Permanently
+
+# fail2ban jails are active
+sudo fail2ban-client status
+sudo fail2ban-client status nginx-4xx
+
+# nftables geo-block table is loaded
+sudo nft list table inet geo_block
+```
+
+## Files written to the target host
+
+| Path | Action |
+|------|--------|
+| `/etc/nginx/conf.d/00-security-headers.conf` | Created |
+| `/etc/nginx/conf.d/00-ssl-params.conf` | Created |
+| `/etc/nginx/conf.d/00-proxy-params.conf` | Created |
+| `/etc/nginx/conf.d/00-http-redirects.conf` | Created |
+| `/etc/fail2ban/jail.local` | Created |
+| `/etc/fail2ban/filter.d/nginx-4xx.conf` | Created |
+| `/etc/fail2ban/filter.d/nginx-auth.conf` | Created |
+| `/etc/nftables.d/geo-block.nft` | Created |
+| `/etc/sysconfig/nftables.conf` | Appended (include line) |
+
+All tasks that write files use `backup: yes` — a timestamped copy is created automatically before each overwrite.

+ 4 - 0
tftsr_nginx-hardening/nginx-hardening/ansible.cfg

@@ -0,0 +1,4 @@
+[defaults]
+inventory = inventory/hosts.yml
+roles_path = roles
+# NOTE(review): disabling host key checking trades MITM protection for
+# convenience — prefer a known_hosts entry for the proxy host instead.
+host_key_checking = False

+ 7 - 0
tftsr_nginx-hardening/nginx-hardening/inventory/hosts.yml

@@ -0,0 +1,7 @@
+# Template inventory — replace the placeholders before first run.
+all:
+  hosts:
+    nginx-proxy:
+      ansible_host: YOUR_SERVER_IP
+      ansible_user: YOUR_SSH_USER
+      # ansible_ssh_private_key_file: ~/.ssh/id_rsa
+      # geo_zone_files_dir: /opt/geo_zones   # set if server cannot reach ipdeny.com

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/fail2ban.yml

@@ -0,0 +1,5 @@
+---
+# Standalone entry point: run only the fail2ban role (site.yml runs all roles).
+- hosts: all
+  become: true
+  roles:
+    - fail2ban

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/geo_blocking.yml

@@ -0,0 +1,5 @@
+---
+# Standalone entry point: run only the geo_blocking role (site.yml runs all roles).
+- hosts: all
+  become: true
+  roles:
+    - geo_blocking

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/nginx_hardening.yml

@@ -0,0 +1,5 @@
+---
+# Standalone entry point: run only the nginx_hardening role (site.yml runs all roles).
+- hosts: all
+  become: true
+  roles:
+    - nginx_hardening

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/playbooks/update_geo_blocks.yml

@@ -0,0 +1,5 @@
+---
+# Periodic refresh of country IP ranges. Intentionally identical to
+# geo_blocking.yml — the role re-downloads zone files on every run; this file
+# exists as a separately named entry point for scheduled refreshes.
+- hosts: all
+  become: true
+  roles:
+    - geo_blocking

+ 7 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/defaults/main.yml

@@ -0,0 +1,7 @@
+---
+# fail2ban tunables (all times in seconds).
+fail2ban_bantime: 3600        # ban duration: 1 hour
+fail2ban_findtime: 600        # sliding window in which retries are counted
+fail2ban_maxretry_ssh: 5
+fail2ban_maxretry_nginx_4xx: 20
+fail2ban_maxretry_nginx_auth: 5
+fail2ban_ignoreip: "127.0.0.1/8 ::1"   # never ban loopback

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: restart fail2ban
+  ansible.builtin.service:
+    name: fail2ban
+    state: restarted

+ 41 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/tasks/main.yml

@@ -0,0 +1,41 @@
+---
+- name: Install fail2ban
+  ansible.builtin.dnf:
+    name: fail2ban
+    state: present
+
+- name: Deploy nginx-4xx filter
+  ansible.builtin.template:
+    src: nginx-4xx.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-4xx.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy nginx-auth filter
+  ansible.builtin.template:
+    src: nginx-auth.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-auth.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy jail.local configuration
+  ansible.builtin.template:
+    src: jail.local.j2
+    dest: /etc/fail2ban/jail.local
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Enable and start fail2ban service
+  ansible.builtin.service:
+    name: fail2ban
+    state: started
+    enabled: yes

+ 22 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/jail.local.j2

@@ -0,0 +1,22 @@
+[DEFAULT]
+ignoreip = {{ fail2ban_ignoreip }}
+bantime  = {{ fail2ban_bantime }}
+findtime = {{ fail2ban_findtime }}
+
+[sshd]
+enabled  = true
+port     = ssh
+logpath  = /var/log/secure
+maxretry = {{ fail2ban_maxretry_ssh }}
+
+[nginx-4xx]
+enabled  = true
+filter   = nginx-4xx
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_4xx }}
+
+[nginx-auth]
+enabled  = true
+filter   = nginx-auth
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_auth }}

+ 3 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (4[0-9]{2}) \d+
+ignoreregex =

+ 3 - 0
tftsr_nginx-hardening/nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (401|403) \d+
+ignoreregex =

+ 509 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/defaults/main.yml

@@ -0,0 +1,509 @@
+---
+geo_ipdeny_base_url: "https://www.ipdeny.com/ipblocks/data/aggregated"
+geo_nft_table_dir: "/etc/nftables.d"
+geo_nft_file: "/etc/nftables.d/geo-block.nft"
+# Set this to a directory containing pre-downloaded {cc}.zone files when the
+# target host has no outbound internet access. Leave empty to download live.
+geo_zone_files_dir: ""
+
+geo_countries:
+  - code: AD   # Andorra
+    blocked: true
+  - code: AE   # United Arab Emirates
+    blocked: true
+  - code: AF   # Afghanistan
+    blocked: true
+  - code: AG   # Antigua and Barbuda
+    blocked: true
+  - code: AI   # Anguilla
+    blocked: true
+  - code: AL   # Albania
+    blocked: true
+  - code: AM   # Armenia
+    blocked: true
+  - code: AO   # Angola
+    blocked: true
+  - code: AQ   # Antarctica
+    blocked: true
+  - code: AR   # Argentina
+    blocked: true
+  - code: AS   # American Samoa
+    blocked: true
+  - code: AT   # Austria
+    blocked: true
+  - code: AU   # Australia
+    blocked: true
+  - code: AW   # Aruba
+    blocked: true
+  - code: AX   # Aland Islands
+    blocked: true
+  - code: AZ   # Azerbaijan
+    blocked: true
+  - code: BA   # Bosnia and Herzegovina
+    blocked: true
+  - code: BB   # Barbados
+    blocked: true
+  - code: BD   # Bangladesh
+    blocked: true
+  - code: BE   # Belgium
+    blocked: true
+  - code: BF   # Burkina Faso
+    blocked: true
+  - code: BG   # Bulgaria
+    blocked: true
+  - code: BH   # Bahrain
+    blocked: true
+  - code: BI   # Burundi
+    blocked: true
+  - code: BJ   # Benin
+    blocked: true
+  - code: BL   # Saint Barthelemy
+    blocked: true
+  - code: BM   # Bermuda
+    blocked: true
+  - code: BN   # Brunei Darussalam
+    blocked: true
+  - code: BO   # Bolivia
+    blocked: true
+  - code: BQ   # Bonaire
+    blocked: true
+  - code: BR   # Brazil
+    blocked: true
+  - code: BS   # Bahamas
+    blocked: true
+  - code: BT   # Bhutan
+    blocked: true
+  - code: BV   # Bouvet Island — no ipdeny zone file
+    blocked: false
+  - code: BW   # Botswana
+    blocked: true
+  - code: BY   # Belarus
+    blocked: true
+  - code: BZ   # Belize
+    blocked: true
+  - code: CA   # Canada
+    blocked: true
+  - code: CC   # Cocos Islands
+    blocked: true
+  - code: CD   # Dem. Rep. Congo
+    blocked: true
+  - code: CF   # Central African Republic
+    blocked: true
+  - code: CG   # Congo
+    blocked: true
+  - code: CH   # Switzerland
+    blocked: true
+  - code: CI   # Cote d'Ivoire
+    blocked: true
+  - code: CK   # Cook Islands
+    blocked: true
+  - code: CL   # Chile
+    blocked: true
+  - code: CM   # Cameroon
+    blocked: true
+  - code: CN   # China
+    blocked: true
+  - code: CO   # Colombia
+    blocked: true
+  - code: CR   # Costa Rica
+    blocked: true
+  - code: CU   # Cuba
+    blocked: true
+  - code: CV   # Cabo Verde
+    blocked: true
+  - code: CW   # Curacao
+    blocked: true
+  - code: CX   # Christmas Island — no ipdeny zone file
+    blocked: false
+  - code: CY   # Cyprus
+    blocked: true
+  - code: CZ   # Czechia
+    blocked: true
+  - code: DE   # Germany
+    blocked: true
+  - code: DJ   # Djibouti
+    blocked: true
+  - code: DK   # Denmark
+    blocked: true
+  - code: DM   # Dominica
+    blocked: true
+  - code: DO   # Dominican Republic
+    blocked: true
+  - code: DZ   # Algeria
+    blocked: true
+  - code: EC   # Ecuador
+    blocked: true
+  - code: EE   # Estonia
+    blocked: true
+  - code: EG   # Egypt
+    blocked: true
+  - code: EH   # Western Sahara — no ipdeny zone file
+    blocked: false
+  - code: ER   # Eritrea
+    blocked: true
+  - code: ES   # Spain
+    blocked: true
+  - code: ET   # Ethiopia
+    blocked: true
+  - code: FI   # Finland
+    blocked: true
+  - code: FJ   # Fiji
+    blocked: true
+  - code: FK   # Falkland Islands
+    blocked: true
+  - code: FM   # Micronesia
+    blocked: true
+  - code: FO   # Faroe Islands
+    blocked: true
+  - code: FR   # France
+    blocked: true
+  - code: GA   # Gabon
+    blocked: true
+  - code: GB   # United Kingdom
+    blocked: true
+  - code: GD   # Grenada
+    blocked: true
+  - code: GE   # Georgia
+    blocked: true
+  - code: GF   # French Guiana
+    blocked: true
+  - code: GG   # Guernsey
+    blocked: true
+  - code: GH   # Ghana
+    blocked: true
+  - code: GI   # Gibraltar
+    blocked: true
+  - code: GL   # Greenland
+    blocked: true
+  - code: GM   # Gambia
+    blocked: true
+  - code: GN   # Guinea
+    blocked: true
+  - code: GP   # Guadeloupe
+    blocked: true
+  - code: GQ   # Equatorial Guinea
+    blocked: true
+  - code: GR   # Greece
+    blocked: true
+  - code: GS   # South Georgia — no ipdeny zone file
+    blocked: false
+  - code: GT   # Guatemala
+    blocked: true
+  - code: GU   # Guam
+    blocked: true
+  - code: GW   # Guinea-Bissau
+    blocked: true
+  - code: GY   # Guyana
+    blocked: true
+  - code: HK   # Hong Kong
+    blocked: true
+  - code: HM   # Heard Island — no ipdeny zone file
+    blocked: false
+  - code: HN   # Honduras
+    blocked: true
+  - code: HR   # Croatia
+    blocked: true
+  - code: HT   # Haiti
+    blocked: true
+  - code: HU   # Hungary
+    blocked: true
+  - code: ID   # Indonesia
+    blocked: true
+  - code: IE   # Ireland
+    blocked: true
+  - code: IL   # Israel
+    blocked: true
+  - code: IM   # Isle of Man
+    blocked: true
+  - code: IN   # India
+    blocked: true
+  - code: IO   # British Indian Ocean Territory
+    blocked: true
+  - code: IQ   # Iraq
+    blocked: true
+  - code: IR   # Iran
+    blocked: true
+  - code: IS   # Iceland
+    blocked: true
+  - code: IT   # Italy
+    blocked: true
+  - code: JE   # Jersey
+    blocked: true
+  - code: JM   # Jamaica
+    blocked: true
+  - code: JO   # Jordan
+    blocked: true
+  - code: JP   # Japan
+    blocked: true
+  - code: KE   # Kenya
+    blocked: true
+  - code: KG   # Kyrgyzstan
+    blocked: true
+  - code: KH   # Cambodia
+    blocked: true
+  - code: KI   # Kiribati
+    blocked: true
+  - code: KM   # Comoros
+    blocked: true
+  - code: KN   # Saint Kitts and Nevis
+    blocked: true
+  - code: KP   # North Korea
+    blocked: true
+  - code: KR   # South Korea
+    blocked: true
+  - code: KW   # Kuwait
+    blocked: true
+  - code: KY   # Cayman Islands
+    blocked: true
+  - code: KZ   # Kazakhstan
+    blocked: true
+  - code: LA   # Laos
+    blocked: true
+  - code: LB   # Lebanon
+    blocked: true
+  - code: LC   # Saint Lucia
+    blocked: true
+  - code: LI   # Liechtenstein
+    blocked: true
+  - code: LK   # Sri Lanka
+    blocked: true
+  - code: LR   # Liberia
+    blocked: true
+  - code: LS   # Lesotho
+    blocked: true
+  - code: LT   # Lithuania
+    blocked: true
+  - code: LU   # Luxembourg
+    blocked: true
+  - code: LV   # Latvia
+    blocked: true
+  - code: LY   # Libya
+    blocked: true
+  - code: MA   # Morocco
+    blocked: true
+  - code: MC   # Monaco
+    blocked: true
+  - code: MD   # Moldova
+    blocked: true
+  - code: ME   # Montenegro
+    blocked: true
+  - code: MF   # Saint Martin
+    blocked: true
+  - code: MG   # Madagascar
+    blocked: true
+  - code: MH   # Marshall Islands
+    blocked: true
+  - code: MK   # North Macedonia
+    blocked: true
+  - code: ML   # Mali
+    blocked: true
+  - code: MM   # Myanmar
+    blocked: true
+  - code: MN   # Mongolia
+    blocked: true
+  - code: MO   # Macao
+    blocked: true
+  - code: MP   # Northern Mariana Islands
+    blocked: true
+  - code: MQ   # Martinique
+    blocked: true
+  - code: MR   # Mauritania
+    blocked: true
+  - code: MS   # Montserrat
+    blocked: true
+  - code: MT   # Malta
+    blocked: true
+  - code: MU   # Mauritius
+    blocked: true
+  - code: MV   # Maldives
+    blocked: true
+  - code: MW   # Malawi
+    blocked: true
+  - code: MX   # Mexico
+    blocked: true
+  - code: MY   # Malaysia
+    blocked: true
+  - code: MZ   # Mozambique
+    blocked: true
+  - code: NA   # Namibia
+    blocked: true
+  - code: NC   # New Caledonia
+    blocked: true
+  - code: NE   # Niger
+    blocked: true
+  - code: NF   # Norfolk Island
+    blocked: true
+  - code: NG   # Nigeria
+    blocked: true
+  - code: NI   # Nicaragua
+    blocked: true
+  - code: NL   # Netherlands
+    blocked: true
+  - code: "NO"  # Norway
+    blocked: true
+  - code: NP   # Nepal
+    blocked: true
+  - code: NR   # Nauru
+    blocked: true
+  - code: NU   # Niue
+    blocked: true
+  - code: NZ   # New Zealand
+    blocked: true
+  - code: OM   # Oman
+    blocked: true
+  - code: PA   # Panama
+    blocked: true
+  - code: PE   # Peru
+    blocked: true
+  - code: PF   # French Polynesia
+    blocked: true
+  - code: PG   # Papua New Guinea
+    blocked: true
+  - code: PH   # Philippines
+    blocked: true
+  - code: PK   # Pakistan
+    blocked: true
+  - code: PL   # Poland
+    blocked: true
+  - code: PM   # Saint Pierre and Miquelon
+    blocked: true
+  - code: PN   # Pitcairn — no ipdeny zone file
+    blocked: false
+  - code: PR   # Puerto Rico
+    blocked: true
+  - code: PS   # Palestine
+    blocked: true
+  - code: PT   # Portugal
+    blocked: true
+  - code: PW   # Palau
+    blocked: true
+  - code: PY   # Paraguay
+    blocked: true
+  - code: QA   # Qatar
+    blocked: true
+  - code: RE   # Reunion
+    blocked: true
+  - code: RO   # Romania
+    blocked: true
+  - code: RS   # Serbia
+    blocked: true
+  - code: RU   # Russia
+    blocked: true
+  - code: RW   # Rwanda
+    blocked: true
+  - code: SA   # Saudi Arabia
+    blocked: true
+  - code: SB   # Solomon Islands
+    blocked: true
+  - code: SC   # Seychelles
+    blocked: true
+  - code: SD   # Sudan
+    blocked: true
+  - code: SE   # Sweden
+    blocked: true
+  - code: SG   # Singapore
+    blocked: true
+  - code: SH   # Saint Helena — no ipdeny zone file
+    blocked: false
+  - code: SI   # Slovenia
+    blocked: true
+  - code: SJ   # Svalbard and Jan Mayen — no ipdeny zone file
+    blocked: false
+  - code: SK   # Slovakia
+    blocked: true
+  - code: SL   # Sierra Leone
+    blocked: true
+  - code: SM   # San Marino
+    blocked: true
+  - code: SN   # Senegal
+    blocked: true
+  - code: SO   # Somalia
+    blocked: true
+  - code: SR   # Suriname
+    blocked: true
+  - code: SS   # South Sudan
+    blocked: true
+  - code: ST   # Sao Tome and Principe
+    blocked: true
+  - code: SV   # El Salvador
+    blocked: true
+  - code: SX   # Sint Maarten
+    blocked: true
+  - code: SY   # Syria
+    blocked: true
+  - code: SZ   # Eswatini
+    blocked: true
+  - code: TC   # Turks and Caicos Islands
+    blocked: true
+  - code: TD   # Chad
+    blocked: true
+  - code: TF   # French Southern Territories — no ipdeny zone file
+    blocked: false
+  - code: TG   # Togo
+    blocked: true
+  - code: TH   # Thailand
+    blocked: true
+  - code: TJ   # Tajikistan
+    blocked: true
+  - code: TK   # Tokelau
+    blocked: true
+  - code: TL   # Timor-Leste
+    blocked: true
+  - code: TM   # Turkmenistan
+    blocked: true
+  - code: TN   # Tunisia
+    blocked: true
+  - code: TO   # Tonga
+    blocked: true
+  - code: TR   # Turkey
+    blocked: true
+  - code: TT   # Trinidad and Tobago
+    blocked: true
+  - code: TV   # Tuvalu
+    blocked: true
+  - code: TW   # Taiwan
+    blocked: true
+  - code: TZ   # Tanzania
+    blocked: true
+  - code: UA   # Ukraine
+    blocked: true
+  - code: UG   # Uganda
+    blocked: true
+  - code: UM   # US Minor Outlying Islands
+    blocked: true
+  - code: US   # United States
+    blocked: false
+  - code: UY   # Uruguay
+    blocked: true
+  - code: UZ   # Uzbekistan
+    blocked: true
+  - code: VA   # Vatican City
+    blocked: true
+  - code: VC   # Saint Vincent and the Grenadines
+    blocked: true
+  - code: VE   # Venezuela
+    blocked: true
+  - code: VG   # British Virgin Islands
+    blocked: true
+  - code: VI   # US Virgin Islands
+    blocked: true
+  - code: VN   # Vietnam
+    blocked: true
+  - code: VU   # Vanuatu
+    blocked: true
+  - code: WF   # Wallis and Futuna
+    blocked: true
+  - code: WS   # Samoa
+    blocked: true
+  - code: XK   # Kosovo — no ipdeny zone file
+    blocked: false
+  - code: YE   # Yemen
+    blocked: true
+  - code: YT   # Mayotte
+    blocked: true
+  - code: ZA   # South Africa
+    blocked: true
+  - code: ZM   # Zambia
+    blocked: true
+  - code: ZW   # Zimbabwe
+    blocked: true

+ 4 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/handlers/main.yml

@@ -0,0 +1,4 @@
+---
+- name: reload nftables
+  ansible.builtin.command: nft -f {{ geo_nft_file }}
+  changed_when: true

+ 103 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/tasks/main.yml

@@ -0,0 +1,103 @@
+---
+- name: Ensure nftables.d directory exists
+  ansible.builtin.file:
+    path: "{{ geo_nft_table_dir }}"
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+
+- name: Create temp directory for zone files
+  ansible.builtin.tempfile:
+    state: directory
+    suffix: geo_zones
+  register: geo_temp_dir
+
+# --- Source: live download ---
+
+- name: Test connectivity to ipdeny.com (fast pre-check)
+  ansible.builtin.uri:
+    url: "{{ geo_ipdeny_base_url }}/us-aggregated.zone"
+    method: HEAD
+    timeout: 8
+  register: geo_connectivity_check
+  ignore_errors: yes
+  when: geo_zone_files_dir | length == 0
+
+- name: Fail fast if ipdeny.com is unreachable and no local cache configured
+  ansible.builtin.fail:
+    msg: >-
+      Cannot reach ipdeny.com (connection timed out or refused) and
+      geo_zone_files_dir is not set. Pre-download zone files on a machine
+      with internet access using scripts/download-geo-zones.sh, copy them
+      to this host, then set geo_zone_files_dir in inventory or with -e.
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is failed
+
+- name: Download zone files for blocked countries
+  ansible.builtin.get_url:
+    url: "{{ geo_ipdeny_base_url }}/{{ item.code | lower }}-aggregated.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    timeout: 30
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is succeeded
+
+# --- Source: local pre-downloaded cache ---
+
+- name: Copy zone files from local cache directory
+  ansible.builtin.copy:
+    src: "{{ geo_zone_files_dir }}/{{ item.code | lower }}.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    remote_src: yes
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when: geo_zone_files_dir | length > 0
+
+# --- Assemble and deploy ---
+
+- name: Assemble all CIDRs from downloaded zone files
+  ansible.builtin.shell: >
+    cat {{ geo_temp_dir.path }}/*.zone 2>/dev/null |
+    grep -v '^#' | grep -v '^$' | sort -u
+  register: geo_cidrs_raw
+  changed_when: false
+
+- name: Set geo_blocked_cidrs fact
+  ansible.builtin.set_fact:
+    geo_blocked_cidrs: "{{ geo_cidrs_raw.stdout_lines }}"
+
+- name: Deploy geo-block nftables ruleset
+  ansible.builtin.template:
+    src: geo-block.nft.j2
+    dest: "{{ geo_nft_file }}"
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nftables
+
+- name: Ensure nftables.conf includes geo-block.nft
+  ansible.builtin.lineinfile:
+    path: /etc/sysconfig/nftables.conf
+    line: 'include "{{ geo_nft_file }}"'
+    state: present
+    backup: yes
+
+- name: Enable and start nftables service
+  ansible.builtin.service:
+    name: nftables
+    state: started
+    enabled: yes
+
+- name: Clean up temp directory
+  ansible.builtin.file:
+    path: "{{ geo_temp_dir.path }}"
+    state: absent

+ 26 - 0
tftsr_nginx-hardening/nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2

@@ -0,0 +1,26 @@
+#!/usr/sbin/nft -f
+# Managed by Ansible — do not edit manually
+
+# Ensure table exists, then flush for idempotency
+add table inet geo_block
+flush table inet geo_block
+
+table inet geo_block {
+    set blocked_countries {
+        type ipv4_addr
+        flags interval
+{% if geo_blocked_cidrs | length > 0 %}
+        elements = {
+{% for cidr in geo_blocked_cidrs %}
+            {{ cidr }}{% if not loop.last %},{% endif %}
+
+{% endfor %}
+        }
+{% endif %}
+    }
+
+    chain prerouting {
+        type filter hook prerouting priority -100; policy accept;
+        ip saddr @blocked_countries drop
+    }
+}

+ 15 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/defaults/main.yml

@@ -0,0 +1,15 @@
+---
+nginx_ssl_protocols: "TLSv1.2 TLSv1.3"
+nginx_ssl_ciphers: "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256"
+nginx_hsts_max_age: 31536000
+nginx_rate_limit_req_zone: "$binary_remote_addr zone=general:10m rate=30r/m"
+nginx_client_max_body_size: "10m"
+nginx_proxy_read_timeout: 60
+
+# Services that need a port-80 → HTTPS redirect added.
+# List only services that do NOT already have a redirect in their existing config.
+# Empty by default so no placeholder vhosts are deployed; override in inventory.
+nginx_redirect_services: []
+# Example:
+#   - name: service1
+#     server_name: service1.example.com

+ 5 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: reload nginx
+  ansible.builtin.service:
+    name: nginx
+    state: reloaded

+ 44 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/tasks/main.yml

@@ -0,0 +1,44 @@
+---
+- name: Deploy security headers configuration
+  ansible.builtin.template:
+    src: security_headers.conf.j2
+    dest: /etc/nginx/conf.d/00-security-headers.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy SSL parameters configuration
+  ansible.builtin.template:
+    src: ssl_params.conf.j2
+    dest: /etc/nginx/conf.d/00-ssl-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy proxy parameters configuration
+  ansible.builtin.template:
+    src: proxy_params.conf.j2
+    dest: /etc/nginx/conf.d/00-proxy-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy HTTP to HTTPS redirect configuration
+  ansible.builtin.template:
+    src: http_redirect.conf.j2
+    dest: /etc/nginx/conf.d/00-http-redirects.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Validate NGINX configuration
+  ansible.builtin.command: nginx -t
+  changed_when: false

+ 8 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+{% for svc in nginx_redirect_services %}
+server {
+    listen 80;
+    server_name {{ svc.server_name }};
+    return 301 https://$host$request_uri;
+}
+{% endfor %}

+ 8 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+
+proxy_hide_header X-Powered-By;
+proxy_hide_header Server;
+proxy_set_header X-Real-IP $remote_addr;
+proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+proxy_set_header X-Forwarded-Proto $scheme;
+proxy_read_timeout {{ nginx_proxy_read_timeout }};

+ 17 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2

@@ -0,0 +1,17 @@
+# Managed by Ansible — do not edit manually
+
+server_tokens off;
+
+# Rate limiting zone definition
+limit_req_zone {{ nginx_rate_limit_req_zone }};
+
+# Client body size limit
+client_max_body_size {{ nginx_client_max_body_size }};
+
+# Security headers
+add_header Strict-Transport-Security "max-age={{ nginx_hsts_max_age }}; includeSubDomains; preload" always;
+add_header X-Frame-Options SAMEORIGIN always;
+add_header X-Content-Type-Options nosniff always;
+add_header Referrer-Policy strict-origin-when-cross-origin always;
+add_header Permissions-Policy "geolocation=(), microphone=(), camera=()" always;
+add_header X-XSS-Protection "0" always;

+ 10 - 0
tftsr_nginx-hardening/nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2

@@ -0,0 +1,10 @@
+# Managed by Ansible — do not edit manually
+
+ssl_protocols {{ nginx_ssl_protocols }};
+ssl_ciphers {{ nginx_ssl_ciphers }};
+ssl_prefer_server_ciphers off;
+ssl_session_timeout 1d;
+ssl_stapling on;
+ssl_stapling_verify on;
+resolver 8.8.8.8 8.8.4.4 valid=300s;
+resolver_timeout 5s;

+ 71 - 0
tftsr_nginx-hardening/nginx-hardening/scripts/download-geo-zones.sh

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Download ipdeny.com aggregated zone files for all blocked countries.
+# Run this on a machine WITH internet access, then rsync the output
+# directory to the DMZ host and set geo_zone_files_dir in your inventory.
+#
+# Usage:
+#   ./scripts/download-geo-zones.sh [output-dir]
+#
+# Example workflow:
+#   # On your workstation:
+#   ./scripts/download-geo-zones.sh /tmp/geo_zones
+#   rsync -av /tmp/geo_zones/ sarman@dmz-host:/opt/geo_zones/
+#
+#   # Then run the playbook pointing at the cache:
+#   ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+
+set -euo pipefail
+
+BASE_URL="https://www.ipdeny.com/ipblocks/data/aggregated"
+OUT_DIR="${1:-/tmp/geo_zones}"
+
+# All blocked country codes (excludes US and ipdeny-absent territories)
+COUNTRIES=(
+  AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ
+  BA BB BD BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BW BY BZ
+  CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CY CZ
+  DE DJ DK DM DO DZ
+  EC EE EG ER ES ET
+  FI FJ FK FM FO FR
+  GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GT GU GW GY
+  HK HN HR HT HU
+  ID IE IL IM IN IO IQ IR IS IT
+  JE JM JO JP
+  KE KG KH KI KM KN KP KR KW KY KZ
+  LA LB LC LI LK LR LS LT LU LV LY
+  MA MC MD ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ
+  NA NC NE NF NG NI NL NO NP NR NU NZ
+  OM
+  PA PE PF PG PH PK PL PM PR PS PT PW PY
+  QA
+  RE RO RS RU RW
+  SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ
+  TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ
+  UA UG UM UY UZ
+  VA VC VE VG VI VN VU
+  WF WS
+  YE YT
+  ZA ZM ZW
+)
+
+mkdir -p "$OUT_DIR"
+echo "Downloading ${#COUNTRIES[@]} zone files to $OUT_DIR ..."
+
+ok=0; fail=0
+for cc in "${COUNTRIES[@]}"; do
+  url="${BASE_URL}/${cc,,}-aggregated.zone"
+  dest="${OUT_DIR}/${cc,,}.zone"
+  if curl -fsSL --connect-timeout 10 --max-time 30 -o "$dest" "$url"; then
+    (( ++ok ))
+  else
+    echo "  SKIP $cc (no zone file at ipdeny.com)"
+    rm -f "$dest"
+    (( ++fail ))
+  fi
+done
+
+echo "Done: $ok downloaded, $fail skipped."
+echo ""
+echo "Next steps:"
+echo "  rsync -av ${OUT_DIR}/ USER@DMZ_HOST:/opt/geo_zones/"
+echo "  ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones"

+ 7 - 0
tftsr_nginx-hardening/nginx-hardening/site.yml

@@ -0,0 +1,7 @@
+---
+- hosts: all
+  become: true
+  roles:
+    - nginx_hardening
+    - fail2ban
+    - geo_blocking

+ 6 - 0
tftsr_nginx-hardening/playbooks/fail2ban.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - fail2ban

+ 6 - 0
tftsr_nginx-hardening/playbooks/geo_blocking.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - geo_blocking

+ 6 - 0
tftsr_nginx-hardening/playbooks/nginx_hardening.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - nginx_hardening

+ 6 - 0
tftsr_nginx-hardening/playbooks/update_geo_blocks.yml

@@ -0,0 +1,6 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - geo_blocking

+ 7 - 0
tftsr_nginx-hardening/roles/fail2ban/defaults/main.yml

@@ -0,0 +1,7 @@
+---
+fail2ban_bantime: 3600
+fail2ban_findtime: 600
+fail2ban_maxretry_ssh: 5
+fail2ban_maxretry_nginx_4xx: 20
+fail2ban_maxretry_nginx_auth: 5
+fail2ban_ignoreip: "127.0.0.1/8 ::1"

+ 5 - 0
tftsr_nginx-hardening/roles/fail2ban/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: restart fail2ban
+  ansible.builtin.service:
+    name: fail2ban
+    state: restarted

+ 41 - 0
tftsr_nginx-hardening/roles/fail2ban/tasks/main.yml

@@ -0,0 +1,41 @@
+---
+- name: Install fail2ban
+  ansible.builtin.dnf:
+    name: fail2ban
+    state: present
+
+- name: Deploy nginx-4xx filter
+  ansible.builtin.template:
+    src: nginx-4xx.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-4xx.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy nginx-auth filter
+  ansible.builtin.template:
+    src: nginx-auth.conf.j2
+    dest: /etc/fail2ban/filter.d/nginx-auth.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Deploy jail.local configuration
+  ansible.builtin.template:
+    src: jail.local.j2
+    dest: /etc/fail2ban/jail.local
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: restart fail2ban
+
+- name: Enable and start fail2ban service
+  ansible.builtin.service:
+    name: fail2ban
+    state: started
+    enabled: yes

+ 22 - 0
tftsr_nginx-hardening/roles/fail2ban/templates/jail.local.j2

@@ -0,0 +1,22 @@
+[DEFAULT]
+ignoreip = {{ fail2ban_ignoreip }}
+bantime  = {{ fail2ban_bantime }}
+findtime = {{ fail2ban_findtime }}
+
+[sshd]
+enabled  = true
+port     = ssh
+logpath  = /var/log/secure
+maxretry = {{ fail2ban_maxretry_ssh }}
+
+[nginx-4xx]
+enabled  = true
+filter   = nginx-4xx
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_4xx }}
+
+[nginx-auth]
+enabled  = true
+filter   = nginx-auth
+logpath  = /var/log/nginx/access.log
+maxretry = {{ fail2ban_maxretry_nginx_auth }}

+ 3 - 0
tftsr_nginx-hardening/roles/fail2ban/templates/nginx-4xx.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (4[0-9]{2}) \d+
+ignoreregex =

+ 3 - 0
tftsr_nginx-hardening/roles/fail2ban/templates/nginx-auth.conf.j2

@@ -0,0 +1,3 @@
+[Definition]
+failregex = ^<HOST> - \S+ \[.*\] "(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS) \S+ HTTP/[0-9.]+" (401|403) \d+
+ignoreregex =

+ 509 - 0
tftsr_nginx-hardening/roles/geo_blocking/defaults/main.yml

@@ -0,0 +1,509 @@
+---
+geo_ipdeny_base_url: "https://www.ipdeny.com/ipblocks/data/aggregated"
+geo_nft_table_dir: "/etc/nftables.d"
+geo_nft_file: "/etc/nftables.d/geo-block.nft"
+# Set this to a directory containing pre-downloaded {cc}.zone files when the
+# target host has no outbound internet access. Leave empty to download live.
+geo_zone_files_dir: ""
+
+geo_countries:
+  - code: AD   # Andorra
+    blocked: true
+  - code: AE   # United Arab Emirates
+    blocked: true
+  - code: AF   # Afghanistan
+    blocked: true
+  - code: AG   # Antigua and Barbuda
+    blocked: true
+  - code: AI   # Anguilla
+    blocked: true
+  - code: AL   # Albania
+    blocked: true
+  - code: AM   # Armenia
+    blocked: true
+  - code: AO   # Angola
+    blocked: true
+  - code: AQ   # Antarctica
+    blocked: true
+  - code: AR   # Argentina
+    blocked: true
+  - code: AS   # American Samoa
+    blocked: true
+  - code: AT   # Austria
+    blocked: true
+  - code: AU   # Australia
+    blocked: true
+  - code: AW   # Aruba
+    blocked: true
+  - code: AX   # Aland Islands
+    blocked: true
+  - code: AZ   # Azerbaijan
+    blocked: true
+  - code: BA   # Bosnia and Herzegovina
+    blocked: true
+  - code: BB   # Barbados
+    blocked: true
+  - code: BD   # Bangladesh
+    blocked: true
+  - code: BE   # Belgium
+    blocked: true
+  - code: BF   # Burkina Faso
+    blocked: true
+  - code: BG   # Bulgaria
+    blocked: true
+  - code: BH   # Bahrain
+    blocked: true
+  - code: BI   # Burundi
+    blocked: true
+  - code: BJ   # Benin
+    blocked: true
+  - code: BL   # Saint Barthelemy
+    blocked: true
+  - code: BM   # Bermuda
+    blocked: true
+  - code: BN   # Brunei Darussalam
+    blocked: true
+  - code: BO   # Bolivia
+    blocked: true
+  - code: BQ   # Bonaire
+    blocked: true
+  - code: BR   # Brazil
+    blocked: true
+  - code: BS   # Bahamas
+    blocked: true
+  - code: BT   # Bhutan
+    blocked: true
+  - code: BV   # Bouvet Island — no ipdeny zone file
+    blocked: false
+  - code: BW   # Botswana
+    blocked: true
+  - code: BY   # Belarus
+    blocked: true
+  - code: BZ   # Belize
+    blocked: true
+  - code: CA   # Canada
+    blocked: true
+  - code: CC   # Cocos Islands
+    blocked: true
+  - code: CD   # Dem. Rep. Congo
+    blocked: true
+  - code: CF   # Central African Republic
+    blocked: true
+  - code: CG   # Congo
+    blocked: true
+  - code: CH   # Switzerland
+    blocked: true
+  - code: CI   # Cote d'Ivoire
+    blocked: true
+  - code: CK   # Cook Islands
+    blocked: true
+  - code: CL   # Chile
+    blocked: true
+  - code: CM   # Cameroon
+    blocked: true
+  - code: CN   # China
+    blocked: true
+  - code: CO   # Colombia
+    blocked: true
+  - code: CR   # Costa Rica
+    blocked: true
+  - code: CU   # Cuba
+    blocked: true
+  - code: CV   # Cabo Verde
+    blocked: true
+  - code: CW   # Curacao
+    blocked: true
+  - code: CX   # Christmas Island — no ipdeny zone file
+    blocked: false
+  - code: CY   # Cyprus
+    blocked: true
+  - code: CZ   # Czechia
+    blocked: true
+  - code: DE   # Germany
+    blocked: true
+  - code: DJ   # Djibouti
+    blocked: true
+  - code: DK   # Denmark
+    blocked: true
+  - code: DM   # Dominica
+    blocked: true
+  - code: DO   # Dominican Republic
+    blocked: true
+  - code: DZ   # Algeria
+    blocked: true
+  - code: EC   # Ecuador
+    blocked: true
+  - code: EE   # Estonia
+    blocked: true
+  - code: EG   # Egypt
+    blocked: true
+  - code: EH   # Western Sahara — no ipdeny zone file
+    blocked: false
+  - code: ER   # Eritrea
+    blocked: true
+  - code: ES   # Spain
+    blocked: true
+  - code: ET   # Ethiopia
+    blocked: true
+  - code: FI   # Finland
+    blocked: true
+  - code: FJ   # Fiji
+    blocked: true
+  - code: FK   # Falkland Islands
+    blocked: true
+  - code: FM   # Micronesia
+    blocked: true
+  - code: FO   # Faroe Islands
+    blocked: true
+  - code: FR   # France
+    blocked: true
+  - code: GA   # Gabon
+    blocked: true
+  - code: GB   # United Kingdom
+    blocked: true
+  - code: GD   # Grenada
+    blocked: true
+  - code: GE   # Georgia
+    blocked: true
+  - code: GF   # French Guiana
+    blocked: true
+  - code: GG   # Guernsey
+    blocked: true
+  - code: GH   # Ghana
+    blocked: true
+  - code: GI   # Gibraltar
+    blocked: true
+  - code: GL   # Greenland
+    blocked: true
+  - code: GM   # Gambia
+    blocked: true
+  - code: GN   # Guinea
+    blocked: true
+  - code: GP   # Guadeloupe
+    blocked: true
+  - code: GQ   # Equatorial Guinea
+    blocked: true
+  - code: GR   # Greece
+    blocked: true
+  - code: GS   # South Georgia — no ipdeny zone file
+    blocked: false
+  - code: GT   # Guatemala
+    blocked: true
+  - code: GU   # Guam
+    blocked: true
+  - code: GW   # Guinea-Bissau
+    blocked: true
+  - code: GY   # Guyana
+    blocked: true
+  - code: HK   # Hong Kong
+    blocked: true
+  - code: HM   # Heard Island — no ipdeny zone file
+    blocked: false
+  - code: HN   # Honduras
+    blocked: true
+  - code: HR   # Croatia
+    blocked: true
+  - code: HT   # Haiti
+    blocked: true
+  - code: HU   # Hungary
+    blocked: true
+  - code: ID   # Indonesia
+    blocked: true
+  - code: IE   # Ireland
+    blocked: true
+  - code: IL   # Israel
+    blocked: true
+  - code: IM   # Isle of Man
+    blocked: true
+  - code: IN   # India
+    blocked: true
+  - code: IO   # British Indian Ocean Territory
+    blocked: true
+  - code: IQ   # Iraq
+    blocked: true
+  - code: IR   # Iran
+    blocked: true
+  - code: IS   # Iceland
+    blocked: true
+  - code: IT   # Italy
+    blocked: true
+  - code: JE   # Jersey
+    blocked: true
+  - code: JM   # Jamaica
+    blocked: true
+  - code: JO   # Jordan
+    blocked: true
+  - code: JP   # Japan
+    blocked: true
+  - code: KE   # Kenya
+    blocked: true
+  - code: KG   # Kyrgyzstan
+    blocked: true
+  - code: KH   # Cambodia
+    blocked: true
+  - code: KI   # Kiribati
+    blocked: true
+  - code: KM   # Comoros
+    blocked: true
+  - code: KN   # Saint Kitts and Nevis
+    blocked: true
+  - code: KP   # North Korea
+    blocked: true
+  - code: KR   # South Korea
+    blocked: true
+  - code: KW   # Kuwait
+    blocked: true
+  - code: KY   # Cayman Islands
+    blocked: true
+  - code: KZ   # Kazakhstan
+    blocked: true
+  - code: LA   # Laos
+    blocked: true
+  - code: LB   # Lebanon
+    blocked: true
+  - code: LC   # Saint Lucia
+    blocked: true
+  - code: LI   # Liechtenstein
+    blocked: true
+  - code: LK   # Sri Lanka
+    blocked: true
+  - code: LR   # Liberia
+    blocked: true
+  - code: LS   # Lesotho
+    blocked: true
+  - code: LT   # Lithuania
+    blocked: true
+  - code: LU   # Luxembourg
+    blocked: true
+  - code: LV   # Latvia
+    blocked: true
+  - code: LY   # Libya
+    blocked: true
+  - code: MA   # Morocco
+    blocked: true
+  - code: MC   # Monaco
+    blocked: true
+  - code: MD   # Moldova
+    blocked: true
+  - code: ME   # Montenegro
+    blocked: true
+  - code: MF   # Saint Martin
+    blocked: true
+  - code: MG   # Madagascar
+    blocked: true
+  - code: MH   # Marshall Islands
+    blocked: true
+  - code: MK   # North Macedonia
+    blocked: true
+  - code: ML   # Mali
+    blocked: true
+  - code: MM   # Myanmar
+    blocked: true
+  - code: MN   # Mongolia
+    blocked: true
+  - code: MO   # Macao
+    blocked: true
+  - code: MP   # Northern Mariana Islands
+    blocked: true
+  - code: MQ   # Martinique
+    blocked: true
+  - code: MR   # Mauritania
+    blocked: true
+  - code: MS   # Montserrat
+    blocked: true
+  - code: MT   # Malta
+    blocked: true
+  - code: MU   # Mauritius
+    blocked: true
+  - code: MV   # Maldives
+    blocked: true
+  - code: MW   # Malawi
+    blocked: true
+  - code: MX   # Mexico
+    blocked: true
+  - code: MY   # Malaysia
+    blocked: true
+  - code: MZ   # Mozambique
+    blocked: true
+  - code: NA   # Namibia
+    blocked: true
+  - code: NC   # New Caledonia
+    blocked: true
+  - code: NE   # Niger
+    blocked: true
+  - code: NF   # Norfolk Island
+    blocked: true
+  - code: NG   # Nigeria
+    blocked: true
+  - code: NI   # Nicaragua
+    blocked: true
+  - code: NL   # Netherlands
+    blocked: true
+  - code: "NO"  # Norway
+    blocked: true
+  - code: NP   # Nepal
+    blocked: true
+  - code: NR   # Nauru
+    blocked: true
+  - code: NU   # Niue
+    blocked: true
+  - code: NZ   # New Zealand
+    blocked: true
+  - code: OM   # Oman
+    blocked: true
+  - code: PA   # Panama
+    blocked: true
+  - code: PE   # Peru
+    blocked: true
+  - code: PF   # French Polynesia
+    blocked: true
+  - code: PG   # Papua New Guinea
+    blocked: true
+  - code: PH   # Philippines
+    blocked: true
+  - code: PK   # Pakistan
+    blocked: true
+  - code: PL   # Poland
+    blocked: true
+  - code: PM   # Saint Pierre and Miquelon
+    blocked: true
+  - code: PN   # Pitcairn — no ipdeny zone file
+    blocked: false
+  - code: PR   # Puerto Rico
+    blocked: true
+  - code: PS   # Palestine
+    blocked: true
+  - code: PT   # Portugal
+    blocked: true
+  - code: PW   # Palau
+    blocked: true
+  - code: PY   # Paraguay
+    blocked: true
+  - code: QA   # Qatar
+    blocked: true
+  - code: RE   # Reunion
+    blocked: true
+  - code: RO   # Romania
+    blocked: true
+  - code: RS   # Serbia
+    blocked: true
+  - code: RU   # Russia
+    blocked: true
+  - code: RW   # Rwanda
+    blocked: true
+  - code: SA   # Saudi Arabia
+    blocked: true
+  - code: SB   # Solomon Islands
+    blocked: true
+  - code: SC   # Seychelles
+    blocked: true
+  - code: SD   # Sudan
+    blocked: true
+  - code: SE   # Sweden
+    blocked: true
+  - code: SG   # Singapore
+    blocked: true
+  - code: SH   # Saint Helena — no ipdeny zone file
+    blocked: false
+  - code: SI   # Slovenia
+    blocked: true
+  - code: SJ   # Svalbard and Jan Mayen — no ipdeny zone file
+    blocked: false
+  - code: SK   # Slovakia
+    blocked: true
+  - code: SL   # Sierra Leone
+    blocked: true
+  - code: SM   # San Marino
+    blocked: true
+  - code: SN   # Senegal
+    blocked: true
+  - code: SO   # Somalia
+    blocked: true
+  - code: SR   # Suriname
+    blocked: true
+  - code: SS   # South Sudan
+    blocked: true
+  - code: ST   # Sao Tome and Principe
+    blocked: true
+  - code: SV   # El Salvador
+    blocked: true
+  - code: SX   # Sint Maarten
+    blocked: true
+  - code: SY   # Syria
+    blocked: true
+  - code: SZ   # Eswatini
+    blocked: true
+  - code: TC   # Turks and Caicos Islands
+    blocked: true
+  - code: TD   # Chad
+    blocked: true
+  - code: TF   # French Southern Territories — no ipdeny zone file
+    blocked: false
+  - code: TG   # Togo
+    blocked: true
+  - code: TH   # Thailand
+    blocked: true
+  - code: TJ   # Tajikistan
+    blocked: true
+  - code: TK   # Tokelau
+    blocked: true
+  - code: TL   # Timor-Leste
+    blocked: true
+  - code: TM   # Turkmenistan
+    blocked: true
+  - code: TN   # Tunisia
+    blocked: true
+  - code: TO   # Tonga
+    blocked: true
+  - code: TR   # Turkey
+    blocked: true
+  - code: TT   # Trinidad and Tobago
+    blocked: true
+  - code: TV   # Tuvalu
+    blocked: true
+  - code: TW   # Taiwan
+    blocked: true
+  - code: TZ   # Tanzania
+    blocked: true
+  - code: UA   # Ukraine
+    blocked: true
+  - code: UG   # Uganda
+    blocked: true
+  - code: UM   # US Minor Outlying Islands
+    blocked: true
+  - code: US   # United States
+    blocked: false
+  - code: UY   # Uruguay
+    blocked: true
+  - code: UZ   # Uzbekistan
+    blocked: true
+  - code: VA   # Vatican City
+    blocked: true
+  - code: VC   # Saint Vincent and the Grenadines
+    blocked: true
+  - code: VE   # Venezuela
+    blocked: true
+  - code: VG   # British Virgin Islands
+    blocked: true
+  - code: VI   # US Virgin Islands
+    blocked: true
+  - code: VN   # Vietnam
+    blocked: true
+  - code: VU   # Vanuatu
+    blocked: true
+  - code: WF   # Wallis and Futuna
+    blocked: true
+  - code: WS   # Samoa
+    blocked: true
+  - code: XK   # Kosovo — no ipdeny zone file
+    blocked: false
+  - code: YE   # Yemen
+    blocked: true
+  - code: YT   # Mayotte
+    blocked: true
+  - code: ZA   # South Africa
+    blocked: true
+  - code: ZM   # Zambia
+    blocked: true
+  - code: ZW   # Zimbabwe
+    blocked: true

+ 4 - 0
tftsr_nginx-hardening/roles/geo_blocking/handlers/main.yml

@@ -0,0 +1,4 @@
+---
+- name: reload nftables
+  ansible.builtin.command: nft -f {{ geo_nft_file }}
+  changed_when: true

+ 103 - 0
tftsr_nginx-hardening/roles/geo_blocking/tasks/main.yml

@@ -0,0 +1,103 @@
+---
+- name: Ensure nftables.d directory exists
+  ansible.builtin.file:
+    path: "{{ geo_nft_table_dir }}"
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+
+- name: Create temp directory for zone files
+  ansible.builtin.tempfile:
+    state: directory
+    suffix: geo_zones
+  register: geo_temp_dir
+
+# --- Source: live download ---
+
+- name: Test connectivity to ipdeny.com (fast pre-check)
+  ansible.builtin.uri:
+    url: "{{ geo_ipdeny_base_url }}/us-aggregated.zone"
+    method: HEAD
+    timeout: 8
+  register: geo_connectivity_check
+  ignore_errors: yes
+  when: geo_zone_files_dir | length == 0
+
+- name: Fail fast if ipdeny.com is unreachable and no local cache configured
+  ansible.builtin.fail:
+    msg: >-
+      Cannot reach ipdeny.com (connection timed out or refused) and
+      geo_zone_files_dir is not set. Pre-download zone files on a machine
+      with internet access using scripts/download-geo-zones.sh, copy them
+      to this host, then set geo_zone_files_dir in inventory or with -e.
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is failed
+
+- name: Download zone files for blocked countries
+  ansible.builtin.get_url:
+    url: "{{ geo_ipdeny_base_url }}/{{ item.code | lower }}-aggregated.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    timeout: 30
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when:
+    - geo_zone_files_dir | length == 0
+    - geo_connectivity_check is succeeded
+
+# --- Source: local pre-downloaded cache ---
+
+- name: Copy zone files from local cache directory
+  ansible.builtin.copy:
+    src: "{{ geo_zone_files_dir }}/{{ item.code | lower }}.zone"
+    dest: "{{ geo_temp_dir.path }}/{{ item.code | lower }}.zone"
+    remote_src: yes
+  loop: "{{ geo_countries | selectattr('blocked', 'equalto', true) | list }}"
+  loop_control:
+    label: "{{ item.code }}"
+  ignore_errors: yes
+  when: geo_zone_files_dir | length > 0
+
+# --- Assemble and deploy ---
+
+- name: Assemble all CIDRs from downloaded zone files
+  ansible.builtin.shell: >
+    cat {{ geo_temp_dir.path }}/*.zone 2>/dev/null |
+    grep -v '^#' | grep -v '^$' | sort -u || true
+  register: geo_cidrs_raw
+  changed_when: false
+
+- name: Set geo_blocked_cidrs fact
+  ansible.builtin.set_fact:
+    geo_blocked_cidrs: "{{ geo_cidrs_raw.stdout_lines }}"
+
+- name: Deploy geo-block nftables ruleset
+  ansible.builtin.template:
+    src: geo-block.nft.j2
+    dest: "{{ geo_nft_file }}"
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nftables
+
+- name: Ensure nftables.conf includes geo-block.nft
+  ansible.builtin.lineinfile:
+    path: /etc/sysconfig/nftables.conf
+    line: 'include "{{ geo_nft_file }}"'
+    state: present
+    backup: yes
+
+- name: Enable and start nftables service
+  ansible.builtin.service:
+    name: nftables
+    state: started
+    enabled: yes
+
+- name: Clean up temp directory
+  ansible.builtin.file:
+    path: "{{ geo_temp_dir.path }}"
+    state: absent

+ 26 - 0
tftsr_nginx-hardening/roles/geo_blocking/templates/geo-block.nft.j2

@@ -0,0 +1,26 @@
+#!/usr/sbin/nft -f
+# Managed by Ansible — do not edit manually
+
+# Ensure table exists, then flush for idempotency
+add table inet geo_block
+flush table inet geo_block
+
+table inet geo_block {
+    set blocked_countries {
+        type ipv4_addr
+        flags interval
+{% if geo_blocked_cidrs | length > 0 %}
+        elements = {
+{% for cidr in geo_blocked_cidrs %}
+            {{ cidr }}{% if not loop.last %},{% endif %}
+
+{% endfor %}
+        }
+{% endif %}
+    }
+
+    chain prerouting {
+        type filter hook prerouting priority -100; policy accept;
+        ip saddr @blocked_countries drop
+    }
+}

+ 31 - 0
tftsr_nginx-hardening/roles/nginx_hardening/defaults/main.yml

@@ -0,0 +1,31 @@
+---
+nginx_ssl_protocols: "TLSv1.2 TLSv1.3"
+nginx_ssl_ciphers: "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256"
+nginx_hsts_max_age: 31536000
+nginx_rate_limit_req_zone: "$binary_remote_addr zone=general:10m rate=30r/m"
+nginx_client_max_body_size: "10m"
+nginx_proxy_read_timeout: 60
+
+nginx_redirect_services:
+  - name: gogs
+    server_name: gogs.tftsr.com
+  - name: homeassist
+    server_name: homeassist.tftsr.com
+  - name: kimai
+    server_name: kimai.tftsr.com
+  - name: ollama-ui
+    server_name: ollama-ui.tftsr.com
+  - name: overseerr
+    server_name: overseerr.tftsr.com
+  - name: plex
+    server_name: plex.tftsr.com
+  - name: portainer
+    server_name: portainer.tftsr.com
+  - name: radarr
+    server_name: radarr.tftsr.com
+  - name: retro
+    server_name: retro.tftsr.com
+  - name: sonarr
+    server_name: sonarr.tftsr.com
+  - name: trilium
+    server_name: trilium.tftsr.com

+ 5 - 0
tftsr_nginx-hardening/roles/nginx_hardening/handlers/main.yml

@@ -0,0 +1,5 @@
+---
+- name: reload nginx
+  ansible.builtin.service:
+    name: nginx
+    state: reloaded

+ 44 - 0
tftsr_nginx-hardening/roles/nginx_hardening/tasks/main.yml

@@ -0,0 +1,44 @@
+---
+- name: Deploy security headers configuration
+  ansible.builtin.template:
+    src: security_headers.conf.j2
+    dest: /etc/nginx/conf.d/00-security-headers.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy SSL parameters configuration
+  ansible.builtin.template:
+    src: ssl_params.conf.j2
+    dest: /etc/nginx/conf.d/00-ssl-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy proxy parameters configuration
+  ansible.builtin.template:
+    src: proxy_params.conf.j2
+    dest: /etc/nginx/conf.d/00-proxy-params.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Deploy HTTP to HTTPS redirect configuration
+  ansible.builtin.template:
+    src: http_redirect.conf.j2
+    dest: /etc/nginx/conf.d/00-http-redirects.conf
+    owner: root
+    group: root
+    mode: '0644'
+    backup: yes
+  notify: reload nginx
+
+- name: Validate NGINX configuration
+  ansible.builtin.command: nginx -t
+  changed_when: false

+ 8 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/http_redirect.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+{% for svc in nginx_redirect_services %}
+server {
+    listen 80;
+    server_name {{ svc.server_name }};
+    return 301 https://$host$request_uri;
+}
+{% endfor %}

+ 8 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/proxy_params.conf.j2

@@ -0,0 +1,8 @@
+# Managed by Ansible — do not edit manually
+
+proxy_hide_header X-Powered-By;
+proxy_hide_header Server;
+proxy_set_header X-Real-IP $remote_addr;
+proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+proxy_set_header X-Forwarded-Proto $scheme;
+proxy_read_timeout {{ nginx_proxy_read_timeout }};

+ 17 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/security_headers.conf.j2

@@ -0,0 +1,17 @@
+# Managed by Ansible — do not edit manually
+
+server_tokens off;
+
+# Rate limiting zone definition
+limit_req_zone {{ nginx_rate_limit_req_zone }};
+
+# Client body size limit
+client_max_body_size {{ nginx_client_max_body_size }};
+
+# Security headers
+add_header Strict-Transport-Security "max-age={{ nginx_hsts_max_age }}; includeSubDomains; preload" always;
+add_header X-Frame-Options SAMEORIGIN always;
+add_header X-Content-Type-Options nosniff always;
+add_header Referrer-Policy strict-origin-when-cross-origin always;
+add_header Permissions-Policy "geolocation=(), microphone=(), camera=()" always;
+add_header X-XSS-Protection "1; mode=block" always;

+ 10 - 0
tftsr_nginx-hardening/roles/nginx_hardening/templates/ssl_params.conf.j2

@@ -0,0 +1,10 @@
+# Managed by Ansible — do not edit manually
+
+ssl_protocols {{ nginx_ssl_protocols }};
+ssl_ciphers {{ nginx_ssl_ciphers }};
+ssl_prefer_server_ciphers off;
+ssl_session_timeout 1d;
+ssl_stapling on;
+ssl_stapling_verify on;
+resolver 8.8.8.8 8.8.4.4 valid=300s;
+resolver_timeout 5s;

+ 71 - 0
tftsr_nginx-hardening/scripts/download-geo-zones.sh

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Download ipdeny.com aggregated zone files for all blocked countries.
+# Run this on a machine WITH internet access, then rsync the output
+# directory to the DMZ host and set geo_zone_files_dir in your inventory.
+#
+# Usage:
+#   ./scripts/download-geo-zones.sh [output-dir]
+#
+# Example workflow:
+#   # On your workstation:
+#   ./scripts/download-geo-zones.sh /tmp/geo_zones
+#   rsync -av /tmp/geo_zones/ sarman@dmz-host:/opt/geo_zones/
+#
+#   # Then run the playbook pointing at the cache:
+#   ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones
+
+set -euo pipefail
+
+BASE_URL="https://www.ipdeny.com/ipblocks/data/aggregated"
+OUT_DIR="${1:-/tmp/geo_zones}"
+
+# All blocked country codes (excludes US and ipdeny-absent territories)
+COUNTRIES=(
+  AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ
+  BA BB BD BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BW BY BZ
+  CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CY CZ
+  DE DJ DK DM DO DZ
+  EC EE EG ER ES ET
+  FI FJ FK FM FO FR
+  GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GT GU GW GY
+  HK HN HR HT HU
+  ID IE IL IM IN IO IQ IR IS IT
+  JE JM JO JP
+  KE KG KH KI KM KN KP KR KW KY KZ
+  LA LB LC LI LK LR LS LT LU LV LY
+  MA MC MD ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ
+  NA NC NE NF NG NI NL NO NP NR NU NZ
+  OM
+  PA PE PF PG PH PK PL PM PR PS PT PW PY
+  QA
+  RE RO RS RU RW
+  SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ
+  TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ
+  UA UG UM UY UZ
+  VA VC VE VG VI VN VU
+  WF WS
+  YE YT
+  ZA ZM ZW
+)
+
+mkdir -p "$OUT_DIR"
+echo "Downloading ${#COUNTRIES[@]} zone files to $OUT_DIR ..."
+
+ok=0; fail=0
+for cc in "${COUNTRIES[@]}"; do
+  url="${BASE_URL}/${cc,,}-aggregated.zone"
+  dest="${OUT_DIR}/${cc,,}.zone"
+  if curl -fsSL --connect-timeout 10 --max-time 30 -o "$dest" "$url"; then
+    (( ++ok ))
+  else
+    echo "  SKIP $cc (no zone file at ipdeny.com)"
+    rm -f "$dest"
+    (( ++fail ))
+  fi
+done
+
+echo "Done: $ok downloaded, $fail skipped."
+echo ""
+echo "Next steps:"
+echo "  rsync -av ${OUT_DIR}/ USER@DMZ_HOST:/opt/geo_zones/"
+echo "  ansible-playbook -K playbooks/geo_blocking.yml -e geo_zone_files_dir=/opt/geo_zones"

+ 8 - 0
tftsr_nginx-hardening/site.yml

@@ -0,0 +1,8 @@
+---
+- hosts: localhost
+  connection: local
+  become: true
+  roles:
+    - nginx_hardening
+    - fail2ban
+    - geo_blocking