---
# playbooks/02_infrastructure.yml
# Install Docker and configure Ollama on ai_server.
#
# Variables referenced here but not defined in this file (they must come
# from inventory, group_vars or extra-vars):
#   ai_server_ip, vault_port, vault_secret_prefix, ollama_node0_port,
#   ansible_user
- name: "Infrastructure | Docker and Ollama setup on ai_server"
  hosts: ai_server
  become: true
  gather_facts: true
  tags:
    - infrastructure
  vars:
    # Token file is read on the controller via the `file` lookup below.
    vault_token_file: "{{ playbook_dir }}/../vault/.vault-token"
    vault_url: "http://{{ ai_server_ip }}:{{ vault_port }}"

  pre_tasks:
    # Tagged `always` so these run even when the play is limited by --tags.
    - name: "Infrastructure | Install Python Docker SDK prerequisites"
      ansible.builtin.dnf:
        name:
          - python3-pip
          - python3-requests
          - numactl
        state: present
      tags: always

    - name: "Infrastructure | Install Python docker SDK via pip"
      ansible.builtin.pip:
        name: docker
        state: present
        executable: pip3
      tags: always

  tasks:
    # ── Docker installation ──────────────────────────────────────────
    - name: "Docker | Check if Docker CE repo is already configured"
      ansible.builtin.stat:
        path: /etc/yum.repos.d/docker-ce.repo
      register: docker_repo_file
      tags:
        - docker

    # Guarded by the stat result above, so it only runs (and only reports
    # "changed") when the repo file is missing.
    - name: "Docker | Add Docker CE repository"
      ansible.builtin.command:
        cmd: dnf config-manager --add-repo https://download.docker.com/linux/fedora/docker-ce.repo
      when: not docker_repo_file.stat.exists
      changed_when: true
      tags:
        - docker

    - name: "Docker | Install Docker CE packages"
      ansible.builtin.dnf:
        name:
          - docker-ce
          - docker-ce-cli
          - containerd.io
          - docker-compose-plugin
        state: present
      tags:
        - docker

    - name: "Docker | Add {{ ansible_user }} to docker group"
      ansible.builtin.user:
        name: "{{ ansible_user }}"
        groups: docker
        append: true
      tags:
        - docker

    - name: "Docker | Start and enable docker.service"
      ansible.builtin.systemd:
        name: docker
        state: started
        enabled: true
      tags:
        - docker

    # ── Ollama installation and configuration ────────────────────────
    - name: "Ollama | Check if ollama binary exists"
      ansible.builtin.stat:
        path: "{{ item }}"
      loop:
        - /usr/local/bin/ollama
        - /usr/bin/ollama
      register: ollama_binary_check
      tags:
        - ollama

    # True when at least one of the candidate paths above exists.
    - name: "Ollama | Set ollama installed fact"
      ansible.builtin.set_fact:
        ollama_installed: >-
          {{ ollama_binary_check.results
          | selectattr('stat.exists', 'equalto', true)
          | list | length > 0 }}
      tags:
        - ollama

    # pipefail makes a failed download fail the task; without it `sh` would
    # receive an empty script and exit 0. pipefail needs bash, hence the
    # explicit executable.
    - name: "Ollama | Install Ollama"
      ansible.builtin.shell:
        cmd: set -o pipefail && curl -fsSL https://ollama.ai/install.sh | sh
        executable: /bin/bash
      when: not ollama_installed
      changed_when: true
      tags:
        - ollama

    # Runs after the install task on purpose: ansible.builtin.user creates a
    # missing account, so running this before the installer would create a
    # regular `ollama` login user instead of leaving account creation to the
    # installer (which is expected to set up the service account).
    - name: "Docker | Add ollama user to docker group"
      ansible.builtin.user:
        name: ollama
        groups: docker
        append: true
      tags:
        - docker

    # no_log keeps the Vault token and the retrieved key out of task output.
    - name: "Ollama | Retrieve OLLAMA_API_KEY from Vault"
      ansible.builtin.set_fact:
        ollama_api_key: >-
          {{ lookup('community.hashi_vault.hashi_vault',
          vault_secret_prefix ~ '/ollama:api_key token='
          ~ lookup('ansible.builtin.file', vault_token_file)
          ~ ' url=' ~ vault_url) }}
      no_log: true
      tags:
        - ollama

    - name: "Ollama | Create systemd override directory"
      ansible.builtin.file:
        path: /etc/systemd/system/ollama.service.d
        state: directory
        mode: "0755"
        owner: root
        group: root
      tags:
        - ollama

    - name: "Ollama | Template systemd override configuration"
      ansible.builtin.template:
        src: "{{ playbook_dir }}/../templates/ollama/override.conf.j2"
        dest: /etc/systemd/system/ollama.service.d/override.conf
        mode: "0644"
        owner: root
        group: root
      notify:
        - Reload systemd and restart ollama
      tags:
        - ollama

    # Run the notified restart now, so the readiness check below probes the
    # service with the new override applied rather than the old process.
    - name: "Ollama | Apply pending ollama unit changes"
      ansible.builtin.meta: flush_handlers
      tags:
        - ollama

    - name: "Ollama | Ensure Ollama is running"
      ansible.builtin.systemd:
        name: ollama
        state: started
        enabled: true
      tags:
        - ollama

    # Polls for up to 24 attempts, 5 s apart.
    - name: "Ollama | Wait for Ollama API to be ready"
      ansible.builtin.uri:
        url: "http://localhost:11434/api/tags"
        method: GET
        status_code: 200
        timeout: 10
      register: ollama_ready
      retries: 24
      delay: 5
      until: ollama_ready.status == 200
      tags:
        - ollama

    - name: "Ollama | Deploy ollama-node0 systemd unit"
      ansible.builtin.template:
        src: "{{ playbook_dir }}/../templates/ollama/ollama-node0.service.j2"
        dest: /etc/systemd/system/ollama-node0.service
        mode: "0644"
        owner: root
        group: root
      notify:
        - Reload systemd and start ollama-node0
      tags:
        - ollama

    # Same reasoning as for the main ollama unit: apply the unit change
    # before checking readiness.
    - name: "Ollama | Apply pending ollama-node0 unit changes"
      ansible.builtin.meta: flush_handlers
      tags:
        - ollama

    - name: "Ollama | Enable and start ollama-node0"
      ansible.builtin.systemd:
        name: ollama-node0
        enabled: true
        state: started
        daemon_reload: true
      tags:
        - ollama

    # Unlike the check against port 11434 above, this request sends the API
    # key retrieved from Vault as a bearer token.
    - name: "Ollama | Wait for ollama-node0 API to be ready"
      ansible.builtin.uri:
        url: "http://localhost:{{ ollama_node0_port }}/api/tags"
        method: GET
        headers:
          Authorization: "Bearer {{ ollama_api_key }}"
        status_code: 200
        timeout: 10
      register: ollama_node0_ready
      retries: 24
      delay: 5
      until: ollama_node0_ready.status == 200
      tags:
        - ollama

    # ── OS-level kernel tuning for dedicated inference server ────────────────
    - name: "OS Tune | Apply sysctl settings for inference workload"
      ansible.posix.sysctl:
        name: "{{ item.name }}"
        value: "{{ item.value }}"
        sysctl_file: /etc/sysctl.d/99-ollama-perf.conf
        reload: true
        state: present
      loop:
        # Disable auto-NUMA migration — CPUAffinity pins Ollama to node 1/0
        # physical cores; NUMA balancing could migrate pages mid-inference.
        - name: kernel.numa_balancing
          value: "0"
        # Near-zero swappiness: prevents model weights being paged out under
        # memory pressure (complements LimitMEMLOCK=infinity in the unit file).
        - name: vm.swappiness
          value: "1"
        # Required for mlock to succeed without reservation failures.
        - name: vm.overcommit_memory
          value: "1"
      tags:
        - os-tune

    # The sysfs file marks the active mode with brackets, e.g.
    # "always [madvise] never"; only write (and report a change) when
    # madvise is not already selected.
    - name: "OS Tune | Set Transparent Huge Pages to madvise (immediate)"
      ansible.builtin.shell:
        cmd: |
          thp=/sys/kernel/mm/transparent_hugepage/enabled
          if grep -qF '[madvise]' "$thp"; then
            exit 0
          fi
          echo madvise > "$thp"
          echo changed
      register: thp_set
      changed_when: "'changed' in thp_set.stdout"
      tags:
        - os-tune

    - name: "OS Tune | Install THP madvise persistence service"
      ansible.builtin.copy:
        dest: /etc/systemd/system/thp-madvise.service
        mode: "0644"
        owner: root
        group: root
        content: |
          [Unit]
          Description=Set Transparent Huge Pages to madvise
          After=local-fs.target
          [Service]
          Type=oneshot
          ExecStart=/bin/sh -c 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'
          RemainAfterExit=yes
          [Install]
          WantedBy=multi-user.target
      notify:
        - Reload systemd daemon
      tags:
        - os-tune

    # Reload here rather than relying on the handler: handlers run later,
    # and systemd should see the freshly written unit before it is enabled.
    - name: "OS Tune | Enable THP madvise persistence service"
      ansible.builtin.systemd:
        name: thp-madvise.service
        enabled: true
        daemon_reload: true
      tags:
        - os-tune

    # Uses `if` instead of `[ -f ] && echo`: when no cpufreq files exist the
    # unmatched glob made the `&&` list (and so the whole loop) exit
    # non-zero and fail the task. Only governors that are not already
    # "performance" are rewritten, so the task reports real changes only.
    - name: "OS Tune | Set CPU governor to performance (immediate)"
      ansible.builtin.shell:
        cmd: |
          changed=0
          for gov in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
            if [ -f "$gov" ] && [ "$(cat "$gov")" != "performance" ]; then
              echo performance > "$gov"
              changed=1
            fi
          done
          if [ "$changed" -eq 1 ]; then
            echo changed
          fi
      register: cpu_governor_set
      changed_when: "'changed' in cpu_governor_set.stdout"
      tags:
        - os-tune

    # ExecStart uses the same `if` form so the unit does not fail at boot on
    # machines that expose no cpufreq interface.
    - name: "OS Tune | Install CPU performance governor persistence service"
      ansible.builtin.copy:
        dest: /etc/systemd/system/cpu-performance.service
        mode: "0644"
        owner: root
        group: root
        content: |
          [Unit]
          Description=Set CPU scaling governor to performance
          After=local-fs.target
          [Service]
          Type=oneshot
          ExecStart=/bin/sh -c 'for gov in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do if [ -f "$gov" ]; then echo performance > "$gov"; fi; done'
          RemainAfterExit=yes
          [Install]
          WantedBy=multi-user.target
      notify:
        - Reload systemd daemon
      tags:
        - os-tune

    # Reload here for the same reason as the THP service above.
    - name: "OS Tune | Enable CPU performance governor persistence service"
      ansible.builtin.systemd:
        name: cpu-performance.service
        enabled: true
        daemon_reload: true
      tags:
        - os-tune

  handlers:
    - name: Reload systemd and restart ollama
      ansible.builtin.systemd:
        name: ollama
        state: restarted
        daemon_reload: true

    # Name kept for existing `notify` references. The state is `restarted`
    # because `started` is a no-op on a running service, which meant unit
    # file changes were never picked up; `restarted` also starts a stopped
    # service.
    - name: Reload systemd and start ollama-node0
      ansible.builtin.systemd:
        name: ollama-node0
        state: restarted
        daemon_reload: true

    - name: Reload systemd daemon
      ansible.builtin.systemd:
        daemon_reload: true