# 04_models.yml (10 KB)
  1. ---
  2. # playbooks/04_models.yml
  3. # Pull and register Ollama models based on benchmark selection
  4. - name: "Models | Pull and register Ollama models"
  5. hosts: ai_server
  6. become: true
  7. gather_facts: true
  8. tags:
  9. - models
  10. vars:
  11. model_selection_file: "{{ playbook_dir }}/../benchmarks/results/model_selection.json"
  12. modelfiles_dir: /mnt/ai_data/ollama_models/modelfiles
  13. slot4_model: "" # legacy override kept for backwards compatibility
  14. slot5_model: "" # overrides slot5_general_rotate
  15. slot6_model: "" # overrides slot6_coding_rotate
  16. ollama_api_key: "{{ lookup('community.hashi_vault.hashi_vault', vault_secret_prefix ~ '/ollama:api_key token=' ~ lookup('ansible.builtin.file', vault_token_file) ~ ' url=' ~ vault_url) }}"
  17. tasks:
  # ── Load benchmark results ───────────────────────────────────────
  # Read the selection file from the control node (delegated to localhost,
  # without privilege escalation). slurp returns the content base64-encoded
  # in model_selection_raw.content.
  - name: 'Models | Read model_selection.json from control node'
    delegate_to: localhost
    become: false
    ansible.builtin.slurp:
      src: '{{ model_selection_file }}'
    register: model_selection_raw
    tags: [models-load]
  # Decode the base64 payload and parse it as JSON into the model_selection fact.
  - name: 'Models | Parse model selection data'
    tags: [models-load]
    ansible.builtin.set_fact:
      model_selection: >-
        {{ model_selection_raw.content
        | b64decode
        | from_json }}
  # Replace the slot4_coding entry of model_selection when slot4_model is non-empty.
  - name: 'Models | Apply slot4 override if provided'
    tags: [models-load]
    when:
    - slot4_model | length > 0
    ansible.builtin.set_fact:
      model_selection: >-
        {{ model_selection | combine({'slot4_coding': slot4_model}) }}
  # Replace the slot5_general_rotate entry of model_selection when slot5_model is non-empty.
  - name: 'Models | Apply slot5 override if provided'
    tags: [models-load]
    when:
    - slot5_model | length > 0
    ansible.builtin.set_fact:
      model_selection: >-
        {{ model_selection | combine({'slot5_general_rotate': slot5_model}) }}
  # Replace the slot6_coding_rotate entry of model_selection when slot6_model is non-empty.
  - name: 'Models | Apply slot6 override if provided'
    tags: [models-load]
    when:
    - slot6_model | length > 0
    ansible.builtin.set_fact:
      model_selection: >-
        {{ model_selection | combine({'slot6_coding_rotate': slot6_model}) }}
  # Print the resolved slot assignment. The two rotate slots print 'none'
  # when their key is absent; the locked slots must be defined.
  - name: 'Models | Display selected models'
    tags: [models-load]
    ansible.builtin.debug:
      msg:
      - '=== Node 1 — General (port 11434) ==='
      - 'Slot 1 (locked): {{ model_selection.slot1_general }}'
      - 'Slot 2 (locked): {{ model_selection.slot2_general }}'
      - "Slot 5 (rotate): {{ model_selection.slot5_general_rotate | default('none') }}"
      - '=== Node 0 — Coding (port 11435) ==='
      - 'Slot 3 (locked): {{ model_selection.slot3_coding }}'
      - 'Slot 4 (locked): {{ model_selection.slot4_coding }}'
      - "Slot 6 (rotate): {{ model_selection.slot6_coding_rotate | default('none') }}"
  # ── Pull models ──────────────────────────────────────────────────
  # Capture the output of `ollama list` in current_models. The task is a
  # read-only query, so it is never reported as changed.
  - name: 'Models | Get currently installed models'
    ansible.builtin.command:
      cmd: ollama list
    register: current_models
    changed_when: false
    tags: [models-pull]
  # Skip the first output line (the column header of `ollama list`) and keep
  # the first whitespace-separated field of every remaining line.
  - name: 'Models | Set installed models list'
    tags: [models-pull]
    ansible.builtin.set_fact:
      installed_model_names: "{{ current_models.stdout_lines[1:] | default([]) | map('split') | map('first') | list }}"
  # Pull every selected slot model that is not installed yet.
  #
  # Entries that are empty or the literal 'none' (the fallback used for the
  # optional rotate slots) are skipped.
  #
  # `ollama list` prints untagged models as "<name>:latest", so a selection
  # without an explicit tag is normalised to "<name>:latest" before the
  # membership test; otherwise it would be re-pulled (and reported as changed)
  # on every run. Only the last path segment is inspected for a tag so that a
  # registry "host:port/" prefix is not mistaken for one.
  #
  # argv passes the model name as a single argument without shell-style
  # word splitting.
  - name: "Models | Pull slot models if not already present"
    ansible.builtin.command:
      argv:
      - ollama
      - pull
      - "{{ item }}"
    loop:
    - "{{ model_selection.slot1_general }}"
    - "{{ model_selection.slot2_general }}"
    - "{{ model_selection.slot5_general_rotate | default('none') }}"
    - "{{ model_selection.slot3_coding }}"
    - "{{ model_selection.slot4_coding }}"
    - "{{ model_selection.slot6_coding_rotate | default('none') }}"
    vars:
      # Name as it would appear in the NAME column of `ollama list`.
      model_ref: "{{ item if ':' in (item.split('/') | last) else item ~ ':latest' }}"
    when:
    - item | length > 0
    - item != 'none'
    - model_ref not in installed_model_names
    changed_when: true
    loop_control:
      label: "Pulling {{ item }}"
    tags:
    - models-pull
  # Pull every baseline model that is not installed yet.
  #
  # baseline_models is not defined in this play's vars — presumably supplied
  # by inventory / group vars; verify.
  #
  # `ollama list` prints untagged models as "<name>:latest", so an entry
  # without an explicit tag is normalised to "<name>:latest" before the
  # membership test; otherwise it would be re-pulled (and reported as changed)
  # on every run. Only the last path segment is inspected for a tag so that a
  # registry "host:port/" prefix is not mistaken for one.
  #
  # argv passes the model name as a single argument without shell-style
  # word splitting.
  - name: "Models | Pull baseline models if not already present"
    ansible.builtin.command:
      argv:
      - ollama
      - pull
      - "{{ item }}"
    loop: "{{ baseline_models }}"
    vars:
      # Name as it would appear in the NAME column of `ollama list`.
      model_ref: "{{ item if ':' in (item.split('/') | last) else item ~ ':latest' }}"
    when: model_ref not in installed_model_names
    changed_when: true
    loop_control:
      label: "Pulling {{ item }}"
    tags:
    - models-pull
  # ── Create Modelfiles ────────────────────────────────────────────
  # Ensure the root-owned directory that receives the Modelfiles exists.
  - name: 'Models | Create modelfiles directory'
    ansible.builtin.file:
      state: directory
      path: '{{ modelfiles_dir }}'
      owner: root
      group: root
      mode: '0755'
    tags: [models-modelfile]
  # Write Modelfile.coder-128k, derived from the slot 3 coding model.
  # NOTE(review): the name says 128k, but num_ctx is 32768 (32k) — the same
  # value as Modelfile.coder-32k. Confirm whether 131072 was intended or the
  # context was reduced on purpose.
  - name: 'Models | Template coder-128k Modelfile'
    tags: [models-modelfile]
    ansible.builtin.copy:
      dest: '{{ modelfiles_dir }}/Modelfile.coder-128k'
      mode: '0644'
      content: |
        FROM {{ model_selection.slot3_coding }}
        PARAMETER num_ctx 32768
        SYSTEM You are an expert coding assistant. You write clean, efficient, well-documented code. Always include type hints and follow best practices.
  # Write Modelfile.coder-32k, derived from the slot 4 coding model.
  # Skipped when slot 4 is empty or set to the literal 'none'.
  - name: 'Models | Template coder-32k Modelfile'
    tags: [models-modelfile]
    when: (model_selection.slot4_coding | length > 0) and (model_selection.slot4_coding != 'none')
    ansible.builtin.copy:
      dest: '{{ modelfiles_dir }}/Modelfile.coder-32k'
      mode: '0644'
      content: |
        FROM {{ model_selection.slot4_coding }}
        PARAMETER num_ctx 32768
        SYSTEM You are an expert coding assistant. You write clean, efficient, well-documented code. Always include type hints and follow best practices.
  # Write Modelfile.coder-rotate, derived from the slot 6 rotating coding model.
  # Skipped when slot 6 is undefined, empty, or set to the literal 'none'.
  - name: 'Models | Template coder-rotate Modelfile'
    tags: [models-modelfile]
    when: >-
      (model_selection.slot6_coding_rotate | default('') | length > 0)
      and (model_selection.slot6_coding_rotate | default('none') != 'none')
    ansible.builtin.copy:
      dest: '{{ modelfiles_dir }}/Modelfile.coder-rotate'
      mode: '0644'
      content: |
        FROM {{ model_selection.slot6_coding_rotate }}
        PARAMETER num_ctx 32768
        SYSTEM You are an expert coding assistant. You write clean, efficient, well-documented code. Always include type hints and follow best practices.
  # Write Modelfile.llama-family: a fixed llama3.2:3b base with an 8192-token
  # context and a family-assistant system prompt.
  - name: 'Models | Template llama-family Modelfile'
    tags: [models-modelfile]
    ansible.builtin.copy:
      dest: '{{ modelfiles_dir }}/Modelfile.llama-family'
      mode: '0644'
      content: |
        FROM llama3.2:3b
        PARAMETER num_ctx 8192
        SYSTEM You are a helpful, friendly family assistant. Provide safe, age-appropriate responses suitable for all family members.
  # Write Modelfile.gemma-family: an 8192-token context and a family-assistant
  # system prompt.
  # NOTE(review): despite the "gemma" name, the base model is llama3.1:8b —
  # confirm whether this is intentional (name kept for compatibility) or a
  # leftover from copying the llama-family task.
  - name: 'Models | Template gemma-family Modelfile'
    tags: [models-modelfile]
    ansible.builtin.copy:
      dest: '{{ modelfiles_dir }}/Modelfile.gemma-family'
      mode: '0644'
      content: |
        FROM llama3.1:8b
        PARAMETER num_ctx 8192
        SYSTEM You are a helpful, friendly family assistant. Provide safe, age-appropriate responses suitable for all family members.
  # ── Register models ──────────────────────────────────────────────
  # Run `ollama create` for each custom model from its Modelfile.
  # Entries carrying a `slot` key are skipped when that slot is empty or
  # 'none'; entries without one are always created. The task has no
  # "already registered" guard and always reports changed.
  - name: 'Models | Register custom models with Ollama'
    ansible.builtin.command:
      cmd: 'ollama create {{ item.name }} -f {{ modelfiles_dir }}/{{ item.file }}'
    loop:
    - name: 'coder-128k'
      file: 'Modelfile.coder-128k'
    - name: 'coder-32k'
      file: 'Modelfile.coder-32k'
      slot: '{{ model_selection.slot4_coding }}'
    - name: 'coder-rotate'
      file: 'Modelfile.coder-rotate'
      slot: "{{ model_selection.slot6_coding_rotate | default('none') }}"
    - name: 'llama-family'
      file: 'Modelfile.llama-family'
    - name: 'gemma-family'
      file: 'Modelfile.gemma-family'
    loop_control:
      label: 'Creating {{ item.name }}'
    when: item.slot is not defined or (item.slot | length > 0 and item.slot != 'none')
    changed_when: true
    tags: [models-register]
  # ── Warmup service ───────────────────────────────────────────────
  # Render the warmup script from the repository template and install it as
  # a root-owned executable.
  - name: 'Models | Template warmup script'
    ansible.builtin.template:
      src: '{{ playbook_dir }}/../templates/ollama/warmup.sh.j2'
      dest: /usr/local/bin/ollama-warmup.sh
      owner: root
      group: root
      mode: '0755'
    tags: [models-warmup]
  # Render the systemd unit for the warmup service into /etc/systemd/system.
  - name: 'Models | Template warmup systemd service'
    ansible.builtin.template:
      src: '{{ playbook_dir }}/../templates/systemd/ollama-warmup.service.j2'
      dest: /etc/systemd/system/ollama-warmup.service
      owner: root
      group: root
      mode: '0644'
    tags: [models-warmup]
  # Have systemd re-read its unit files so newly written units are picked up.
  - name: 'Models | Reload systemd daemon'
    tags: [models-warmup]
    ansible.builtin.systemd:
      daemon_reload: true
  # Enable the ollama-warmup unit at boot and make sure it is started now.
  - name: 'Models | Enable and start warmup service'
    tags: [models-warmup]
    ansible.builtin.systemd:
      name: ollama-warmup
      state: started
      enabled: true
  # ── Node0 warmup service ─────────────────────────────────────────
  # Render the node0 warmup script from the repository template and install
  # it as a root-owned executable.
  - name: 'Models | Template node0 warmup script'
    ansible.builtin.template:
      src: '{{ playbook_dir }}/../templates/ollama/warmup-node0.sh.j2'
      dest: /usr/local/bin/ollama-warmup-node0.sh
      owner: root
      group: root
      mode: '0755'
    tags: [models-warmup]
  # Render the systemd unit for the node0 warmup service into /etc/systemd/system.
  - name: 'Models | Template node0 warmup systemd service'
    ansible.builtin.template:
      src: '{{ playbook_dir }}/../templates/systemd/ollama-warmup-node0.service.j2'
      dest: /etc/systemd/system/ollama-warmup-node0.service
      owner: root
      group: root
      mode: '0644'
    tags: [models-warmup]
  # Reload systemd's unit files, then enable the ollama-warmup-node0 unit at
  # boot and make sure it is started now — all in a single module call.
  - name: 'Models | Enable and start node0 warmup service'
    tags: [models-warmup]
    ansible.builtin.systemd:
      daemon_reload: true
      name: ollama-warmup-node0
      state: started
      enabled: true