model_selection.json 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. {
  2. "all_metrics": {
  3. "codellama:13b-instruct-q5_K_M": {
  4. "avg_tok_per_sec": 4.1,
  5. "category": "general",
  6. "coding_composite": 0.568,
  7. "coding_quality": 0.804,
  8. "general_composite": 0.508,
  9. "general_quality": 0.671,
  10. "latency_ms": 1126.4,
  11. "latency_score": 0.775,
  12. "toks_norm": 0.041
  13. },
  14. "codestral:22b-v0.1-q4_K_M": {
  15. "avg_tok_per_sec": 2.3,
  16. "category": "general",
  17. "coding_composite": 0.32,
  18. "coding_quality": 0.696,
  19. "general_composite": 0.406,
  20. "general_quality": 0.887,
  21. "latency_ms": 58429.3,
  22. "latency_score": 0,
  23. "toks_norm": 0.023
  24. },
  25. "deepseek-coder-v2:16b-lite-instruct-q4_K_M": {
  26. "avg_tok_per_sec": 21.3,
  27. "category": "general",
  28. "coding_composite": 0.618,
  29. "coding_quality": 0.855,
  30. "general_composite": 0.683,
  31. "general_quality": 1.0,
  32. "latency_ms": 1617.0,
  33. "latency_score": 0.677,
  34. "toks_norm": 0.213
  35. },
  36. "deepseek-r1:14b": {
  37. "avg_tok_per_sec": 6.4,
  38. "category": "general",
  39. "coding_composite": 0.519,
  40. "coding_quality": 0.853,
  41. "general_composite": 0.562,
  42. "general_quality": 0.948,
  43. "latency_ms": 2677.7,
  44. "latency_score": 0.464,
  45. "toks_norm": 0.064
  46. },
  47. "dolphin-mixtral:8x7b": {
  48. "avg_tok_per_sec": 4.8,
  49. "category": "general",
  50. "coding_composite": 0.451,
  51. "coding_quality": 0.755,
  52. "general_composite": 0.437,
  53. "general_quality": 0.725,
  54. "latency_ms": 3065.7,
  55. "latency_score": 0.387,
  56. "toks_norm": 0.048
  57. },
  58. "gpt-oss:20b": {
  59. "avg_tok_per_sec": 10.3,
  60. "category": "general",
  61. "coding_composite": 0.471,
  62. "coding_quality": 0.978,
  63. "general_composite": 0.447,
  64. "general_quality": 0.925,
  65. "latency_ms": 8158.0,
  66. "latency_score": 0,
  67. "toks_norm": 0.103
  68. },
  69. "mistral:7b-instruct": {
  70. "avg_tok_per_sec": 12.1,
  71. "category": "general",
  72. "coding_composite": 0.417,
  73. "coding_quality": 0.846,
  74. "general_composite": 0.359,
  75. "general_quality": 0.717,
  76. "latency_ms": 6696.2,
  77. "latency_score": 0,
  78. "toks_norm": 0.121
  79. },
  80. "phi4:14b": {
  81. "avg_tok_per_sec": 6.6,
  82. "category": "general",
  83. "coding_composite": 0.457,
  84. "coding_quality": 0.904,
  85. "general_composite": 0.469,
  86. "general_quality": 0.931,
  87. "latency_ms": 4394.9,
  88. "latency_score": 0.121,
  89. "toks_norm": 0.066
  90. },
  91. "qwen2.5-coder:14b-instruct-q4_K_M": {
  92. "avg_tok_per_sec": 4.9,
  93. "category": "general",
  94. "coding_composite": 0.393,
  95. "coding_quality": 0.84,
  96. "general_composite": 0.396,
  97. "general_quality": 0.848,
  98. "latency_ms": 6865.3,
  99. "latency_score": 0,
  100. "toks_norm": 0.049
  101. },
  102. "qwen2.5-coder:7b-instruct-q4_K_M": {
  103. "avg_tok_per_sec": 11.5,
  104. "category": "general",
  105. "coding_composite": 0.593,
  106. "coding_quality": 0.83,
  107. "general_composite": 0.619,
  108. "general_quality": 0.887,
  109. "latency_ms": 1301.7,
  110. "latency_score": 0.74,
  111. "toks_norm": 0.115
  112. },
  113. "qwen2.5-coder:7b-instruct-q5_K_M": {
  114. "avg_tok_per_sec": 9.0,
  115. "category": "general",
  116. "coding_composite": 0.496,
  117. "coding_quality": 0.81,
  118. "general_composite": 0.548,
  119. "general_quality": 0.925,
  120. "latency_ms": 2900.9,
  121. "latency_score": 0.42,
  122. "toks_norm": 0.09
  123. },
  124. "qwen2.5-coder:7b-instruct-q6_K": {
  125. "avg_tok_per_sec": 5.9,
  126. "category": "general",
  127. "coding_composite": 0.536,
  128. "coding_quality": 0.832,
  129. "general_composite": 0.576,
  130. "general_quality": 0.919,
  131. "latency_ms": 2112.8,
  132. "latency_score": 0.577,
  133. "toks_norm": 0.059
  134. },
  135. "qwen3-coder-next:latest": {
  136. "avg_tok_per_sec": 4.6,
  137. "category": "general",
  138. "coding_composite": 0.444,
  139. "coding_quality": 0.785,
  140. "general_composite": 0.492,
  141. "general_quality": 0.892,
  142. "latency_ms": 3462.7,
  143. "latency_score": 0.307,
  144. "toks_norm": 0.046
  145. },
  146. "qwen3-coder:30b": {
  147. "avg_tok_per_sec": 7.9,
  148. "category": "general",
  149. "coding_composite": 0.584,
  150. "coding_quality": 0.885,
  151. "general_composite": 0.578,
  152. "general_quality": 0.872,
  153. "latency_ms": 1769.0,
  154. "latency_score": 0.646,
  155. "toks_norm": 0.079
  156. },
  157. "qwen3.5:35b": {
  158. "avg_tok_per_sec": 5.3,
  159. "category": "general",
  160. "coding_composite": 0.411,
  161. "coding_quality": 0.879,
  162. "general_composite": 0.466,
  163. "general_quality": 1.0,
  164. "latency_ms": 133176.0,
  165. "latency_score": 0,
  166. "toks_norm": 0.053
  167. }
  168. },
  169. "coding_ranking": [],
  170. "general_ranking": [
  171. {
  172. "composite": 0.683,
  173. "metrics": {
  174. "avg_tok_per_sec": 21.3,
  175. "category": "general",
  176. "coding_composite": 0.618,
  177. "coding_quality": 0.855,
  178. "general_composite": 0.683,
  179. "general_quality": 1.0,
  180. "latency_ms": 1617.0,
  181. "latency_score": 0.677,
  182. "toks_norm": 0.213
  183. },
  184. "name": "deepseek-coder-v2:16b-lite-instruct-q4_K_M"
  185. },
  186. {
  187. "composite": 0.619,
  188. "metrics": {
  189. "avg_tok_per_sec": 11.5,
  190. "category": "general",
  191. "coding_composite": 0.593,
  192. "coding_quality": 0.83,
  193. "general_composite": 0.619,
  194. "general_quality": 0.887,
  195. "latency_ms": 1301.7,
  196. "latency_score": 0.74,
  197. "toks_norm": 0.115
  198. },
  199. "name": "qwen2.5-coder:7b-instruct-q4_K_M"
  200. },
  201. {
  202. "composite": 0.578,
  203. "metrics": {
  204. "avg_tok_per_sec": 7.9,
  205. "category": "general",
  206. "coding_composite": 0.584,
  207. "coding_quality": 0.885,
  208. "general_composite": 0.578,
  209. "general_quality": 0.872,
  210. "latency_ms": 1769.0,
  211. "latency_score": 0.646,
  212. "toks_norm": 0.079
  213. },
  214. "name": "qwen3-coder:30b"
  215. },
  216. {
  217. "composite": 0.576,
  218. "metrics": {
  219. "avg_tok_per_sec": 5.9,
  220. "category": "general",
  221. "coding_composite": 0.536,
  222. "coding_quality": 0.832,
  223. "general_composite": 0.576,
  224. "general_quality": 0.919,
  225. "latency_ms": 2112.8,
  226. "latency_score": 0.577,
  227. "toks_norm": 0.059
  228. },
  229. "name": "qwen2.5-coder:7b-instruct-q6_K"
  230. },
  231. {
  232. "composite": 0.562,
  233. "metrics": {
  234. "avg_tok_per_sec": 6.4,
  235. "category": "general",
  236. "coding_composite": 0.519,
  237. "coding_quality": 0.853,
  238. "general_composite": 0.562,
  239. "general_quality": 0.948,
  240. "latency_ms": 2677.7,
  241. "latency_score": 0.464,
  242. "toks_norm": 0.064
  243. },
  244. "name": "deepseek-r1:14b"
  245. },
  246. {
  247. "composite": 0.548,
  248. "metrics": {
  249. "avg_tok_per_sec": 9.0,
  250. "category": "general",
  251. "coding_composite": 0.496,
  252. "coding_quality": 0.81,
  253. "general_composite": 0.548,
  254. "general_quality": 0.925,
  255. "latency_ms": 2900.9,
  256. "latency_score": 0.42,
  257. "toks_norm": 0.09
  258. },
  259. "name": "qwen2.5-coder:7b-instruct-q5_K_M"
  260. },
  261. {
  262. "composite": 0.508,
  263. "metrics": {
  264. "avg_tok_per_sec": 4.1,
  265. "category": "general",
  266. "coding_composite": 0.568,
  267. "coding_quality": 0.804,
  268. "general_composite": 0.508,
  269. "general_quality": 0.671,
  270. "latency_ms": 1126.4,
  271. "latency_score": 0.775,
  272. "toks_norm": 0.041
  273. },
  274. "name": "codellama:13b-instruct-q5_K_M"
  275. },
  276. {
  277. "composite": 0.492,
  278. "metrics": {
  279. "avg_tok_per_sec": 4.6,
  280. "category": "general",
  281. "coding_composite": 0.444,
  282. "coding_quality": 0.785,
  283. "general_composite": 0.492,
  284. "general_quality": 0.892,
  285. "latency_ms": 3462.7,
  286. "latency_score": 0.307,
  287. "toks_norm": 0.046
  288. },
  289. "name": "qwen3-coder-next:latest"
  290. },
  291. {
  292. "composite": 0.469,
  293. "metrics": {
  294. "avg_tok_per_sec": 6.6,
  295. "category": "general",
  296. "coding_composite": 0.457,
  297. "coding_quality": 0.904,
  298. "general_composite": 0.469,
  299. "general_quality": 0.931,
  300. "latency_ms": 4394.9,
  301. "latency_score": 0.121,
  302. "toks_norm": 0.066
  303. },
  304. "name": "phi4:14b"
  305. },
  306. {
  307. "composite": 0.466,
  308. "metrics": {
  309. "avg_tok_per_sec": 5.3,
  310. "category": "general",
  311. "coding_composite": 0.411,
  312. "coding_quality": 0.879,
  313. "general_composite": 0.466,
  314. "general_quality": 1.0,
  315. "latency_ms": 133176.0,
  316. "latency_score": 0,
  317. "toks_norm": 0.053
  318. },
  319. "name": "qwen3.5:35b"
  320. },
  321. {
  322. "composite": 0.447,
  323. "metrics": {
  324. "avg_tok_per_sec": 10.3,
  325. "category": "general",
  326. "coding_composite": 0.471,
  327. "coding_quality": 0.978,
  328. "general_composite": 0.447,
  329. "general_quality": 0.925,
  330. "latency_ms": 8158.0,
  331. "latency_score": 0,
  332. "toks_norm": 0.103
  333. },
  334. "name": "gpt-oss:20b"
  335. },
  336. {
  337. "composite": 0.437,
  338. "metrics": {
  339. "avg_tok_per_sec": 4.8,
  340. "category": "general",
  341. "coding_composite": 0.451,
  342. "coding_quality": 0.755,
  343. "general_composite": 0.437,
  344. "general_quality": 0.725,
  345. "latency_ms": 3065.7,
  346. "latency_score": 0.387,
  347. "toks_norm": 0.048
  348. },
  349. "name": "dolphin-mixtral:8x7b"
  350. },
  351. {
  352. "composite": 0.406,
  353. "metrics": {
  354. "avg_tok_per_sec": 2.3,
  355. "category": "general",
  356. "coding_composite": 0.32,
  357. "coding_quality": 0.696,
  358. "general_composite": 0.406,
  359. "general_quality": 0.887,
  360. "latency_ms": 58429.3,
  361. "latency_score": 0,
  362. "toks_norm": 0.023
  363. },
  364. "name": "codestral:22b-v0.1-q4_K_M"
  365. },
  366. {
  367. "composite": 0.396,
  368. "metrics": {
  369. "avg_tok_per_sec": 4.9,
  370. "category": "general",
  371. "coding_composite": 0.393,
  372. "coding_quality": 0.84,
  373. "general_composite": 0.396,
  374. "general_quality": 0.848,
  375. "latency_ms": 6865.3,
  376. "latency_score": 0,
  377. "toks_norm": 0.049
  378. },
  379. "name": "qwen2.5-coder:14b-instruct-q4_K_M"
  380. },
  381. {
  382. "composite": 0.359,
  383. "metrics": {
  384. "avg_tok_per_sec": 12.1,
  385. "category": "general",
  386. "coding_composite": 0.417,
  387. "coding_quality": 0.846,
  388. "general_composite": 0.359,
  389. "general_quality": 0.717,
  390. "latency_ms": 6696.2,
  391. "latency_score": 0,
  392. "toks_norm": 0.121
  393. },
  394. "name": "mistral:7b-instruct"
  395. }
  396. ],
  397. "slot1_general": "deepseek-coder-v2:16b-lite-instruct-q4_K_M",
  398. "slot2_general": "qwen2.5-coder:7b-instruct-q4_K_M",
  399. "slot3_coding": "deepseek-coder-v2:16b-lite-instruct-q4_K_M",
  400. "slot4_coding": "none"
  401. }