diff --git a/global/default.nix b/global/default.nix index 463a8a7..338fd46 100755 --- a/global/default.nix +++ b/global/default.nix @@ -21,6 +21,7 @@ nix.package = pkgs.lixPackageSets.stable.lix; nixpkgs.config.packageOverrides = pkgs: { + # Stolen from https://www.nijho.lt/post/llama-nixos/ llama-cpp = (pkgs.llama-cpp.override { @@ -32,12 +33,13 @@ blasSupport = true; }).overrideAttrs (oldAttrs: rec { - version = "8179"; + version = "8184"; src = pkgs.fetchFromGitHub { - owner = "ggml-org"; + owner = "aagit"; repo = "llama.cpp"; - tag = "b${version}"; - hash = "sha256-LzJfbw4Xl3ktaLmys1Y2a6Ncthjb/DjS9qKrdcHE2+Q="; + # tag = "b${version}"; + rev = "6ebf2e0d00d31acfc1a1fa9662e9a7d38bd07bf7"; # https://github.com/ggml-org/llama.cpp/pull/19970 + hash = "sha256-xryajW0Cs1d+WDijspMTW21FDaZP9Grkb+uErMQCQ48="; leaveDotGit = true; postFetch = '' git -C "$out" rev-parse --short HEAD > $out/COMMIT @@ -75,6 +77,7 @@ } -C $out/bin chmod +x $out/bin/llama-swap ''; + }; # 🇺🇸 diff --git a/machines/homepc/configuration.nix b/machines/homepc/configuration.nix index efd9e2f..8be6ba5 100755 --- a/machines/homepc/configuration.nix +++ b/machines/homepc/configuration.nix @@ -87,15 +87,32 @@ environment.etc."llama-swap/config.yaml".text = '' models: "Qwen3.5-35B-A3B-GGUF": - cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 --cache-type-k f16 --cache-type-v f16 -np 1 --cpu-moe -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --ubatch-size 16 --batch-size 16 - ttl: 1200 + cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_S --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --cpu-moe --fit-target 256 --ubatch-size 1024 -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32 + ttl: 2400 "Qwen3-1.7B-GGUF": cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu ttl: 120 "Qwen3-8B-GGUF": cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 ttl: 120 - ''; + "Qwen3-4B-Claude-Opus-Distill": + cmd: llama-server --port ''${PORT} -hf TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill-GGUF:Q4_K_M --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 + ttl: 120 + "Qwen3.5-9B-Thinking": + cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj" + ttl: 120 + "Qwen3.5-4B-Thinking": + cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}'" + ttl: 120 + "Qwen3.5-9B-Non-Thinking": + cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --no-mmproj" + ttl: 120 + "Qwen3.5-4B-Non-Thinking": + cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256" + ttl: 120 + "Qwen3.5-0.8B-Non-Thinking": + cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256" + ttl: 120''; # Set your time zone. # time.timeZone = "Europe/Amsterdam";