freaks it
This commit is contained in:
@@ -21,6 +21,7 @@
|
|||||||
nix.package = pkgs.lixPackageSets.stable.lix;
|
nix.package = pkgs.lixPackageSets.stable.lix;
|
||||||
|
|
||||||
nixpkgs.config.packageOverrides = pkgs: {
|
nixpkgs.config.packageOverrides = pkgs: {
|
||||||
|
|
||||||
# Stolen from https://www.nijho.lt/post/llama-nixos/
|
# Stolen from https://www.nijho.lt/post/llama-nixos/
|
||||||
llama-cpp =
|
llama-cpp =
|
||||||
(pkgs.llama-cpp.override {
|
(pkgs.llama-cpp.override {
|
||||||
@@ -32,12 +33,13 @@
|
|||||||
blasSupport = true;
|
blasSupport = true;
|
||||||
}).overrideAttrs
|
}).overrideAttrs
|
||||||
(oldAttrs: rec {
|
(oldAttrs: rec {
|
||||||
version = "8179";
|
version = "8184";
|
||||||
src = pkgs.fetchFromGitHub {
|
src = pkgs.fetchFromGitHub {
|
||||||
owner = "ggml-org";
|
owner = "aagit";
|
||||||
repo = "llama.cpp";
|
repo = "llama.cpp";
|
||||||
tag = "b${version}";
|
# tag = "b${version}";
|
||||||
hash = "sha256-LzJfbw4Xl3ktaLmys1Y2a6Ncthjb/DjS9qKrdcHE2+Q=";
|
rev = "6ebf2e0d00d31acfc1a1fa9662e9a7d38bd07bf7"; # https://github.com/ggml-org/llama.cpp/pull/19970
|
||||||
|
hash = "sha256-xryajW0Cs1d+WDijspMTW21FDaZP9Grkb+uErMQCQ48=";
|
||||||
leaveDotGit = true;
|
leaveDotGit = true;
|
||||||
postFetch = ''
|
postFetch = ''
|
||||||
git -C "$out" rev-parse --short HEAD > $out/COMMIT
|
git -C "$out" rev-parse --short HEAD > $out/COMMIT
|
||||||
@@ -75,6 +77,7 @@
|
|||||||
} -C $out/bin
|
} -C $out/bin
|
||||||
chmod +x $out/bin/llama-swap
|
chmod +x $out/bin/llama-swap
|
||||||
'';
|
'';
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
# 🇺🇸
|
# 🇺🇸
|
||||||
|
|||||||
@@ -87,15 +87,32 @@
|
|||||||
environment.etc."llama-swap/config.yaml".text = ''
|
environment.etc."llama-swap/config.yaml".text = ''
|
||||||
models:
|
models:
|
||||||
"Qwen3.5-35B-A3B-GGUF":
|
"Qwen3.5-35B-A3B-GGUF":
|
||||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 --cache-type-k f16 --cache-type-v f16 -np 1 --cpu-moe -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --ubatch-size 16 --batch-size 16
|
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_S --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --cpu-moe --fit-target 256 --ubatch-size 1024 -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32
|
||||||
ttl: 1200
|
ttl: 2400
|
||||||
"Qwen3-1.7B-GGUF":
|
"Qwen3-1.7B-GGUF":
|
||||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu
|
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu
|
||||||
ttl: 120
|
ttl: 120
|
||||||
"Qwen3-8B-GGUF":
|
"Qwen3-8B-GGUF":
|
||||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1
|
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1
|
||||||
ttl: 120
|
ttl: 120
|
||||||
'';
|
"Qwen3-4B-Claude-Opus-Distill":
|
||||||
|
cmd: llama-server --port ''${PORT} -hf TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill-GGUF:Q4_K_M --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256
|
||||||
|
ttl: 120
|
||||||
|
"Qwen3.5-9B-Thinking":
|
||||||
|
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj"
|
||||||
|
ttl: 120
|
||||||
|
"Qwen3.5-4B-Thinking":
|
||||||
|
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}'"
|
||||||
|
ttl: 120
|
||||||
|
"Qwen3.5-9B-Non-Thinking":
|
||||||
|
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --no-mmproj"
|
||||||
|
ttl: 120
|
||||||
|
"Qwen3.5-4B-Non-Thinking":
|
||||||
|
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
|
||||||
|
ttl: 120
|
||||||
|
"Qwen3.5-0.8B-Non-Thinking":
|
||||||
|
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
|
||||||
|
ttl: 120'';
|
||||||
|
|
||||||
# Set your time zone.
|
# Set your time zone.
|
||||||
# time.timeZone = "Europe/Amsterdam";
|
# time.timeZone = "Europe/Amsterdam";
|
||||||
|
|||||||
Reference in New Issue
Block a user