qwen my beloved <3, now declarative!
This commit is contained in:
@@ -39,6 +39,9 @@
|
||||
# };
|
||||
|
||||
# This is here because I don't have another computer that could run local AI, and regardless, the packages would be different on each one.
|
||||
# TODO: honestly, while I currently only have one PC that can run local AI, that might change in the future.
|
||||
# And this is getting a bit complicated to keep inside a single PC's config.
|
||||
# Should be moved to its own file.
|
||||
environment.systemPackages = with pkgs; [
|
||||
ollama-cuda
|
||||
opencode
|
||||
@@ -80,6 +83,20 @@
|
||||
};
|
||||
};
|
||||
|
||||
# As long as this is here, the models are declarative: llama-server will download them if they are not already present.
|
||||
# Declarative llama-swap configuration, installed by NixOS as /etc/llama-swap/config.yaml.
# Each entry under `models:` names a model and gives the llama-server command line used to serve it.
# `''${PORT}` is the Nix indented-string escape for a literal `${PORT}`, so Nix does not interpolate it;
# presumably llama-swap substitutes the actual port when it launches the command (confirm against llama-swap docs).
# `-hf <repo>:<quant>` points llama-server at a Hugging Face repository and quantization.
# NOTE(review): `ttl` looks like an idle timeout in seconds before a model is unloaded; confirm.
# NOTE(review): the 35B entry hard-codes /home/laythe/llamapcache as the slot save path; verify that directory exists.
environment.etc."llama-swap/config.yaml".text = ''
|
||||
models:
|
||||
"Qwen3.5-35B-A3B-GGUF":
|
||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 --cache-type-k f16 --cache-type-v f16 -np 1 --cpu-moe -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --ubatch-size 16 --batch-size 16
|
||||
ttl: 1200
|
||||
"Qwen3-1.7B-GGUF":
|
||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu
|
||||
ttl: 120
|
||||
"Qwen3-8B-GGUF":
|
||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1
|
||||
ttl: 120
|
||||
'';
|
||||
|
||||
# Set your time zone.
|
||||
# time.timeZone = "Europe/Amsterdam";
|
||||
|
||||
|
||||
Reference in New Issue
Block a user