qwen my beloved <3, now declarative!

2026-03-01 13:06:37 -05:00
parent a92363a211
commit c8bc205d3c
2 changed files with 22 additions and 3 deletions
--- a/global/default.nix
+++ b/global/default.nix
@@ -32,18 +32,20 @@
        blasSupport = true;
      }).overrideAttrs
        (oldAttrs: rec {
-          version = "8162";
+          version = "8179";
          src = pkgs.fetchFromGitHub {
            owner = "ggml-org";
            repo = "llama.cpp";
            tag = "b${version}";
-            hash = "sha256-Tn6Trhvmm+n7qyjSOD6WpnZmEHGrkHqZE6I0HQE1TPY=";
+            hash = "sha256-LzJfbw4Xl3ktaLmys1Y2a6Ncthjb/DjS9qKrdcHE2+Q=";
            leaveDotGit = true;
            postFetch = ''
              git -C "$out" rev-parse --short HEAD > $out/COMMIT
              find "$out" -name .git -print0 | xargs -0 rm -rf
            '';
          };
+          # npmDepsHash must be updated on every version bump so it matches the new version's webui dependencies
+          npmDepsHash = "sha256-FKjoZTKm0ddoVdpxzYrRUmTiuafEfbKc4UD2fz2fb8A=";
          # Enable native CPU optimizations for massively better CPU performance
          # This enables AVX, AVX2, AVX-512, FMA, etc. for your specific CPU
          # NOTE: This is intentionally opposite of nixpkgs (which uses -DGGML_NATIVE=off
@@ -165,7 +167,7 @@
  environment.systemPackages = with pkgs; [
    git
    wget
-    wineWowPackages.stable # Heavy but really annoying to not have when you need it
+    wineWow64Packages.stable # Heavy but really annoying to not have when you need it
    winetricks
    gparted
    unrar
--- a/machines/homepc/configuration.nix
+++ b/machines/homepc/configuration.nix
@@ -39,6 +39,9 @@
  # };

  # This is here because I don't have another computer that could run local AI, and regardless the packages would be different on every one.
+  # TODO: I currently only have one PC that can run local AI, but that might change in the future,
+  # and this config is getting a bit complicated for a single-machine config.
+  # It should be moved to its own module.
  environment.systemPackages = with pkgs; [
    ollama-cuda
    opencode
@@ -80,6 +83,20 @@
    };
  };

+  # As long as this is here, the models are declarative: llama-server fetches each model referenced via -hf if it is not already downloaded.
+  environment.etc."llama-swap/config.yaml".text = ''
+    models:
+      "Qwen3.5-35B-A3B-GGUF":
+        cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 --cache-type-k f16 --cache-type-v f16 -np 1  --cpu-moe -fa on  --slots --slot-save-path /home/laythe/llamapcache  --jinja -kvu --no-mmproj --ubatch-size 16 --batch-size 16
+        ttl: 1200
+      "Qwen3-1.7B-GGUF":
+        cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu
+        ttl: 120
+      "Qwen3-8B-GGUF":
+        cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1
+        ttl: 120
+  '';
+
  # Set your time zone.
  # time.timeZone = "Europe/Amsterdam";