diff --git a/flake.lock b/flake.lock index 4c1a651..4ddf8ba 100755 --- a/flake.lock +++ b/flake.lock @@ -7,11 +7,11 @@ ] }, "locked": { - "lastModified": 1770476834, - "narHash": "sha256-cyxgVsNfHnJ4Zn6G1EOzfTXbjTy7Ds9zMOsZaX7VZWs=", + "lastModified": 1772060133, + "narHash": "sha256-VuyRptb8v1lVGMlLp4/1vRX3Efwec0CN0S6mKmDPzLg=", "owner": "nix-community", "repo": "home-manager", - "rev": "6cee0821577643e0b34e2c5d9a90d0b1b5cdca70", + "rev": "ce9b6e52500a0ea0ec48f0bbf6d7a3e431d9dfa4", "type": "github" }, "original": { @@ -79,11 +79,11 @@ }, "nixpkgs_2": { "locked": { - "lastModified": 1770197578, - "narHash": "sha256-AYqlWrX09+HvGs8zM6ebZ1pwUqjkfpnv8mewYwAo+iM=", + "lastModified": 1771848320, + "narHash": "sha256-0MAd+0mun3K/Ns8JATeHT1sX28faLII5hVLq0L3BdZU=", "owner": "nixos", "repo": "nixpkgs", - "rev": "00c21e4c93d963c50d4c0c89bfa84ed6e0694df2", + "rev": "2fc6539b481e1d2569f25f8799236694180c0993", "type": "github" }, "original": { diff --git a/global/default.nix b/global/default.nix index eecbc60..8198fb2 100755 --- a/global/default.nix +++ b/global/default.nix @@ -20,6 +20,61 @@ ]; nix.package = pkgs.lixPackageSets.stable.lix; + nixpkgs.config.packageOverrides = pkgs: { + # Stolen from https://www.nijho.lt/post/llama-nixos/ + llama-cpp = + (pkgs.llama-cpp.override { + cudaSupport = true; + rocmSupport = false; + metalSupport = false; + # Enable BLAS for optimized CPU layer performance (OpenBLAS) + # This is crucial for models using split-mode or CPU offloading + blasSupport = true; + }).overrideAttrs + (oldAttrs: rec { + version = "8162"; + src = pkgs.fetchFromGitHub { + owner = "ggml-org"; + repo = "llama.cpp"; + tag = "b${version}"; + hash = "sha256-Tn6Trhvmm+n7qyjSOD6WpnZmEHGrkHqZE6I0HQE1TPY="; + leaveDotGit = true; + postFetch = '' + git -C "$out" rev-parse --short HEAD > $out/COMMIT + find "$out" -name .git -print0 | xargs -0 rm -rf + ''; + }; + # Enable native CPU optimizations for massively better CPU performance + # This enables AVX, AVX2, AVX-512, FMA, etc. for your specific CPU + # NOTE: This is intentionally opposite of nixpkgs (which uses -DGGML_NATIVE=off + # for reproducible builds). We sacrifice portability for faster CPU layers. + cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [ + "-DGGML_NATIVE=ON" + "-DCMAKE_CUDA_ARCHITECTURES=86" # RTX 3090 - needed since sandbox has no GPU + ]; + + # Disable Nix's NIX_ENFORCE_NO_NATIVE which strips -march=native flags + # See: https://github.com/NixOS/nixpkgs/issues/357736 + # See: https://github.com/NixOS/nixpkgs/pull/377484 (intentionally contradicts this) + preConfigure = '' + export NIX_ENFORCE_NO_NATIVE=0 + ${oldAttrs.preConfigure or ""} + ''; + }); + + # llama-swap from GitHub releases + llama-swap = pkgs.runCommand "llama-swap" { } '' + mkdir -p $out/bin + tar -xzf ${ + pkgs.fetchurl { + url = "https://github.com/mostlygeek/llama-swap/releases/download/v190/llama-swap_190_linux_amd64.tar.gz"; + hash = "sha256-WAfmJ4YiVH/UYq++l2Ut6oLqkd270HgG7eV+6FG/0Oc="; + } + } -C $out/bin + chmod +x $out/bin/llama-swap + ''; + }; + # 🇺🇸 i18n.defaultLocale = "en_US.UTF-8"; i18n.extraLocaleSettings = { @@ -57,7 +112,7 @@ jack.enable = true; }; - programs.adb.enable = true; # Bit heavy but you never know when you might need it... + # programs.adb.enable = true; # Bit heavy but you never know when you might need it... users.users.laythe = { isNormalUser = true; @@ -114,7 +169,7 @@ winetricks gparted unrar - electrum + # electrum qpwgraph libimobiledevice ifuse @@ -144,6 +199,7 @@ xmodmap nixfmt units + android-tools ]; services.usbmuxd = { diff --git a/machines/homepc/configuration.nix b/machines/homepc/configuration.nix index fd1be82..d00122c 100755 --- a/machines/homepc/configuration.nix +++ b/machines/homepc/configuration.nix @@ -42,11 +42,42 @@ environment.systemPackages = with pkgs; [ ollama-cuda opencode + llama-cpp + llama-swap ]; services.ollama = { enable = true; package = pkgs.ollama-cuda; + environmentVariables = { + OLLAMA_NUM_PARALLEL = "1"; + OLLAMA_FLASH_ATTENTION = "1"; + OLLAMA_KV_CACHE_TYPE = "q4_0"; + OLLAMA_CONTEXT_LENGTH = "16384"; + }; + }; + + # Configure llama-swap as a systemd service + systemd.services.llama-swap = { + description = "llama-swap - OpenAI compatible proxy with automatic model swapping"; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + + serviceConfig = { + Type = "simple"; + User = "laythe"; + Group = "users"; + # Point to your declarative config file + ExecStart = "${pkgs.llama-swap}/bin/llama-swap --config /etc/llama-swap/config.yaml --listen 0.0.0.0:9292 --watch-config"; + Restart = "always"; + RestartSec = 10; + + # Environment for CUDA support + Environment = [ + "PATH=/run/current-system/sw/bin" + "LD_LIBRARY_PATH=/run/opengl-driver/lib:/run/opengl-driver-32/lib" + ]; + }; }; # Set your time zone. diff --git a/modules/fabrication/default.nix b/modules/fabrication/default.nix index 742887f..ba424ff 100755 --- a/modules/fabrication/default.nix +++ b/modules/fabrication/default.nix @@ -1,7 +1,7 @@ -{pkgs, ...}: +{ pkgs, ... }: { environment.systemPackages = with pkgs; [ kicad - cura + # cura ]; -} \ No newline at end of file +} diff --git a/modules/kde/default.nix b/modules/kde/default.nix index 7499abd..1dbf4be 100755 --- a/modules/kde/default.nix +++ b/modules/kde/default.nix @@ -10,6 +10,7 @@ environment.systemPackages = with pkgs; [ firefox # Odd spot, but my choice of browser is determined by environment. kdePackages.kate + kdePackages.filelight # kdePackages.kdeconnect-kde ]; } diff --git a/modules/virtualization/default.nix b/modules/virtualization/default.nix index e58fdc1..7d64261 100755 --- a/modules/virtualization/default.nix +++ b/modules/virtualization/default.nix @@ -1,4 +1,4 @@ -{pkgs, ...}: +{ pkgs, ... }: { environment.systemPackages = with pkgs; [ qemu @@ -6,7 +6,7 @@ ]; programs.virt-manager.enable = true; - users.groups.libvirtd.members = ["laythe"]; + users.groups.libvirtd.members = [ "laythe" ]; virtualisation.libvirtd.enable = true; virtualisation.spiceUSBRedirection.enable = true; virtualisation.libvirtd.qemu.swtpm.enable = true; @@ -14,5 +14,6 @@ virtualisation.podman = { enable = true; dockerCompat = true; + enableNvidia = true; }; -} \ No newline at end of file +}