Compare commits

...

3 Commits

5 changed files with 233 additions and 31 deletions

185
flake.lock generated
View File

@@ -1,5 +1,85 @@
{ {
"nodes": { "nodes": {
"blueprint": {
"inputs": {
"nixpkgs": [
"llm-agents",
"nixpkgs"
],
"systems": [
"llm-agents",
"systems"
]
},
"locked": {
"lastModified": 1771437256,
"narHash": "sha256-bLqwib+rtyBRRVBWhMuBXPCL/OThfokA+j6+uH7jDGU=",
"owner": "numtide",
"repo": "blueprint",
"rev": "06ee7190dc2620ea98af9eb225aa9627b68b0e33",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "blueprint",
"type": "github"
}
},
"bun2nix": {
"inputs": {
"flake-parts": [
"llm-agents",
"flake-parts"
],
"import-tree": "import-tree",
"nixpkgs": [
"llm-agents",
"nixpkgs"
],
"systems": [
"llm-agents",
"systems"
],
"treefmt-nix": [
"llm-agents",
"treefmt-nix"
]
},
"locked": {
"lastModified": 1770895533,
"narHash": "sha256-v3QaK9ugy9bN9RXDnjw0i2OifKmz2NnKM82agtqm/UY=",
"owner": "nix-community",
"repo": "bun2nix",
"rev": "c843f477b15f51151f8c6bcc886954699440a6e1",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "bun2nix",
"type": "github"
}
},
"flake-parts": {
"inputs": {
"nixpkgs-lib": [
"llm-agents",
"nixpkgs"
]
},
"locked": {
"lastModified": 1772408722,
"narHash": "sha256-rHuJtdcOjK7rAHpHphUb1iCvgkU3GpfvicLMwwnfMT0=",
"owner": "hercules-ci",
"repo": "flake-parts",
"rev": "f20dc5d9b8027381c474144ecabc9034d6a839a3",
"type": "github"
},
"original": {
"owner": "hercules-ci",
"repo": "flake-parts",
"type": "github"
}
},
"home-manager": { "home-manager": {
"inputs": { "inputs": {
"nixpkgs": [ "nixpkgs": [
@@ -7,11 +87,11 @@
] ]
}, },
"locked": { "locked": {
"lastModified": 1772060133, "lastModified": 1772845525,
"narHash": "sha256-VuyRptb8v1lVGMlLp4/1vRX3Efwec0CN0S6mKmDPzLg=", "narHash": "sha256-Dp5Ir2u4jJDGCgeMRviHvEQDe+U37hMxp6RSNOoMMPc=",
"owner": "nix-community", "owner": "nix-community",
"repo": "home-manager", "repo": "home-manager",
"rev": "ce9b6e52500a0ea0ec48f0bbf6d7a3e431d9dfa4", "rev": "27b93804fbef1544cb07718d3f0a451f4c4cd6c0",
"type": "github" "type": "github"
}, },
"original": { "original": {
@@ -61,6 +141,44 @@
"type": "github" "type": "github"
} }
}, },
"import-tree": {
"locked": {
"lastModified": 1763762820,
"narHash": "sha256-ZvYKbFib3AEwiNMLsejb/CWs/OL/srFQ8AogkebEPF0=",
"owner": "vic",
"repo": "import-tree",
"rev": "3c23749d8013ec6daa1d7255057590e9ca726646",
"type": "github"
},
"original": {
"owner": "vic",
"repo": "import-tree",
"type": "github"
}
},
"llm-agents": {
"inputs": {
"blueprint": "blueprint",
"bun2nix": "bun2nix",
"flake-parts": "flake-parts",
"nixpkgs": "nixpkgs_2",
"systems": "systems",
"treefmt-nix": "treefmt-nix"
},
"locked": {
"lastModified": 1773322058,
"narHash": "sha256-xYQ32BrphBupOi+rTm3XF3URFEmH0kHYVRi58fMER0I=",
"owner": "Qumulo",
"repo": "llm-agents",
"rev": "efdf1f01a8474c3c6c2d0f95a66c97b1baec4dc7",
"type": "github"
},
"original": {
"owner": "Qumulo",
"repo": "llm-agents",
"type": "github"
}
},
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1768564909, "lastModified": 1768564909,
@@ -79,11 +197,27 @@
}, },
"nixpkgs_2": { "nixpkgs_2": {
"locked": { "locked": {
"lastModified": 1771848320, "lastModified": 1773201692,
"narHash": "sha256-0MAd+0mun3K/Ns8JATeHT1sX28faLII5hVLq0L3BdZU=", "narHash": "sha256-NXrKzNMniu4Oam2kAFvqJ3GB2kAvlAFIriTAheaY8hw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "b6067cc0127d4db9c26c79e4de0513e58d0c40c9",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_3": {
"locked": {
"lastModified": 1772773019,
"narHash": "sha256-E1bxHxNKfDoQUuvriG71+f+s/NT0qWkImXsYZNFFfCs=",
"owner": "nixos", "owner": "nixos",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "2fc6539b481e1d2569f25f8799236694180c0993", "rev": "aca4d95fce4914b3892661bcb80b8087293536c6",
"type": "github" "type": "github"
}, },
"original": { "original": {
@@ -120,9 +254,46 @@
"inputs": { "inputs": {
"home-manager": "home-manager", "home-manager": "home-manager",
"impermanence": "impermanence", "impermanence": "impermanence",
"nixpkgs": "nixpkgs", "llm-agents": "llm-agents",
"nixpkgs": "nixpkgs_3",
"plasma-manager": "plasma-manager" "plasma-manager": "plasma-manager"
} }
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"treefmt-nix": {
"inputs": {
"nixpkgs": [
"llm-agents",
"nixpkgs"
]
},
"locked": {
"lastModified": 1773297127,
"narHash": "sha256-6E/yhXP7Oy/NbXtf1ktzmU8SdVqJQ09HC/48ebEGBpk=",
"owner": "numtide",
"repo": "treefmt-nix",
"rev": "71b125cd05fbfd78cab3e070b73544abe24c5016",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "treefmt-nix",
"type": "github"
}
} }
}, },
"root": "root", "root": "root",

View File

@@ -17,6 +17,7 @@
inputs.nixpkgs.follows = "nixpkgs"; inputs.nixpkgs.follows = "nixpkgs";
inputs.home-manager.follows = "home-manager"; inputs.home-manager.follows = "home-manager";
}; };
llm-agents.url = "github:Qumulo/llm-agents";
# nix-flatpak.url = "github:gmodena/nix-flatpak/?ref=v0.6.0"; # nix-flatpak.url = "github:gmodena/nix-flatpak/?ref=v0.6.0";
}; };
@@ -26,8 +27,7 @@
nixpkgs, nixpkgs,
home-manager, home-manager,
impermanence, impermanence,
plasma-manager, # nix-flatpak,
# nix-flatpak,
}@inputs: }@inputs:
let let
inherit (self) outputs; inherit (self) outputs;

View File

@@ -33,13 +33,13 @@
blasSupport = true; blasSupport = true;
}).overrideAttrs }).overrideAttrs
(oldAttrs: rec { (oldAttrs: rec {
version = "8184"; version = "8209";
src = pkgs.fetchFromGitHub { src = pkgs.fetchFromGitHub {
owner = "aagit"; owner = "ggml-org";
repo = "llama.cpp"; repo = "llama.cpp";
# tag = "b${version}"; tag = "b${version}";
rev = "6ebf2e0d00d31acfc1a1fa9662e9a7d38bd07bf7"; # https://github.com/ggml-org/llama.cpp/pull/19970 # rev = "6ebf2e0d00d31acfc1a1fa9662e9a7d38bd07bf7"; # https://github.com/ggml-org/llama.cpp/pull/19970
hash = "sha256-xryajW0Cs1d+WDijspMTW21FDaZP9Grkb+uErMQCQ48="; hash = "sha256-7z9mQZ/hgNS+doLCVPtax+FBhr6dEfmR9wZJTwtl/pM=";
leaveDotGit = true; leaveDotGit = true;
postFetch = '' postFetch = ''
git -C "$out" rev-parse --short HEAD > $out/COMMIT git -C "$out" rev-parse --short HEAD > $out/COMMIT
@@ -54,7 +54,30 @@
# for reproducible builds). We sacrifice portability for faster CPU layers. # for reproducible builds). We sacrifice portability for faster CPU layers.
cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [ cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
"-DGGML_NATIVE=ON" "-DGGML_NATIVE=ON"
"-DCMAKE_CUDA_ARCHITECTURES=86" # RTX 3090 - needed since sandbox has no GPU "-DGGML_LTO=ON" # Link Time Optimization for overall binary speed
"-DCMAKE_CUDA_ARCHITECTURES=86" # RTX 3090
"-DGGML_CUDA=ON"
"-DGGML_CUDA_FA=ON" # FlashAttention kernels (accelerated attention)
"-DGGML_CUDA_FA_ALL_QUANTS=ON" # Support for all KV cache quant types in FA
"-DGGML_CUDA_GRAPHS=ON" # CUDA Graphs for lower overhead inference
"-DGGML_CUDA_FORCE_CUBLAS=ON" # cuBLAS optimized prompt processing for Ampere+
"-DGGML_CUDA_PEER_MAX_BATCH_SIZE=256" # Increased for multi-GPU efficiency (split mode)
"-DGGML_CUDA_COMPRESSION_MODE=speed" # Fast binary loading (CUDA 12.8+)
"-DGGML_OPENMP=ON" # Optimal multi-threading on CPU
"-DGGML_LLAMAFILE=ON" # Use llamafile sgemm for faster CPU layers
"-DGGML_CPU_REPACK=ON" # Optimize Q4_0 quant handling
"-DGGML_AVX=ON"
"-DGGML_AVX2=ON"
"-DGGML_FMA=ON"
"-DGGML_F16C=ON"
"-DGGML_AVX512=ON" # Intel AVX-512 extensions
"-DGGML_AVX512_VNNI=ON" # Vector Neural Network Instructions
"-DGGML_AVX512_BF16=ON" # Bfloat16 support
"-DGGML_AVX_VNNI=ON" # VNNI for processors without AVX-512
"-DGGML_AMX_TILE=ON" # Intel Advanced Matrix Extensions (Sapphire Rapids+)
"-DGGML_AMX_INT8=ON"
"-DGGML_AMX_BF16=ON"
"-DGGML_BLAS=ON" # Uses internal BLAS provided by Nix (blasSupport=true works)
]; ];
# Disable Nix's NIX_ENFORCE_NO_NATIVE which strips -march=native flags # Disable Nix's NIX_ENFORCE_NO_NATIVE which strips -march=native flags
@@ -178,7 +201,7 @@
qpwgraph qpwgraph
libimobiledevice libimobiledevice
ifuse ifuse
neofetch fastfetch
gimp # Despite the fact it falls under creative an image editor is too important to leave out. gimp # Despite the fact it falls under creative an image editor is too important to leave out.
zip zip
xz xz

View File

@@ -6,6 +6,7 @@
config, config,
lib, lib,
pkgs, pkgs,
inputs,
... ...
}: }:
{ {
@@ -42,11 +43,14 @@
# TODO: honestly, while I currently only have one pc that can run local AI, might change in the future. # TODO: honestly, while I currently only have one pc that can run local AI, might change in the future.
# And this config is getting a bit complicated for a single pc config # And this config is getting a bit complicated for a single pc config
# Should be moved to its own shit # Should be moved to its own shit
environment.systemPackages = with pkgs; [ environment.systemPackages = [
ollama-cuda pkgs.ollama-cuda
opencode pkgs.opencode
llama-cpp pkgs.llama-cpp
llama-swap pkgs.llama-swap
inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.pi
inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.rtk
]; ];
services.ollama = { services.ollama = {
@@ -87,32 +91,35 @@
environment.etc."llama-swap/config.yaml".text = '' environment.etc."llama-swap/config.yaml".text = ''
models: models:
"Qwen3.5-35B-A3B-GGUF": "Qwen3.5-35B-A3B-GGUF":
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_S --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --cpu-moe --fit-target 256 --ubatch-size 1024 -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32 cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-IQ4_XS --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --fit-target 256 --ubatch-size 1024 -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32 --no-kv-offload
ttl: 2400 ttl: 2400
"Qwen3-1.7B-GGUF": "Qwen3-1.7B-GGUF":
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu
ttl: 120 ttl: 300
"Qwen3-8B-GGUF": "Qwen3-8B-GGUF":
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1
ttl: 120 ttl: 300
"Qwen3-4B-Claude-Opus-Distill": "Qwen3-4B-Claude-Opus-Distill":
cmd: llama-server --port ''${PORT} -hf TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill-GGUF:Q4_K_M --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 cmd: llama-server --port ''${PORT} -hf TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill-GGUF:Q4_K_M --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256
ttl: 120 ttl: 300
"Qwen3.5-9B-Thinking": "Qwen3.5-9B-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj" cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj --no-kv-offload"
ttl: 120 ttl: 300
"Qwen3.5-9B-Claude-Opus-Distill":
cmd: "llama-server --port ''${PORT} -hf Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF:Q4_K_S --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 512 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj"
ttl: 300
"Qwen3.5-4B-Thinking": "Qwen3.5-4B-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}'" cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}'"
ttl: 120 ttl: 300
"Qwen3.5-9B-Non-Thinking": "Qwen3.5-9B-Non-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --no-mmproj" cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --no-mmproj"
ttl: 120 ttl: 300
"Qwen3.5-4B-Non-Thinking": "Qwen3.5-4B-Non-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256" cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
ttl: 120 ttl: 300
"Qwen3.5-0.8B-Non-Thinking": "Qwen3.5-0.8B-Non-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256" cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
ttl: 120''; ttl: 300'';
# Set your time zone. # Set your time zone.
# time.timeZone = "Europe/Amsterdam"; # time.timeZone = "Europe/Amsterdam";

View File

@@ -5,6 +5,7 @@
remotePlay.openFirewall = true; # Open ports in the firewall for Steam Remote Play remotePlay.openFirewall = true; # Open ports in the firewall for Steam Remote Play
dedicatedServer.openFirewall = true; # Open ports in the firewall for Source Dedicated Server dedicatedServer.openFirewall = true; # Open ports in the firewall for Source Dedicated Server
localNetworkGameTransfers.openFirewall = true; # Open ports in the firewall for Steam Local Network Game Transfers localNetworkGameTransfers.openFirewall = true; # Open ports in the firewall for Steam Local Network Game Transfers
protontricks.enable = true;
}; };
environment.systemPackages = with pkgs; [ environment.systemPackages = with pkgs; [