Compare commits

..

3 Commits

5 changed files with 233 additions and 31 deletions

185
flake.lock generated
View File

@@ -1,5 +1,85 @@
{
"nodes": {
"blueprint": {
"inputs": {
"nixpkgs": [
"llm-agents",
"nixpkgs"
],
"systems": [
"llm-agents",
"systems"
]
},
"locked": {
"lastModified": 1771437256,
"narHash": "sha256-bLqwib+rtyBRRVBWhMuBXPCL/OThfokA+j6+uH7jDGU=",
"owner": "numtide",
"repo": "blueprint",
"rev": "06ee7190dc2620ea98af9eb225aa9627b68b0e33",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "blueprint",
"type": "github"
}
},
"bun2nix": {
"inputs": {
"flake-parts": [
"llm-agents",
"flake-parts"
],
"import-tree": "import-tree",
"nixpkgs": [
"llm-agents",
"nixpkgs"
],
"systems": [
"llm-agents",
"systems"
],
"treefmt-nix": [
"llm-agents",
"treefmt-nix"
]
},
"locked": {
"lastModified": 1770895533,
"narHash": "sha256-v3QaK9ugy9bN9RXDnjw0i2OifKmz2NnKM82agtqm/UY=",
"owner": "nix-community",
"repo": "bun2nix",
"rev": "c843f477b15f51151f8c6bcc886954699440a6e1",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "bun2nix",
"type": "github"
}
},
"flake-parts": {
"inputs": {
"nixpkgs-lib": [
"llm-agents",
"nixpkgs"
]
},
"locked": {
"lastModified": 1772408722,
"narHash": "sha256-rHuJtdcOjK7rAHpHphUb1iCvgkU3GpfvicLMwwnfMT0=",
"owner": "hercules-ci",
"repo": "flake-parts",
"rev": "f20dc5d9b8027381c474144ecabc9034d6a839a3",
"type": "github"
},
"original": {
"owner": "hercules-ci",
"repo": "flake-parts",
"type": "github"
}
},
"home-manager": {
"inputs": {
"nixpkgs": [
@@ -7,11 +87,11 @@
]
},
"locked": {
"lastModified": 1772060133,
"narHash": "sha256-VuyRptb8v1lVGMlLp4/1vRX3Efwec0CN0S6mKmDPzLg=",
"lastModified": 1772845525,
"narHash": "sha256-Dp5Ir2u4jJDGCgeMRviHvEQDe+U37hMxp6RSNOoMMPc=",
"owner": "nix-community",
"repo": "home-manager",
"rev": "ce9b6e52500a0ea0ec48f0bbf6d7a3e431d9dfa4",
"rev": "27b93804fbef1544cb07718d3f0a451f4c4cd6c0",
"type": "github"
},
"original": {
@@ -61,6 +141,44 @@
"type": "github"
}
},
"import-tree": {
"locked": {
"lastModified": 1763762820,
"narHash": "sha256-ZvYKbFib3AEwiNMLsejb/CWs/OL/srFQ8AogkebEPF0=",
"owner": "vic",
"repo": "import-tree",
"rev": "3c23749d8013ec6daa1d7255057590e9ca726646",
"type": "github"
},
"original": {
"owner": "vic",
"repo": "import-tree",
"type": "github"
}
},
"llm-agents": {
"inputs": {
"blueprint": "blueprint",
"bun2nix": "bun2nix",
"flake-parts": "flake-parts",
"nixpkgs": "nixpkgs_2",
"systems": "systems",
"treefmt-nix": "treefmt-nix"
},
"locked": {
"lastModified": 1773322058,
"narHash": "sha256-xYQ32BrphBupOi+rTm3XF3URFEmH0kHYVRi58fMER0I=",
"owner": "Qumulo",
"repo": "llm-agents",
"rev": "efdf1f01a8474c3c6c2d0f95a66c97b1baec4dc7",
"type": "github"
},
"original": {
"owner": "Qumulo",
"repo": "llm-agents",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1768564909,
@@ -79,11 +197,27 @@
},
"nixpkgs_2": {
"locked": {
"lastModified": 1771848320,
"narHash": "sha256-0MAd+0mun3K/Ns8JATeHT1sX28faLII5hVLq0L3BdZU=",
"lastModified": 1773201692,
"narHash": "sha256-NXrKzNMniu4Oam2kAFvqJ3GB2kAvlAFIriTAheaY8hw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "b6067cc0127d4db9c26c79e4de0513e58d0c40c9",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_3": {
"locked": {
"lastModified": 1772773019,
"narHash": "sha256-E1bxHxNKfDoQUuvriG71+f+s/NT0qWkImXsYZNFFfCs=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "2fc6539b481e1d2569f25f8799236694180c0993",
"rev": "aca4d95fce4914b3892661bcb80b8087293536c6",
"type": "github"
},
"original": {
@@ -120,9 +254,46 @@
"inputs": {
"home-manager": "home-manager",
"impermanence": "impermanence",
"nixpkgs": "nixpkgs",
"llm-agents": "llm-agents",
"nixpkgs": "nixpkgs_3",
"plasma-manager": "plasma-manager"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"treefmt-nix": {
"inputs": {
"nixpkgs": [
"llm-agents",
"nixpkgs"
]
},
"locked": {
"lastModified": 1773297127,
"narHash": "sha256-6E/yhXP7Oy/NbXtf1ktzmU8SdVqJQ09HC/48ebEGBpk=",
"owner": "numtide",
"repo": "treefmt-nix",
"rev": "71b125cd05fbfd78cab3e070b73544abe24c5016",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "treefmt-nix",
"type": "github"
}
}
},
"root": "root",

View File

@@ -17,6 +17,7 @@
inputs.nixpkgs.follows = "nixpkgs";
inputs.home-manager.follows = "home-manager";
};
llm-agents.url = "github:Qumulo/llm-agents";
# nix-flatpak.url = "github:gmodena/nix-flatpak/?ref=v0.6.0";
};
@@ -26,8 +27,7 @@
nixpkgs,
home-manager,
impermanence,
plasma-manager,
# nix-flatpak,
# nix-flatpak,
}@inputs:
let
inherit (self) outputs;

View File

@@ -33,13 +33,13 @@
blasSupport = true;
}).overrideAttrs
(oldAttrs: rec {
version = "8184";
version = "8209";
src = pkgs.fetchFromGitHub {
owner = "aagit";
owner = "ggml-org";
repo = "llama.cpp";
# tag = "b${version}";
rev = "6ebf2e0d00d31acfc1a1fa9662e9a7d38bd07bf7"; # https://github.com/ggml-org/llama.cpp/pull/19970
hash = "sha256-xryajW0Cs1d+WDijspMTW21FDaZP9Grkb+uErMQCQ48=";
tag = "b${version}";
# rev = "6ebf2e0d00d31acfc1a1fa9662e9a7d38bd07bf7"; # https://github.com/ggml-org/llama.cpp/pull/19970
hash = "sha256-7z9mQZ/hgNS+doLCVPtax+FBhr6dEfmR9wZJTwtl/pM=";
leaveDotGit = true;
postFetch = ''
git -C "$out" rev-parse --short HEAD > $out/COMMIT
@@ -54,7 +54,30 @@
# for reproducible builds). We sacrifice portability for faster CPU layers.
cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
"-DGGML_NATIVE=ON"
"-DCMAKE_CUDA_ARCHITECTURES=86" # RTX 3090 - needed since sandbox has no GPU
"-DGGML_LTO=ON" # Link Time Optimization for overall binary speed
"-DCMAKE_CUDA_ARCHITECTURES=86" # RTX 3090
"-DGGML_CUDA=ON"
"-DGGML_CUDA_FA=ON" # FlashAttention kernels (accelerated attention)
"-DGGML_CUDA_FA_ALL_QUANTS=ON" # Support for all KV cache quant types in FA
"-DGGML_CUDA_GRAPHS=ON" # CUDA Graphs for lower overhead inference
"-DGGML_CUDA_FORCE_CUBLAS=ON" # cuBLAS optimized prompt processing for Ampere+
"-DGGML_CUDA_PEER_MAX_BATCH_SIZE=256" # Increased for multi-GPU efficiency (split mode)
"-DGGML_CUDA_COMPRESSION_MODE=speed" # Fast binary loading (CUDA 12.8+)
"-DGGML_OPENMP=ON" # Optimal multi-threading on CPU
"-DGGML_LLAMAFILE=ON" # Use llamafile sgemm for faster CPU layers
"-DGGML_CPU_REPACK=ON" # Optimize Q4_0 quant handling
"-DGGML_AVX=ON"
"-DGGML_AVX2=ON"
"-DGGML_FMA=ON"
"-DGGML_F16C=ON"
"-DGGML_AVX512=ON" # Intel AVX-512 extensions
"-DGGML_AVX512_VNNI=ON" # Vector Neural Network Instructions
"-DGGML_AVX512_BF16=ON" # Bfloat16 support
"-DGGML_AVX_VNNI=ON" # VNNI for processors without AVX-512
"-DGGML_AMX_TILE=ON" # Intel Advanced Matrix Extensions (Sapphire Rapids+)
"-DGGML_AMX_INT8=ON"
"-DGGML_AMX_BF16=ON"
"-DGGML_BLAS=ON" # Uses internal BLAS provided by Nix (blasSupport=true works)
];
# Disable Nix's NIX_ENFORCE_NO_NATIVE which strips -march=native flags
@@ -178,7 +201,7 @@
qpwgraph
libimobiledevice
ifuse
neofetch
fastfetch
gimp # Despite the fact it falls under creative an image editor is too important to leave out.
zip
xz

View File

@@ -6,6 +6,7 @@
config,
lib,
pkgs,
inputs,
...
}:
{
@@ -42,11 +43,14 @@
# TODO: honestly, while I currently only have one pc that can run local AI, might change in the future.
# And this config is getting a bit complicated for a single pc config
# Should be moved to it's own shit
environment.systemPackages = with pkgs; [
ollama-cuda
opencode
llama-cpp
llama-swap
environment.systemPackages = [
pkgs.ollama-cuda
pkgs.opencode
pkgs.llama-cpp
pkgs.llama-swap
inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.pi
inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.rtk
];
services.ollama = {
@@ -87,32 +91,35 @@
environment.etc."llama-swap/config.yaml".text = ''
models:
"Qwen3.5-35B-A3B-GGUF":
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_S --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --cpu-moe --fit-target 256 --ubatch-size 1024 -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-IQ4_XS --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --fit-target 256 --ubatch-size 1024 -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32 --no-kv-offload
ttl: 2400
"Qwen3-1.7B-GGUF":
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu
ttl: 120
ttl: 300
"Qwen3-8B-GGUF":
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1
ttl: 120
ttl: 300
"Qwen3-4B-Claude-Opus-Distill":
cmd: llama-server --port ''${PORT} -hf TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill-GGUF:Q4_K_M --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256
ttl: 120
ttl: 300
"Qwen3.5-9B-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj"
ttl: 120
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj --no-kv-offload"
ttl: 300
"Qwen3.5-9B-Claude-Opus-Distill":
cmd: "llama-server --port ''${PORT} -hf Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF:Q4_K_S --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 512 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj"
ttl: 300
"Qwen3.5-4B-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}'"
ttl: 120
ttl: 300
"Qwen3.5-9B-Non-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --no-mmproj"
ttl: 120
ttl: 300
"Qwen3.5-4B-Non-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
ttl: 120
ttl: 300
"Qwen3.5-0.8B-Non-Thinking":
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
ttl: 120'';
ttl: 300'';
# Set your time zone.
# time.timeZone = "Europe/Amsterdam";

View File

@@ -5,6 +5,7 @@
remotePlay.openFirewall = true; # Open ports in the firewall for Steam Remote Play
dedicatedServer.openFirewall = true; # Open ports in the firewall for Source Dedicated Server
localNetworkGameTransfers.openFirewall = true; # Open ports in the firewall for Steam Local Network Game Transfers
protontricks.enable = true;
};
environment.systemPackages = with pkgs; [