Compare commits
3 Commits
67c9ce550d
...
ca3c740b66
| Author | SHA1 | Date | |
|---|---|---|---|
| ca3c740b66 | |||
| 4b3a369c6e | |||
| b7e89f4da4 |
185
flake.lock
generated
185
flake.lock
generated
@@ -1,5 +1,85 @@
|
||||
{
|
||||
"nodes": {
|
||||
"blueprint": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"llm-agents",
|
||||
"nixpkgs"
|
||||
],
|
||||
"systems": [
|
||||
"llm-agents",
|
||||
"systems"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1771437256,
|
||||
"narHash": "sha256-bLqwib+rtyBRRVBWhMuBXPCL/OThfokA+j6+uH7jDGU=",
|
||||
"owner": "numtide",
|
||||
"repo": "blueprint",
|
||||
"rev": "06ee7190dc2620ea98af9eb225aa9627b68b0e33",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"repo": "blueprint",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"bun2nix": {
|
||||
"inputs": {
|
||||
"flake-parts": [
|
||||
"llm-agents",
|
||||
"flake-parts"
|
||||
],
|
||||
"import-tree": "import-tree",
|
||||
"nixpkgs": [
|
||||
"llm-agents",
|
||||
"nixpkgs"
|
||||
],
|
||||
"systems": [
|
||||
"llm-agents",
|
||||
"systems"
|
||||
],
|
||||
"treefmt-nix": [
|
||||
"llm-agents",
|
||||
"treefmt-nix"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1770895533,
|
||||
"narHash": "sha256-v3QaK9ugy9bN9RXDnjw0i2OifKmz2NnKM82agtqm/UY=",
|
||||
"owner": "nix-community",
|
||||
"repo": "bun2nix",
|
||||
"rev": "c843f477b15f51151f8c6bcc886954699440a6e1",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-community",
|
||||
"repo": "bun2nix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"flake-parts": {
|
||||
"inputs": {
|
||||
"nixpkgs-lib": [
|
||||
"llm-agents",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1772408722,
|
||||
"narHash": "sha256-rHuJtdcOjK7rAHpHphUb1iCvgkU3GpfvicLMwwnfMT0=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "flake-parts",
|
||||
"rev": "f20dc5d9b8027381c474144ecabc9034d6a839a3",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "hercules-ci",
|
||||
"repo": "flake-parts",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"home-manager": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
@@ -7,11 +87,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1772060133,
|
||||
"narHash": "sha256-VuyRptb8v1lVGMlLp4/1vRX3Efwec0CN0S6mKmDPzLg=",
|
||||
"lastModified": 1772845525,
|
||||
"narHash": "sha256-Dp5Ir2u4jJDGCgeMRviHvEQDe+U37hMxp6RSNOoMMPc=",
|
||||
"owner": "nix-community",
|
||||
"repo": "home-manager",
|
||||
"rev": "ce9b6e52500a0ea0ec48f0bbf6d7a3e431d9dfa4",
|
||||
"rev": "27b93804fbef1544cb07718d3f0a451f4c4cd6c0",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -61,6 +141,44 @@
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"import-tree": {
|
||||
"locked": {
|
||||
"lastModified": 1763762820,
|
||||
"narHash": "sha256-ZvYKbFib3AEwiNMLsejb/CWs/OL/srFQ8AogkebEPF0=",
|
||||
"owner": "vic",
|
||||
"repo": "import-tree",
|
||||
"rev": "3c23749d8013ec6daa1d7255057590e9ca726646",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "vic",
|
||||
"repo": "import-tree",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"llm-agents": {
|
||||
"inputs": {
|
||||
"blueprint": "blueprint",
|
||||
"bun2nix": "bun2nix",
|
||||
"flake-parts": "flake-parts",
|
||||
"nixpkgs": "nixpkgs_2",
|
||||
"systems": "systems",
|
||||
"treefmt-nix": "treefmt-nix"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1773322058,
|
||||
"narHash": "sha256-xYQ32BrphBupOi+rTm3XF3URFEmH0kHYVRi58fMER0I=",
|
||||
"owner": "Qumulo",
|
||||
"repo": "llm-agents",
|
||||
"rev": "efdf1f01a8474c3c6c2d0f95a66c97b1baec4dc7",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "Qumulo",
|
||||
"repo": "llm-agents",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1768564909,
|
||||
@@ -79,11 +197,27 @@
|
||||
},
|
||||
"nixpkgs_2": {
|
||||
"locked": {
|
||||
"lastModified": 1771848320,
|
||||
"narHash": "sha256-0MAd+0mun3K/Ns8JATeHT1sX28faLII5hVLq0L3BdZU=",
|
||||
"lastModified": 1773201692,
|
||||
"narHash": "sha256-NXrKzNMniu4Oam2kAFvqJ3GB2kAvlAFIriTAheaY8hw=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "b6067cc0127d4db9c26c79e4de0513e58d0c40c9",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixpkgs-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs_3": {
|
||||
"locked": {
|
||||
"lastModified": 1772773019,
|
||||
"narHash": "sha256-E1bxHxNKfDoQUuvriG71+f+s/NT0qWkImXsYZNFFfCs=",
|
||||
"owner": "nixos",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "2fc6539b481e1d2569f25f8799236694180c0993",
|
||||
"rev": "aca4d95fce4914b3892661bcb80b8087293536c6",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -120,9 +254,46 @@
|
||||
"inputs": {
|
||||
"home-manager": "home-manager",
|
||||
"impermanence": "impermanence",
|
||||
"nixpkgs": "nixpkgs",
|
||||
"llm-agents": "llm-agents",
|
||||
"nixpkgs": "nixpkgs_3",
|
||||
"plasma-manager": "plasma-manager"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"treefmt-nix": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"llm-agents",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1773297127,
|
||||
"narHash": "sha256-6E/yhXP7Oy/NbXtf1ktzmU8SdVqJQ09HC/48ebEGBpk=",
|
||||
"owner": "numtide",
|
||||
"repo": "treefmt-nix",
|
||||
"rev": "71b125cd05fbfd78cab3e070b73544abe24c5016",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"repo": "treefmt-nix",
|
||||
"type": "github"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
inputs.home-manager.follows = "home-manager";
|
||||
};
|
||||
llm-agents.url = "github:Qumulo/llm-agents";
|
||||
# nix-flatpak.url = "github:gmodena/nix-flatpak/?ref=v0.6.0";
|
||||
};
|
||||
|
||||
@@ -26,7 +27,6 @@
|
||||
nixpkgs,
|
||||
home-manager,
|
||||
impermanence,
|
||||
plasma-manager,
|
||||
# nix-flatpak,
|
||||
}@inputs:
|
||||
let
|
||||
|
||||
@@ -33,13 +33,13 @@
|
||||
blasSupport = true;
|
||||
}).overrideAttrs
|
||||
(oldAttrs: rec {
|
||||
version = "8184";
|
||||
version = "8209";
|
||||
src = pkgs.fetchFromGitHub {
|
||||
owner = "aagit";
|
||||
owner = "ggml-org";
|
||||
repo = "llama.cpp";
|
||||
# tag = "b${version}";
|
||||
rev = "6ebf2e0d00d31acfc1a1fa9662e9a7d38bd07bf7"; # https://github.com/ggml-org/llama.cpp/pull/19970
|
||||
hash = "sha256-xryajW0Cs1d+WDijspMTW21FDaZP9Grkb+uErMQCQ48=";
|
||||
tag = "b${version}";
|
||||
# rev = "6ebf2e0d00d31acfc1a1fa9662e9a7d38bd07bf7"; # https://github.com/ggml-org/llama.cpp/pull/19970
|
||||
hash = "sha256-7z9mQZ/hgNS+doLCVPtax+FBhr6dEfmR9wZJTwtl/pM=";
|
||||
leaveDotGit = true;
|
||||
postFetch = ''
|
||||
git -C "$out" rev-parse --short HEAD > $out/COMMIT
|
||||
@@ -54,7 +54,30 @@
|
||||
# for reproducible builds). We sacrifice portability for faster CPU layers.
|
||||
cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
|
||||
"-DGGML_NATIVE=ON"
|
||||
"-DCMAKE_CUDA_ARCHITECTURES=86" # RTX 3090 - needed since sandbox has no GPU
|
||||
"-DGGML_LTO=ON" # Link Time Optimization for overall binary speed
|
||||
"-DCMAKE_CUDA_ARCHITECTURES=86" # RTX 3090
|
||||
"-DGGML_CUDA=ON"
|
||||
"-DGGML_CUDA_FA=ON" # FlashAttention kernels (accelerated attention)
|
||||
"-DGGML_CUDA_FA_ALL_QUANTS=ON" # Support for all KV cache quant types in FA
|
||||
"-DGGML_CUDA_GRAPHS=ON" # CUDA Graphs for lower overhead inference
|
||||
"-DGGML_CUDA_FORCE_CUBLAS=ON" # cuBLAS optimized prompt processing for Ampere+
|
||||
"-DGGML_CUDA_PEER_MAX_BATCH_SIZE=256" # Increased for multi-GPU efficiency (split mode)
|
||||
"-DGGML_CUDA_COMPRESSION_MODE=speed" # Fast binary loading (CUDA 12.8+)
|
||||
"-DGGML_OPENMP=ON" # Optimal multi-threading on CPU
|
||||
"-DGGML_LLAMAFILE=ON" # Use llamafile sgemm for faster CPU layers
|
||||
"-DGGML_CPU_REPACK=ON" # Optimize Q4_0 quant handling
|
||||
"-DGGML_AVX=ON"
|
||||
"-DGGML_AVX2=ON"
|
||||
"-DGGML_FMA=ON"
|
||||
"-DGGML_F16C=ON"
|
||||
"-DGGML_AVX512=ON" # Intel AVX-512 extensions
|
||||
"-DGGML_AVX512_VNNI=ON" # Vector Neural Network Instructions
|
||||
"-DGGML_AVX512_BF16=ON" # Bfloat16 support
|
||||
"-DGGML_AVX_VNNI=ON" # VNNI for processors without AVX-512
|
||||
"-DGGML_AMX_TILE=ON" # Intel Advanced Matrix Extensions (Sapphire Rapids+)
|
||||
"-DGGML_AMX_INT8=ON"
|
||||
"-DGGML_AMX_BF16=ON"
|
||||
"-DGGML_BLAS=ON" # Uses internal BLAS provided by Nix (blasSupport=true works)
|
||||
];
|
||||
|
||||
# Disable Nix's NIX_ENFORCE_NO_NATIVE which strips -march=native flags
|
||||
@@ -178,7 +201,7 @@
|
||||
qpwgraph
|
||||
libimobiledevice
|
||||
ifuse
|
||||
neofetch
|
||||
fastfetch
|
||||
gimp # Despite falling under "creative", an image editor is too important to leave out.
|
||||
zip
|
||||
xz
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
inputs,
|
||||
...
|
||||
}:
|
||||
{
|
||||
@@ -42,11 +43,14 @@
|
||||
# TODO: I currently only have one PC that can run local AI, but that might change in the future.
|
||||
# And this config is getting a bit complicated for a single pc config
|
||||
# Should be moved to its own module
|
||||
environment.systemPackages = with pkgs; [
|
||||
ollama-cuda
|
||||
opencode
|
||||
llama-cpp
|
||||
llama-swap
|
||||
environment.systemPackages = [
|
||||
pkgs.ollama-cuda
|
||||
pkgs.opencode
|
||||
pkgs.llama-cpp
|
||||
pkgs.llama-swap
|
||||
|
||||
inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.pi
|
||||
inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.rtk
|
||||
];
|
||||
|
||||
services.ollama = {
|
||||
@@ -87,32 +91,35 @@
|
||||
environment.etc."llama-swap/config.yaml".text = ''
|
||||
models:
|
||||
"Qwen3.5-35B-A3B-GGUF":
|
||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q4_K_S --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --cpu-moe --fit-target 256 --ubatch-size 1024 -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32
|
||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-IQ4_XS --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --fit-target 256 --ubatch-size 1024 -fa on --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32 --no-kv-offload
|
||||
ttl: 2400
|
||||
"Qwen3-1.7B-GGUF":
|
||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu
|
||||
ttl: 120
|
||||
ttl: 300
|
||||
"Qwen3-8B-GGUF":
|
||||
cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1
|
||||
ttl: 120
|
||||
ttl: 300
|
||||
"Qwen3-4B-Claude-Opus-Distill":
|
||||
cmd: llama-server --port ''${PORT} -hf TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill-GGUF:Q4_K_M --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256
|
||||
ttl: 120
|
||||
ttl: 300
|
||||
"Qwen3.5-9B-Thinking":
|
||||
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj"
|
||||
ttl: 120
|
||||
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj --no-kv-offload"
|
||||
ttl: 300
|
||||
"Qwen3.5-9B-Claude-Opus-Distill":
|
||||
cmd: "llama-server --port ''${PORT} -hf Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF:Q4_K_S --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 512 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj"
|
||||
ttl: 300
|
||||
"Qwen3.5-4B-Thinking":
|
||||
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}'"
|
||||
ttl: 120
|
||||
ttl: 300
|
||||
"Qwen3.5-9B-Non-Thinking":
|
||||
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --no-mmproj"
|
||||
ttl: 120
|
||||
ttl: 300
|
||||
"Qwen3.5-4B-Non-Thinking":
|
||||
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
|
||||
ttl: 120
|
||||
ttl: 300
|
||||
"Qwen3.5-0.8B-Non-Thinking":
|
||||
cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
|
||||
ttl: 120'';
|
||||
ttl: 300'';
|
||||
|
||||
# Set your time zone.
|
||||
# time.timeZone = "Europe/Amsterdam";
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
remotePlay.openFirewall = true; # Open ports in the firewall for Steam Remote Play
|
||||
dedicatedServer.openFirewall = true; # Open ports in the firewall for Source Dedicated Server
|
||||
localNetworkGameTransfers.openFirewall = true; # Open ports in the firewall for Steam Local Network Game Transfers
|
||||
protontricks.enable = true;
|
||||
};
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
|
||||
Reference in New Issue
Block a user