im gonna die

This commit is contained in:
2026-02-26 21:01:53 -05:00
parent a2fd8acc07
commit a92363a211
6 changed files with 103 additions and 14 deletions

View File

@@ -42,11 +42,42 @@
# Packages made available on the system-wide PATH: the CUDA build of Ollama,
# the opencode CLI, llama.cpp, and the llama-swap proxy.
# NOTE(review): services.ollama is enabled with the same package in this file;
# the NixOS module may already install it, making the first entry redundant — confirm.
environment.systemPackages = [
  pkgs.ollama-cuda
  pkgs.opencode
  pkgs.llama-cpp
  pkgs.llama-swap
];
# Run the Ollama service using the CUDA-enabled package.
services.ollama.enable = true;
services.ollama.package = pkgs.ollama-cuda;

# Environment variables handed to the Ollama service. All values are strings.
services.ollama.environmentVariables = {
  # Context length of 16384.
  OLLAMA_CONTEXT_LENGTH = "16384";
  # KV cache type q4_0, with flash attention switched on alongside it.
  OLLAMA_KV_CACHE_TYPE = "q4_0";
  OLLAMA_FLASH_ATTENTION = "1";
  # Parallelism setting of 1.
  OLLAMA_NUM_PARALLEL = "1";
};
# systemd unit that runs llama-swap in the foreground as user "laythe",
# started with multi-user.target and ordered after network.target.
systemd.services.llama-swap =
  let
    # Full command line: reads /etc/llama-swap/config.yaml, listens on port
    # 9292 on all interfaces (0.0.0.0), and passes --watch-config.
    launchCommand = "${pkgs.llama-swap}/bin/llama-swap --config /etc/llama-swap/config.yaml --listen 0.0.0.0:9292 --watch-config";

    # Process environment for the service (the original comment describes
    # this as being for CUDA support). PATH is limited to the system profile's
    # bin directory — presumably so binaries named in the llama-swap config
    # resolve there; confirm against config.yaml.
    serviceEnvironment = [
      "PATH=/run/current-system/sw/bin"
      "LD_LIBRARY_PATH=/run/opengl-driver/lib:/run/opengl-driver-32/lib"
    ];
  in
  {
    description = "llama-swap - OpenAI compatible proxy with automatic model swapping";
    wantedBy = [ "multi-user.target" ];
    after = [ "network.target" ];

    serviceConfig = {
      Type = "simple";
      ExecStart = launchCommand;
      Environment = serviceEnvironment;

      # Account the process runs under.
      User = "laythe";
      Group = "users";

      # Restart unconditionally, waiting 10 seconds between attempts.
      Restart = "always";
      RestartSec = 10;
    };
  };
# Set your time zone.