im gonna die
This commit is contained in:
@@ -42,11 +42,42 @@
|
||||
# Local-LLM tooling installed into the system profile.
environment.systemPackages = [
  pkgs.ollama-cuda
  pkgs.opencode
  pkgs.llama-cpp
  pkgs.llama-swap
];
|
||||
|
||||
# Ollama service, using the same CUDA package that is listed in systemPackages.
services.ollama.enable = true;
services.ollama.package = pkgs.ollama-cuda;

# Tuning knobs handed to the service as environment variables (all values
# are strings). NOTE(review): the q4_0 KV-cache setting presumably relies on
# flash attention being enabled alongside it — confirm against Ollama's docs.
services.ollama.environmentVariables = {
  OLLAMA_NUM_PARALLEL = "1";
  OLLAMA_CONTEXT_LENGTH = "16384";
  OLLAMA_FLASH_ATTENTION = "1";
  OLLAMA_KV_CACHE_TYPE = "q4_0";
};
|
||||
|
||||
# Run llama-swap as a systemd unit that is pulled in by multi-user.target
# and ordered after network.target.
systemd.services.llama-swap = {
  description = "llama-swap - OpenAI compatible proxy with automatic model swapping";
  wantedBy = [ "multi-user.target" ];
  after = [ "network.target" ];

  serviceConfig = {
    Type = "simple";

    # The process runs under a regular account, not root.
    User = "laythe";
    Group = "users";

    # Uses the config at /etc/llama-swap/config.yaml and listens on every
    # interface (0.0.0.0), port 9292.
    # NOTE(review): --watch-config presumably reloads on config changes — confirm.
    ExecStart = "${pkgs.llama-swap}/bin/llama-swap --config /etc/llama-swap/config.yaml --listen 0.0.0.0:9292 --watch-config";

    # Restart unconditionally, waiting 10 seconds between attempts.
    Restart = "always";
    RestartSec = 10;

    # PATH points at the system profile's bin directory; LD_LIBRARY_PATH
    # points at the OpenGL driver directories (intended for CUDA support).
    Environment = [
      "PATH=/run/current-system/sw/bin"
      "LD_LIBRARY_PATH=/run/opengl-driver/lib:/run/opengl-driver-32/lib"
    ];
  };
};
|
||||
|
||||
# Set your time zone.
|
||||
|
||||
Reference in New Issue
Block a user