nixos-configv3/machines/homepc/configuration.nix

# Edit this configuration file to define what should be installed on
# your system. Help is available in the configuration.nix(5) man page, on
# https://search.nixos.org/options and in the NixOS manual (`nixos-help`).

{
  config,
  lib,
  pkgs,
  inputs,
  ...
}:
{
  imports = [
    ./hardware-configuration.nix
    ../../global/default.nix
    ../../modules/communication/default.nix
    ../../modules/creative/default.nix
    ../../modules/development/default.nix
    ../../modules/fabrication/default.nix
    ../../modules/gaming/default.nix
    ../../modules/kde/default.nix
    ../../modules/virtualization/default.nix
    ../../modules/vr/default.nix
  ];

  # Use the systemd-boot EFI boot loader.
  boot.loader.systemd-boot.enable = true;
  boot.loader.efi.canTouchEfiVariables = true;

  networking.hostName = "homepc"; # Define your hostname.

  # Configure network connections interactively with nmcli or nmtui.
  networking.networkmanager.enable = true;

  # Only computer I own that can actually run ollama, and I don't want to make an ai folder.
  # services.ollama = {
  #   enable = true;
  #   acceleration = "cuda";
  #   host = "[::]";
  # };

  # This is here because I don't have another computer that could run local AI, and regardless the packages would be different on every one.
  # TODO: honestly, while I currently only have one pc that can run local AI, might change in the future.
  # And this config is getting a bit complicated for a single pc config
  # Should be moved to it's own shit
  environment.systemPackages = [
    pkgs.ollama-cuda
    pkgs.opencode
    pkgs.llama-cpp
    pkgs.llama-swap
    # pkgs.agent-browser

    inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.pi
    inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.rtk
  ];

  services.ollama = {
    enable = true;
    package = pkgs.ollama-cuda;
    environmentVariables = {
      OLLAMA_NUM_PARALLEL = "1";
      OLLAMA_FLASH_ATTENTION = "1";
      OLLAMA_KV_CACHE_TYPE = "q4_0";
      OLLAMA_CONTEXT_LENGTH = "16384";
    };
  };

  # Configure llama-swap as a systemd service
  systemd.services.llama-swap = {
    description = "llama-swap - OpenAI compatible proxy with automatic model swapping";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      Type = "simple";
      User = "laythe";
      Group = "users";
      # Point to your declarative config file
      ExecStart = "${pkgs.llama-swap}/bin/llama-swap --config /etc/llama-swap/config.yaml --listen 0.0.0.0:9292 --watch-config";
      Restart = "always";
      RestartSec = 10;

      # Environment for CUDA support
      Environment = [
        "PATH=/run/current-system/sw/bin"
        "LD_LIBRARY_PATH=/run/opengl-driver/lib:/run/opengl-driver-32/lib"
      ];
    };
  };

  # As long as this is here the models are declarative. llama-server will grab them if not downloaded already.
  environment.etc."llama-swap/config.yaml".text = ''
    models:
      "Qwen3.6-35B-A3B-GGUF":
        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q4_K_S --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --fit-target 256 --ubatch-size 1024 -fa on  --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32 --chat-template-kwargs '{\"preserve_thinking\": true}'"
        ttl: 2400
      "Qwen3.6-35B-A3B-FLASH":
        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q3_K_M --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --fit-target 256 --ubatch-size 1024 -fa on  --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32 --chat-template-kwargs '{\"preserve_thinking\": true}'"
        ttl: 2400
      "Qwen3.5-35B-A3B-GGUF":
        cmd: llama-server --port ''${PORT} -hf mudler/Qwen3.5-35B-A3B-APEX-GGUF:Mini --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -np 1 --fit-target 256 --ubatch-size 1024 -fa on  --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32 --no-kv-offload
        ttl: 2400
      "Gemma4-26B-A4B":
        cmd: llama-server --port ''${PORT} -hf mudler/gemma-4-26B-A4B-it-heretic-APEX-GGUF:Mini --ctx-size 128000 --temp 1.0 --top-p 0.95 --top-k 64 --min-p 0.00 -np 1 --fit-target 256 --ubatch-size 1024 -fa on  --slots --slot-save-path /home/laythe/llamapcache --jinja -kvu --no-mmproj --swa-checkpoints 32 --no-kv-offload
        ttl: 2400
      "Qwen3-1.7B-GGUF":
        cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu
        ttl: 300
      "Qwen3-8B-GGUF":
        cmd: llama-server --port ''${PORT} -hf unsloth/Qwen3-8B-GGUF:Q4_K_S --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1
        ttl: 300
      "Qwen3-4B-Claude-Opus-Distill":
        cmd: llama-server --port ''${PORT} -hf TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill-GGUF:Q4_K_M --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256
        ttl: 300
      "Qwen3.5-9B-Thinking":
        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj --no-kv-offload"
        ttl: 300
      "Qwen3.5-9B-Claude-Opus-Distill":
        cmd: "llama-server --port ''${PORT} -hf Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF:Q4_K_S --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 512 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj"
        ttl: 300
      "Qwen3.5-4B-Thinking":
        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}'"
        ttl: 300
      "Qwen3.5-9B-Non-Thinking":
        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --no-mmproj"
        ttl: 300
      "Qwen3.5-4B-Non-Thinking":
        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
        ttl: 300
      "Gemma4-E4B":
        cmd: "llama-server --port ''${PORT} -hf Abhiray/gemma-4-E4B-it-heretic-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
        ttl: 300
      "Qwen3.5-0.8B-Non-Thinking":
        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-0.8B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
        ttl: 300'';

  # Set your time zone.
  # time.timeZone = "Europe/Amsterdam";

  # Configure network proxy if necessary
  # networking.proxy.default = "http://user:password@proxy:port/";
  # networking.proxy.noProxy = "127.0.0.1,localhost,internal.domain";

  # Select internationalisation properties.
  # i18n.defaultLocale = "en_US.UTF-8";
  # console = {
  #   font = "Lat2-Terminus16";
  #   keyMap = "us";
  #   useXkbConfig = true; # use xkb.options in tty.
  # };

  services.xserver.enable = true; # On anything else modern this should be false.

  # Configure keymap in X11
  # services.xserver.xkb.layout = "us";
  # services.xserver.xkb.options = "eurosign:e,caps:escape";

  # Enable CUPS to print documents.
  # services.printing.enable = true;

  # Enable sound.
  # services.pulseaudio.enable = true;
  # OR
  # services.pipewire = {
  #   enable = true;
  #   pulse.enable = true;
  # };

  # Enable touchpad support (enabled default in most desktopManager).
  # services.libinput.enable = true;

  # Define a user account. Don't forget to set a password with ‘passwd’.
  # users.users.alice = {
  #   isNormalUser = true;
  #   extraGroups = [ "wheel" ]; # Enable ‘sudo’ for the user.
  #   packages = with pkgs; [
  #     tree
  #   ];
  # };

  # programs.firefox.enable = true;

  # List packages installed in system profile.
  # You can use https://search.nixos.org/ to find more packages (and options).
  # environment.systemPackages = with pkgs; [
  #   vim # Do not forget to add an editor to edit configuration.nix! The Nano editor is also installed by default.
  #   wget
  # ];

  # Some programs need SUID wrappers, can be configured further or are
  # started in user sessions.
  # programs.mtr.enable = true;
  # programs.gnupg.agent = {
  #   enable = true;
  #   enableSSHSupport = true;
  # };

  # List services that you want to enable:

  # Enable the OpenSSH daemon.
  # services.openssh.enable = true;

  # Open ports in the firewall.
  # networking.firewall.allowedTCPPorts = [ ... ];
  # networking.firewall.allowedUDPPorts = [ ... ];
  # Or disable the firewall altogether.
  networking.firewall.enable = false;

  # Copy the NixOS configuration file and link it from the resulting system
  # (/run/current-system/configuration.nix). This is useful in case you
  # accidentally delete configuration.nix.
  # system.copySystemConfiguration = true;

  # This option defines the first version of NixOS you have installed on this particular machine,
  # and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions.
  #
  # Most users should NEVER change this value after the initial install, for any reason,
  # even if you've upgraded your system to a new NixOS release.
  #
  # This value does NOT affect the Nixpkgs version your packages and OS are pulled from,
  # so changing it will NOT upgrade your system - see https://nixos.org/manual/nixos/stable/#sec-upgrading for how
  # to actually do that.
  #
  # This value being lower than the current NixOS release does NOT mean your system is
  # out of date, out of support, or vulnerable.
  #
  # Do NOT change this value unless you have manually inspected all the changes it would make to your configuration,
  # and migrated your data accordingly.
  #
  # For more information, see `man configuration.nix` or https://nixos.org/manual/nixos/stable/options#opt-system.stateVersion .
  system.stateVersion = "24.11"; # Did you read the comment?

}