diff --git a/global/default.nix b/global/default.nix
index 7438d0a..38adce1 100755
--- a/global/default.nix
+++ b/global/default.nix
@@ -235,6 +235,12 @@
     nasctui
     trilium-desktop
     haruna
+    # Expose ./nvenc_compress.sh as the `nvenc-compress` executable.
+    (pkgs.runCommand "nvenc-compress" { nativeBuildInputs = [ pkgs.bash ]; } ''
+      # install(1) creates $out/bin and sets the executable bit in one step.
+      dest="$out/bin/nvenc-compress"
+      install -Dm755 ${./nvenc_compress.sh} "$dest"
+    '')
   ];
 
   fonts.packages = with pkgs; [
diff --git a/global/nvenc_compress.sh b/global/nvenc_compress.sh
new file mode 100755
index 0000000..7f6c5da
--- /dev/null
+++ b/global/nvenc_compress.sh
@@ -0,0 +1,135 @@
+#!/usr/bin/env bash
+# Re-encode a video with h264_nvenc so that the result lands as close as
+# possible to a target file size.
+#
+# Usage: nvenc_compress.sh <video_path> [target_size_mb]
+#   video_path      input video; it is only read, never modified
+#   target_size_mb  desired output size in MiB, positive integer (default: 9)
+#
+# The result is written next to the input as <name>_compressed.mp4. The -cq
+# value is binary-searched and the encode closest to the target (above or
+# below it) is kept.
+#
+# Exit status: 0 when an output file was produced; non-zero on bad arguments
+# or when no encode succeeded.
+set -euo pipefail
+
+usage() {
+  echo "Usage: ${0##*/} <video_path> [target_size_mb]" >&2
+}
+
+# Under `set -u` a missing argument would otherwise abort with a bare
+# "unbound variable" message instead of the usage text.
+if (( $# < 1 )); then
+  usage
+  exit 1
+fi
+
+VIDEO="$1"
+TARGET_MB="${2:-9}"
+
+# Validate before the arithmetic below: $(( )) evaluates its operand as an
+# expression, so anything but a plain integer must not reach it.
+if [[ ! "$TARGET_MB" =~ ^[1-9][0-9]*$ ]]; then
+  echo "Error: target size must be a positive integer (MB): $TARGET_MB" >&2
+  exit 1
+fi
+TARGET_BYTES=$((TARGET_MB * 1024 * 1024))
+
+if [[ ! -f "$VIDEO" ]]; then
+  echo "Error: file not found: $VIDEO" >&2
+  exit 1
+fi
+
+BASENAME="${VIDEO##*/}"
+BASENAME="${BASENAME%.*}"
+OUTDIR="$(dirname -- "$VIDEO")"
+OUTFILE="$OUTDIR/${BASENAME}_compressed.mp4"
+
+# nvenc cq range: 0 (best/largest) to 51 (worst/smallest)
+# We binary search this range to hit the target file size
+LO=0
+HI=51
+BEST_DIFF=-1 # -1 = no successful encode yet; any real distance replaces it
+BEST_CQ=""
+MAX_ITERATIONS=20
+TMPFILE=""
+
+# Remove the in-flight per-iteration encode if the script exits early.
+cleanup() {
+  if [[ -n "$TMPFILE" ]]; then
+    rm -f -- "$TMPFILE"
+  fi
+}
+trap cleanup EXIT
+
+echo "Target size: ${TARGET_MB}MB (${TARGET_BYTES} bytes)"
+echo "Encoding with nvenc (h264_nvenc)..."
+echo ""
+
+for ((i=0; i<MAX_ITERATIONS; i++)); do
+  CQ=$(( (LO + HI) / 2 ))
+  TMPFILE="${OUTFILE}_iter_${i}_cq${CQ}.mp4"
+
+  echo "--- Iteration $((i+1))/$MAX_ITERATIONS ---"
+  echo "Trying cq=$CQ (lo=$LO, hi=$HI)"
+
+  # Test ffmpeg's exit status directly: under `set -e` an unguarded failure
+  # would kill the script, and a failed run can still leave a partial file.
+  # Only errors are logged, so a failure is diagnosable without the usual noise.
+  if ! ffmpeg -nostdin -hide_banner -loglevel error -y -i "$VIDEO" \
+      -c:v h264_nvenc -cq "$CQ" -c:a copy -preset p7 -f mp4 "$TMPFILE"; then
+    echo "Error: ffmpeg failed at cq=$CQ" >&2
+    break
+  fi
+
+  FILE_SIZE=$(stat -c%s "$TMPFILE" 2>/dev/null || stat -f%z "$TMPFILE" 2>/dev/null)
+  DIFF=$(( FILE_SIZE - TARGET_BYTES ))
+  ABS_DIFF=${DIFF#-}
+
+  echo "  Size: $(( FILE_SIZE / 1024 / 1024 ))MB (diff: $(( DIFF / 1024 / 1024 ))MB)"
+
+  # Keep this encode if it is the first one or closer to the target than the
+  # best so far; otherwise discard it.
+  if (( BEST_DIFF < 0 || ABS_DIFF < BEST_DIFF )); then
+    BEST_DIFF=$ABS_DIFF
+    BEST_CQ=$CQ
+    mv -f -- "$TMPFILE" "$OUTFILE"
+  else
+    rm -f -- "$TMPFILE"
+  fi
+  TMPFILE=""
+
+  # If within 1% of target, we're close enough
+  if (( ABS_DIFF < TARGET_BYTES / 100 )); then
+    echo ""
+    echo "Within tolerance! Stopping."
+    break
+  fi
+
+  # Binary search direction:
+  # Higher cq = smaller file, lower cq = larger file
+  if (( FILE_SIZE > TARGET_BYTES )); then
+    # File too big, need higher cq (smaller)
+    LO=$(( CQ + 1 ))
+  else
+    # File too small, need lower cq (larger)
+    HI=$(( CQ - 1 ))
+  fi
+
+  if (( LO > HI )); then
+    echo ""
+    echo "Range exhausted. Stopping."
+    break
+  fi
+done
+
+# Never report success when nothing was written.
+if [[ -z "$BEST_CQ" ]]; then
+  echo "Error: no encode succeeded; no output file was written" >&2
+  exit 1
+fi
+
+echo ""
+echo "Done! Best cq=$BEST_CQ, final size: $(( BEST_DIFF / 1024 / 1024 ))MB from target"
+echo "Output: $OUTFILE"
diff --git a/machines/homepc/configuration.nix b/machines/homepc/configuration.nix
index a542063..0d79fab 100755
--- a/machines/homepc/configuration.nix
+++ b/machines/homepc/configuration.nix
@@ -49,6 +49,7 @@
     pkgs.llama-cpp
     pkgs.llama-swap
     # pkgs.agent-browser
+    pkgs.nvidia-container-toolkit
 
     inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.pi
     inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.rtk
@@ -112,20 +113,14 @@
       "Qwen3-4B-Claude-Opus-Distill":
         cmd: llama-server --port ''${PORT} -hf TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill-GGUF:Q4_K_M --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256
         ttl: 300
-      "Qwen3.5-9B-Thinking":
-        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj --no-kv-offload"
+      "Qwen3.5-9B":
+        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
         ttl: 300
       "Qwen3.5-9B-Claude-Opus-Distill":
         cmd: "llama-server --port ''${PORT} -hf Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF:Q4_K_S --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 512 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj"
         ttl: 300
-      "Qwen3.5-4B-Thinking":
-        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}'"
-        ttl: 300
-      "Qwen3.5-9B-Non-Thinking":
-        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --no-mmproj"
-        ttl: 300
-      "Qwen3.5-4B-Non-Thinking":
-        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
+      "Qwen3.5-4B":
+        cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
         ttl: 300
       "Gemma4-E4B":
         cmd: "llama-server --port ''${PORT} -hf Abhiray/gemma-4-E4B-it-heretic-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256"
diff --git a/modules/virtualization/default.nix b/modules/virtualization/default.nix
index f7c826c..7d64261 100755
--- a/modules/virtualization/default.nix
+++ b/modules/virtualization/default.nix
@@ -14,5 +14,9 @@
   virtualisation.podman = {
     enable = true;
     dockerCompat = true;
   };
+
+  # NVIDIA GPU access for containers via CDI. Used instead of
+  # virtualisation.podman.enableNvidia, which upstream NixOS has deprecated.
+  hardware.nvidia-container-toolkit.enable = true;
 }