diff --git a/global/default.nix b/global/default.nix index 7438d0a..38adce1 100755 --- a/global/default.nix +++ b/global/default.nix @@ -235,6 +235,12 @@ nasctui trilium-desktop haruna + (pkgs.runCommand "nvenc-compress" { nativeBuildInputs = [ pkgs.bash ]; } + '' + mkdir -p $out/bin + cp ${./nvenc_compress.sh} $out/bin/nvenc-compress + chmod +x $out/bin/nvenc-compress + '') ]; fonts.packages = with pkgs; [ diff --git a/global/nvenc_compress.sh b/global/nvenc_compress.sh new file mode 100755 index 0000000..7f6c5da --- /dev/null +++ b/global/nvenc_compress.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Usage: nvenc_compress.sh [target_size_mb] + +VIDEO="$1" +TARGET_MB="${2:-9}" +TARGET_BYTES=$((TARGET_MB * 1024 * 1024)) + +if [[ ! -f "$VIDEO" ]]; then + echo "Error: file not found: $VIDEO" + exit 1 +fi + +BASENAME="${VIDEO##*/}" +BASENAME="${BASENAME%.*}" +OUTDIR="$(dirname "$VIDEO")" +OUTFILE="$OUTDIR/${BASENAME}_compressed.mp4" + +# nvenc cq range: 0 (best/largest) to 51 (worst/smallest) +# We binary search this range to hit the target file size +LO=0 +HI=51 +BEST_DIFF=999999999 +BEST_CQ=23 +MAX_ITERATIONS=20 + +echo "Target size: ${TARGET_MB}MB (${TARGET_BYTES} bytes)" +echo "Encoding with nvenc (h264_nvenc)..." +echo "" + +for ((i=0; i/dev/null + + if [[ ! -f "$TMPFILE" ]]; then + echo "Error: ffmpeg failed at cq=$CQ" + rm -f "$TMPFILE" + break + fi + + FILE_SIZE=$(stat -c%s "$TMPFILE" 2>/dev/null || stat -f%z "$TMPFILE" 2>/dev/null) + DIFF=$(( FILE_SIZE - TARGET_BYTES )) + ABS_DIFF=${DIFF#-} + + echo " Size: $(( FILE_SIZE / 1024 / 1024 ))MB (diff: $(( DIFF / 1024 / 1024 ))MB)" + + if (( ABS_DIFF < BEST_DIFF )); then + BEST_DIFF=$ABS_DIFF + BEST_CQ=$CQ + cp "$TMPFILE" "$OUTFILE" + fi + + rm -f "$TMPFILE" + + # If within 1% of target, we're close enough + if (( ABS_DIFF < TARGET_BYTES / 100 )); then + echo "" + echo "Within tolerance! Stopping." + break + fi + + # Binary search direction: + # Higher cq = smaller file, lower cq = larger file + if (( FILE_SIZE > TARGET_BYTES )); then + # File too big, need higher cq (smaller) + LO=$(( CQ + 1 )) + else + # File too small, need lower cq (larger) + HI=$(( CQ - 1 )) + fi + + if (( LO > HI )); then + echo "" + echo "Range exhausted. Stopping." + break + fi +done + +echo "" +echo "Done! Best cq=$BEST_CQ, final size: $(( BEST_DIFF / 1024 / 1024 ))MB from target" +echo "Output: $OUTFILE" diff --git a/machines/homepc/configuration.nix b/machines/homepc/configuration.nix index a542063..0d79fab 100755 --- a/machines/homepc/configuration.nix +++ b/machines/homepc/configuration.nix @@ -49,6 +49,7 @@ pkgs.llama-cpp pkgs.llama-swap # pkgs.agent-browser + pkgs.nvidia-container-toolkit inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.pi inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.rtk @@ -112,20 +113,14 @@ "Qwen3-4B-Claude-Opus-Distill": cmd: llama-server --port ''${PORT} -hf TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill-GGUF:Q4_K_M --ctx-size 32768 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 ttl: 300 - "Qwen3.5-9B-Thinking": - cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 128000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj --no-kv-offload" + "Qwen3.5-9B": + cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256" ttl: 300 "Qwen3.5-9B-Claude-Opus-Distill": cmd: "llama-server --port ''${PORT} -hf Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF:Q4_K_S --ctx-size 32000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 512 --chat-template-kwargs '{\"enable_thinking\": true}' --no-mmproj" ttl: 300 - "Qwen3.5-4B-Thinking": - cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --chat-template-kwargs '{\"enable_thinking\": true}'" - ttl: 300 - "Qwen3.5-9B-Non-Thinking": - cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-9B-GGUF:Q4_K_M --ctx-size 32000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256 --no-mmproj" - ttl: 300 - "Qwen3.5-4B-Non-Thinking": - cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256" + "Qwen3.5-4B": + cmd: "llama-server --port ''${PORT} -hf unsloth/Qwen3.5-4B-GGUF:Q4_K_M --ctx-size 64000 --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256" ttl: 300 "Gemma4-E4B": cmd: "llama-server --port ''${PORT} -hf Abhiray/gemma-4-E4B-it-heretic-GGUF:Q4_K_M --ctx-size 64000 --temp 0.7 --top-p 0.8 --top-k 20 --min-p 0.00 -fa on --jinja -kvu -np 1 --fit-target 256" diff --git a/modules/virtualization/default.nix b/modules/virtualization/default.nix index f7c826c..7d64261 100755 --- a/modules/virtualization/default.nix +++ b/modules/virtualization/default.nix @@ -14,5 +14,6 @@ virtualisation.podman = { enable = true; dockerCompat = true; + enableNvidia = true; }; }