Files
codex_truenas_helper/llamacpp_set_command.ps1
Rushabh Gosar 5d1a0ee72b Initial commit
2026-01-07 16:54:39 -08:00

118 lines
3.5 KiB
PowerShell

<#
.SYNOPSIS
Pushes a new llama.cpp server command line into a TrueNAS-hosted llamacpp app
(over SSH) and waits for the server's /health endpoint to report ok.
#>
param(
    # Path to the GGUF model as seen inside the container.
    [Parameter(Mandatory = $true)][string]$ModelPath,
    # Context window size passed as --ctx-size.
    [Parameter(Mandatory = $true)][int]$CtxSize,
    [int]$BatchSize = 1024,
    [int]$UBatchSize = 256,
    # Fraction of the model assigned to each GPU (--tensor-split).
    [string]$TensorSplit = "0.5,0.5",
    # GPU device list (--device); blank skips the flag entirely.
    [string]$Devices = "0,1",
    # 999 effectively means "offload all layers".
    [int]$GpuLayers = 999,
    [string]$CacheTypeK = "q4_0",
    [string]$CacheTypeV = "q4_0",
    [string]$GrammarFile = "",
    [string]$JsonSchema = "",
    # Base URL of the llama.cpp server used for the health check.
    [string]$BaseUrl = "http://192.168.1.2:8071",
    # How long to wait for /health before giving up.
    [int]$TimeoutSec = 600,
    # NOTE: backslash is NOT an escape character in PowerShell double-quoted
    # strings, so the original "\\" default produced doubled backslashes in the
    # path; single backslashes are correct here.
    [string]$SshExe = "$env:SystemRoot\System32\OpenSSH\ssh.exe",
    [string]$SshUser = "rushabh",
    [string]$SshHost = "192.168.1.2",
    [int]$SshPort = 55555
)

$ErrorActionPreference = "Stop"
$ProgressPreference = "SilentlyContinue"
# Assemble the llama.cpp server argument list. Order matters for readability
# only: the optional --device pair leads, then the fixed flags, then the
# optional grammar/schema flags.
$argList = [System.Collections.Generic.List[string]]::new()

if (-not [string]::IsNullOrWhiteSpace($Devices)) {
    $argList.Add("--device")
    $argList.Add($Devices)
}

$argList.AddRange([string[]]@(
    "--model", $ModelPath,
    "--ctx-size", $CtxSize.ToString(),
    "--n-gpu-layers", $GpuLayers.ToString(),
    "--split-mode", "layer",
    "--tensor-split", $TensorSplit,
    "--batch-size", $BatchSize.ToString(),
    "--ubatch-size", $UBatchSize.ToString(),
    "--cache-type-k", $CacheTypeK,
    "--cache-type-v", $CacheTypeV,
    "--flash-attn", "on"
))

if (-not [string]::IsNullOrWhiteSpace($GrammarFile)) {
    $argList.Add("--grammar-file")
    $argList.Add($GrammarFile)
}
if (-not [string]::IsNullOrWhiteSpace($JsonSchema)) {
    $argList.Add("--json-schema")
    $argList.Add($JsonSchema)
}

$commandArgs = $argList.ToArray()
# Serialize the argument list for the remote Python script. -InputObject is
# used instead of pipeline input because piping a single-element array through
# ConvertTo-Json unwraps it to a bare scalar instead of a JSON array.
$argJson = ConvertTo-Json -InputObject @($commandArgs) -Compress

# Remote script: replace the "command" block-sequence inside user_config.yaml
# with the new argument list, leaving every other line untouched.
# NOTE(review): this line-based rewrite assumes the file uses quoted top-level
# keys (`"command":`) and that the command entries do not themselves start
# with `"` — confirm against the actual user_config.yaml on the NAS.
$py = @"
import json

path = r"/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"
new_cmd = json.loads(r'''$argJson''')

def yaml_quote(value):
    # Single-quote YAML scalars; a literal ' is escaped by doubling it.
    text = str(value)
    return "'" + text.replace("'", "''") + "'"

lines = open(path, "r", encoding="utf-8").read().splitlines()
out = []
in_cmd = False
for line in lines:
    if line.startswith('"command":'):
        # Emit the replacement command list and start skipping the old one.
        out.append('"command":')
        for arg in new_cmd:
            out.append(f"- {yaml_quote(arg)}")
        in_cmd = True
        continue
    if in_cmd:
        if line.startswith('"') and not line.startswith('"command":'):
            # Next quoted top-level key ends the old command block.
            in_cmd = False
            out.append(line)
        else:
            continue
    else:
        out.append(line)

open(path, "w", encoding="utf-8").write("\n".join(out) + "\n")
"@

$py | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -"
# Native commands do not trip $ErrorActionPreference; check the exit code so a
# failed ssh/python run does not silently continue to the compose update.
if ($LASTEXITCODE -ne 0) {
    throw "Remote user_config.yaml update failed (exit code $LASTEXITCODE)"
}
# Remote script: copy the new command list from user_config.yaml into the
# rendered docker-compose file, then push it to the TrueNAS middleware via
# `midclt app.update` so the app restarts with the new command.
# NOTE(review): the compose file is parsed with json.load even though it is
# named .yaml — this only works if TrueNAS renders it as JSON (JSON is a YAML
# subset); confirm the rendered file's actual format on the box.
$pyCompose = @"
import json, yaml, subprocess

compose_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/templates/rendered/docker-compose.yaml"
user_config_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"

with open(compose_path, "r", encoding="utf-8") as f:
    compose = json.load(f)
with open(user_config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

command = config.get("command")
if not command:
    raise SystemExit("command list missing from user_config")

svc = compose["services"]["llamacpp"]
svc["command"] = command

with open(compose_path, "w", encoding="utf-8") as f:
    json.dump(compose, f)

payload = {"custom_compose_config": compose}
subprocess.run(["midclt", "call", "app.update", "llamacpp", json.dumps(payload)], check=True)
"@

$pyCompose | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -" | Out-Null
# Native commands do not trip $ErrorActionPreference; without this check a
# failed app.update would fall through to the health wait and mask the error.
if ($LASTEXITCODE -ne 0) {
    throw "Remote compose update / app.update failed (exit code $LASTEXITCODE)"
}
# Poll the server's /health endpoint until it reports ok or the timeout lapses.
$deadline = (Get-Date).AddSeconds($TimeoutSec)
while ((Get-Date) -lt $deadline) {
    try {
        $resp = Invoke-RestMethod -Uri "$BaseUrl/health" -TimeoutSec 10
        if ($resp.status -eq "ok") {
            Write-Host "llamacpp healthy at $BaseUrl"
            exit 0
        }
    } catch {
        # Server not reachable yet (connection refused / timeout); keep polling.
    }
    # Sleep on every iteration. The original slept only inside the catch, so a
    # reachable server returning a non-"ok" status caused a tight busy-loop.
    Start-Sleep -Seconds 5
}
throw "Timed out waiting for llama.cpp server at $BaseUrl"