118 lines
3.5 KiB
PowerShell
# Reconfigure a llama.cpp server deployed as a TrueNAS app (over SSH), then
# wait for its /health endpoint to report ready.
param(
    # Model file path as seen on the remote host (passed to --model).
    [Parameter(Mandatory = $true)][string]$ModelPath,
    # Context window size (--ctx-size).
    [Parameter(Mandatory = $true)][int]$CtxSize,
    [int]$BatchSize = 1024,
    [int]$UBatchSize = 256,
    # Layer split across GPUs (--tensor-split), comma-separated fractions.
    [string]$TensorSplit = "0.5,0.5",
    # GPU device indices (--device); empty string skips the flag entirely.
    [string]$Devices = "0,1",
    [int]$GpuLayers = 999,
    [string]$CacheTypeK = "q4_0",
    [string]$CacheTypeV = "q4_0",
    # Optional GBNF grammar file / JSON schema for constrained decoding.
    [string]$GrammarFile = "",
    [string]$JsonSchema = "",
    [string]$BaseUrl = "http://192.168.1.2:8071",
    # How long to wait for the server to become healthy, in seconds.
    [int]$TimeoutSec = 600,
    # PowerShell double-quoted strings do not treat "\" as an escape, so a
    # single backslash is correct here. The original "\\" produced literal
    # doubled separators (C:\Windows\\System32\\...); Win32 path
    # normalization tolerates that, but other consumers may not.
    [string]$SshExe = "$env:SystemRoot\System32\OpenSSH\ssh.exe",
    [string]$SshUser = "rushabh",
    [string]$SshHost = "192.168.1.2",
    [int]$SshPort = 55555
)
|
|
|
|
# Fail fast on cmdlet errors; suppress progress bars for web requests.
$ErrorActionPreference = "Stop"
$ProgressPreference = "SilentlyContinue"

# Assemble the llama.cpp server argument list incrementally.
# Order matters only for readability; optional flags are appended later.
$commandArgs = @()
$commandArgs += @("--model", $ModelPath)
$commandArgs += @("--ctx-size", "$CtxSize")
$commandArgs += @("--n-gpu-layers", "$GpuLayers")
$commandArgs += @("--split-mode", "layer")
$commandArgs += @("--tensor-split", $TensorSplit)
$commandArgs += @("--batch-size", "$BatchSize")
$commandArgs += @("--ubatch-size", "$UBatchSize")
$commandArgs += @("--cache-type-k", $CacheTypeK)
$commandArgs += @("--cache-type-v", $CacheTypeV)
$commandArgs += @("--flash-attn", "on")
|
|
|
|
# Prepend --device when a device list was given (order preserved from the
# original script, which placed it ahead of the other flags).
if (-not [string]::IsNullOrWhiteSpace($Devices)) {
    $commandArgs = @("--device", $Devices) + $commandArgs
}

# Optional constrained-decoding flags.
if (-not [string]::IsNullOrWhiteSpace($GrammarFile)) {
    $commandArgs += @("--grammar-file", $GrammarFile)
}

if (-not [string]::IsNullOrWhiteSpace($JsonSchema)) {
    $commandArgs += @("--json-schema", $JsonSchema)
}

# Serialize the argument list as a JSON array for the remote Python helper.
# Use -InputObject rather than the pipeline: piping unrolls the array, and a
# single-element list would then serialize as a bare scalar instead of an
# array, breaking the remote json.loads consumer.
$argJson = ConvertTo-Json -InputObject $commandArgs -Compress
|
|
|
|
# Python helper that rewrites the "command": list inside the TrueNAS app's
# user_config.yaml in place; it runs on the remote host via ssh + sudo.
# NOTE(review): $argJson is spliced into a raw triple-quoted Python literal;
# this breaks if an argument ever contains ''' — fine for current inputs,
# but confirm if arbitrary arguments are ever allowed.
$py = @"
import json

path = r"/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"
new_cmd = json.loads(r'''$argJson''')

def yaml_quote(value):
    # Single-quoted YAML scalar; embedded quotes are doubled per the YAML spec.
    text = str(value)
    return "'" + text.replace("'", "''") + "'"

lines = open(path, "r", encoding="utf-8").read().splitlines()
out = []
in_cmd = False
replaced = False
for line in lines:
    if line.startswith('"command":'):
        # Emit the replacement list, then skip the old entries that follow.
        out.append('"command":')
        for arg in new_cmd:
            out.append(f"- {yaml_quote(arg)}")
        in_cmd = True
        replaced = True
        continue
    if in_cmd:
        # The old list runs until the next top-level quoted key.
        if line.startswith('"') and not line.startswith('"command":'):
            in_cmd = False
            out.append(line)
        else:
            continue
    else:
        out.append(line)
if not replaced:
    # Fail loudly instead of silently writing the file back unchanged
    # (the original script had no check here).
    raise SystemExit('"command": key not found in ' + path)
open(path, "w", encoding="utf-8").write("\n".join(out) + "\n")
"@

# Stream the script over SSH; "sudo -n" fails fast if passwordless sudo is
# not configured. Native commands do not trip $ErrorActionPreference, so the
# exit code must be checked explicitly.
$py | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -"
if ($LASTEXITCODE -ne 0) {
    throw "Remote user_config rewrite failed (ssh/python exit code $LASTEXITCODE)"
}
|
|
|
|
# Python helper that copies the new command list from user_config.yaml into
# the rendered docker-compose file, then applies it through the TrueNAS
# middleware (midclt app.update). Runs remotely via ssh + sudo.
$pyCompose = @"
import json, yaml, subprocess

compose_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/templates/rendered/docker-compose.yaml"
user_config_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"

# NOTE(review): the rendered compose file is parsed with json.load, not
# yaml.safe_load -- this assumes TrueNAS renders it as JSON-formatted YAML;
# confirm before reusing this against other app versions.
with open(compose_path, "r", encoding="utf-8") as f:
    compose = json.load(f)
with open(user_config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

command = config.get("command")
if not command:
    raise SystemExit("command list missing from user_config")

svc = compose["services"]["llamacpp"]
svc["command"] = command

# JSON is a subset of YAML, so writing JSON back keeps the file loadable.
with open(compose_path, "w", encoding="utf-8") as f:
    json.dump(compose, f)

payload = {"custom_compose_config": compose}
subprocess.run(["midclt", "call", "app.update", "llamacpp", json.dumps(payload)], check=True)
"@

# Apply remotely. Stdout is discarded, but unlike the original the exit code
# is checked so a failed midclt call no longer passes silently.
$pyCompose | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -" | Out-Null
if ($LASTEXITCODE -ne 0) {
    throw "Remote compose update failed (ssh/python exit code $LASTEXITCODE)"
}
|
|
|
|
# Poll the server's /health endpoint until it reports ok or the timeout
# elapses. Exits 0 on success; throws on timeout.
$deadline = (Get-Date).AddSeconds($TimeoutSec)
while ((Get-Date) -lt $deadline) {
    try {
        $resp = Invoke-RestMethod -Uri "$BaseUrl/health" -TimeoutSec 10
        if ($resp.status -eq "ok") {
            Write-Host "llamacpp healthy at $BaseUrl"
            exit 0
        }
    } catch {
        # Server not reachable yet (connection refused / request timeout);
        # ignore and retry below.
    }
    # Sleep on every iteration. The original slept only in the catch block,
    # so a reachable server returning status != "ok" (e.g. still loading the
    # model) was hammered in a tight request loop.
    Start-Sleep -Seconds 5
}

throw "Timed out waiting for llama.cpp server at $BaseUrl"
|