# Reconfigures the llama.cpp app on a TrueNAS SCALE host over SSH:
#   1. Builds the llama-server command line from the script parameters.
#   2. Rewrites the "command" list in the app's user_config.yaml.
#   3. Copies the new command into the rendered docker-compose.yaml and applies
#      it with `midclt call app.update`.
#   4. Polls the server's /health endpoint until it reports ready.
param(
    [Parameter(Mandatory = $true)][string]$ModelPath,
    [Parameter(Mandatory = $true)][int]$CtxSize,
    [int]$BatchSize = 1024,
    [int]$UBatchSize = 256,
    [string]$TensorSplit = "0.5,0.5",
    [string]$Devices = "0,1",
    [int]$GpuLayers = 999,
    [string]$CacheTypeK = "q4_0",
    [string]$CacheTypeV = "q4_0",
    [string]$GrammarFile = "",
    [string]$JsonSchema = "",
    [string]$BaseUrl = "http://192.168.1.2:8071",
    [int]$TimeoutSec = 600,
    [string]$SshExe = "$env:SystemRoot\System32\OpenSSH\ssh.exe",
    [string]$SshUser = "rushabh",
    [string]$SshHost = "192.168.1.2",
    [int]$SshPort = 55555
)

$ErrorActionPreference = "Stop"
$ProgressPreference = "SilentlyContinue"

# llama-server arguments, in the order they will appear in the compose command.
$commandArgs = @(
    "--model", $ModelPath,
    "--ctx-size", $CtxSize.ToString(),
    "--n-gpu-layers", $GpuLayers.ToString(),
    "--split-mode", "layer",
    "--tensor-split", $TensorSplit,
    "--batch-size", $BatchSize.ToString(),
    "--ubatch-size", $UBatchSize.ToString(),
    "--cache-type-k", $CacheTypeK,
    "--cache-type-v", $CacheTypeV,
    "--flash-attn", "on"
)

if (-not [string]::IsNullOrWhiteSpace($Devices)) {
    $commandArgs = @("--device", $Devices) + $commandArgs
}
if (-not [string]::IsNullOrWhiteSpace($GrammarFile)) {
    $commandArgs += @("--grammar-file", $GrammarFile)
}
if (-not [string]::IsNullOrWhiteSpace($JsonSchema)) {
    $commandArgs += @("--json-schema", $JsonSchema)
}

# -InputObject keeps the JSON array shape even if the list ever has one element.
$argJson = ConvertTo-Json -InputObject $commandArgs -Compress

# Rewrite the "command" list in user_config.yaml in place. The file uses quoted
# top-level keys ('"command":') with one "- '...'" item per line, so a simple
# line-based rewrite is sufficient. $argJson is expanded into the here-string
# before the script is piped to python3 on the remote host.
$py = @"
import json

path = r"/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"
new_cmd = json.loads(r'''$argJson''')
lines = open(path, "r", encoding="utf-8").read().splitlines()
out = []
in_cmd = False

def yaml_quote(value):
    # Single-quoted YAML scalar; embedded quotes are escaped by doubling them.
    text = str(value)
    return "'" + text.replace("'", "''") + "'"

for line in lines:
    if line.startswith('"command":'):
        # Replace the whole command block with the freshly built argument list.
        out.append('"command":')
        for arg in new_cmd:
            out.append(f"- {yaml_quote(arg)}")
        in_cmd = True
        continue
    if in_cmd:
        if line.startswith('"') and not line.startswith('"command":'):
            # Next top-level key: stop dropping the old command items.
            in_cmd = False
            out.append(line)
        # else: old "- ..." item, drop it.
    else:
        out.append(line)

open(path, "w", encoding="utf-8").write("\n".join(out) + "\n")
"@

$py | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -"

# Copy the new command into the rendered compose file and apply it through the
# TrueNAS middleware so the container is restarted with the new arguments.
# Literal here-string: nothing in this script needs PowerShell expansion.
$pyCompose = @'
import json, subprocess, yaml

compose_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/templates/rendered/docker-compose.yaml"
user_config_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"

with open(compose_path, "r", encoding="utf-8") as f:
    compose = yaml.safe_load(f)
with open(user_config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

command = config.get("command")
if not command:
    raise SystemExit("command list missing from user_config")

svc = compose["services"]["llamacpp"]
svc["command"] = command

with open(compose_path, "w", encoding="utf-8") as f:
    yaml.safe_dump(compose, f, sort_keys=False)

payload = {"custom_compose_config": compose}
subprocess.run(["midclt", "call", "app.update", "llamacpp", json.dumps(payload)], check=True)
'@

$pyCompose | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -" | Out-Null

# Poll /health until the server reports ready or the timeout expires.
$start = Get-Date
while ((Get-Date) - $start -lt [TimeSpan]::FromSeconds($TimeoutSec)) {
    try {
        $resp = Invoke-RestMethod -Uri "$BaseUrl/health" -TimeoutSec 10
        if ($resp.status -eq "ok") {
            Write-Host "llamacpp healthy at $BaseUrl"
            exit 0
        }
    } catch {
        # Not reachable yet (model still loading); fall through and keep polling.
    }
    Start-Sleep -Seconds 5
}
throw "Timed out waiting for llama.cpp server at $BaseUrl"
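
# Example invocations (illustrative only: the script filename, model path, and
# grammar file below are assumptions, not values taken from this script):
#
#   .\Set-LlamaCppServer.ps1 -ModelPath "/models/model-q4_k_m.gguf" -CtxSize 32768
#
#   .\Set-LlamaCppServer.ps1 -ModelPath "/models/model-q4_k_m.gguf" -CtxSize 16384 `
#       -TensorSplit "0.6,0.4" -CacheTypeK q8_0 -CacheTypeV q8_0 `
#       -GrammarFile "/grammars/json.gbnf"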