118 lines
3.5 KiB
PowerShell
# Reconfigure a llama.cpp server deployed as a TrueNAS app (over SSH), then
# wait for its /health endpoint to report ready.
param(
    # Model file path as seen on the remote host (passed to --model).
    [Parameter(Mandatory = $true)][string]$ModelPath,
    # Context window size (--ctx-size).
    [Parameter(Mandatory = $true)][int]$CtxSize,
    [int]$BatchSize = 1024,
    [int]$UBatchSize = 256,
    # Layer split across GPUs (--tensor-split), comma-separated fractions.
    [string]$TensorSplit = "0.5,0.5",
    # GPU device indices (--device); empty string skips the flag entirely.
    [string]$Devices = "0,1",
    [int]$GpuLayers = 999,
    [string]$CacheTypeK = "q4_0",
    [string]$CacheTypeV = "q4_0",
    # Optional GBNF grammar file / JSON schema for constrained decoding.
    [string]$GrammarFile = "",
    [string]$JsonSchema = "",
    [string]$BaseUrl = "http://192.168.1.2:8071",
    # How long to wait for the server to become healthy, in seconds.
    [int]$TimeoutSec = 600,
    # PowerShell double-quoted strings do not treat "\" as an escape, so a
    # single backslash is correct here. The original "\\" produced literal
    # doubled separators (C:\Windows\\System32\\...); Win32 path
    # normalization tolerates that, but other consumers may not.
    [string]$SshExe = "$env:SystemRoot\System32\OpenSSH\ssh.exe",
    [string]$SshUser = "rushabh",
    [string]$SshHost = "192.168.1.2",
    [int]$SshPort = 55555
)
|
|
|
|
# Fail fast on cmdlet errors; suppress progress bars for web requests.
$ErrorActionPreference = "Stop"
$ProgressPreference = "SilentlyContinue"

# Assemble the llama.cpp server argument list incrementally.
# Order matters only for readability; optional flags are appended later.
$commandArgs = @()
$commandArgs += @("--model", $ModelPath)
$commandArgs += @("--ctx-size", "$CtxSize")
$commandArgs += @("--n-gpu-layers", "$GpuLayers")
$commandArgs += @("--split-mode", "layer")
$commandArgs += @("--tensor-split", $TensorSplit)
$commandArgs += @("--batch-size", "$BatchSize")
$commandArgs += @("--ubatch-size", "$UBatchSize")
$commandArgs += @("--cache-type-k", $CacheTypeK)
$commandArgs += @("--cache-type-v", $CacheTypeV)
$commandArgs += @("--flash-attn", "on")
|
|
|
|
# Prepend --device when a device list was given (order preserved from the
# original script, which placed it ahead of the other flags).
if (-not [string]::IsNullOrWhiteSpace($Devices)) {
    $commandArgs = @("--device", $Devices) + $commandArgs
}

# Optional constrained-decoding flags.
if (-not [string]::IsNullOrWhiteSpace($GrammarFile)) {
    $commandArgs += @("--grammar-file", $GrammarFile)
}

if (-not [string]::IsNullOrWhiteSpace($JsonSchema)) {
    $commandArgs += @("--json-schema", $JsonSchema)
}

# Serialize the argument list as a JSON array for the remote Python helper.
# Use -InputObject rather than the pipeline: piping unrolls the array, and a
# single-element list would then serialize as a bare scalar instead of an
# array, breaking the remote json.loads consumer.
$argJson = ConvertTo-Json -InputObject $commandArgs -Compress
|
|
|
|
# Python helper that rewrites the "command": list inside the TrueNAS app's
# user_config.yaml in place; it runs on the remote host via ssh + sudo.
# NOTE(review): $argJson is spliced into a raw triple-quoted Python literal;
# this breaks if an argument ever contains ''' — fine for current inputs,
# but confirm if arbitrary arguments are ever allowed.
$py = @"
import json

path = r"/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"
new_cmd = json.loads(r'''$argJson''')

def yaml_quote(value):
    # Single-quoted YAML scalar; embedded quotes are doubled per the YAML spec.
    text = str(value)
    return "'" + text.replace("'", "''") + "'"

lines = open(path, "r", encoding="utf-8").read().splitlines()
out = []
in_cmd = False
replaced = False
for line in lines:
    if line.startswith('"command":'):
        # Emit the replacement list, then skip the old entries that follow.
        out.append('"command":')
        for arg in new_cmd:
            out.append(f"- {yaml_quote(arg)}")
        in_cmd = True
        replaced = True
        continue
    if in_cmd:
        # The old list runs until the next top-level quoted key.
        if line.startswith('"') and not line.startswith('"command":'):
            in_cmd = False
            out.append(line)
        else:
            continue
    else:
        out.append(line)
if not replaced:
    # Fail loudly instead of silently writing the file back unchanged
    # (the original script had no check here).
    raise SystemExit('"command": key not found in ' + path)
open(path, "w", encoding="utf-8").write("\n".join(out) + "\n")
"@

# Stream the script over SSH; "sudo -n" fails fast if passwordless sudo is
# not configured. Native commands do not trip $ErrorActionPreference, so the
# exit code must be checked explicitly.
$py | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -"
if ($LASTEXITCODE -ne 0) {
    throw "Remote user_config rewrite failed (ssh/python exit code $LASTEXITCODE)"
}
|
|
|
|
# Python helper that copies the new command list from user_config.yaml into
# the rendered docker-compose file, then applies it through the TrueNAS
# middleware (midclt app.update). Runs remotely via ssh + sudo.
$pyCompose = @"
import json, yaml, subprocess

compose_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/templates/rendered/docker-compose.yaml"
user_config_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"

# NOTE(review): the rendered compose file is parsed with json.load, not
# yaml.safe_load -- this assumes TrueNAS renders it as JSON-formatted YAML;
# confirm before reusing this against other app versions.
with open(compose_path, "r", encoding="utf-8") as f:
    compose = json.load(f)
with open(user_config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

command = config.get("command")
if not command:
    raise SystemExit("command list missing from user_config")

svc = compose["services"]["llamacpp"]
svc["command"] = command

# JSON is a subset of YAML, so writing JSON back keeps the file loadable.
with open(compose_path, "w", encoding="utf-8") as f:
    json.dump(compose, f)

payload = {"custom_compose_config": compose}
subprocess.run(["midclt", "call", "app.update", "llamacpp", json.dumps(payload)], check=True)
"@

# Apply remotely. Stdout is discarded, but unlike the original the exit code
# is checked so a failed midclt call no longer passes silently.
$pyCompose | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -" | Out-Null
if ($LASTEXITCODE -ne 0) {
    throw "Remote compose update failed (ssh/python exit code $LASTEXITCODE)"
}
|
|
|
|
# Poll the server's /health endpoint until it reports ok or the timeout
# elapses. Exits 0 on success; throws on timeout.
$deadline = (Get-Date).AddSeconds($TimeoutSec)
while ((Get-Date) -lt $deadline) {
    try {
        $resp = Invoke-RestMethod -Uri "$BaseUrl/health" -TimeoutSec 10
        if ($resp.status -eq "ok") {
            Write-Host "llamacpp healthy at $BaseUrl"
            exit 0
        }
    } catch {
        # Server not reachable yet (connection refused / request timeout);
        # ignore and retry below.
    }
    # Sleep on every iteration. The original slept only in the catch block,
    # so a reachable server returning status != "ok" (e.g. still loading the
    # model) was hammered in a tight request loop.
    Start-Sleep -Seconds 5
}

throw "Timed out waiting for llama.cpp server at $BaseUrl"
|