Initial commit
This commit is contained in:
117
llamacpp_set_command.ps1
Normal file
117
llamacpp_set_command.ps1
Normal file
@@ -0,0 +1,117 @@
|
||||
# Pushes a new llama.cpp server command line to a TrueNAS-hosted llamacpp
# app over SSH, redeploys the app, and waits for the server to report healthy.
param(
    # Model + runtime settings forwarded to llama-server.
    [Parameter(Mandatory = $true)][string]$ModelPath,
    [Parameter(Mandatory = $true)][int]$CtxSize,
    [int]$BatchSize = 1024,
    [int]$UBatchSize = 256,
    # Fraction of layers per GPU, e.g. "0.5,0.5" for an even two-GPU split.
    [string]$TensorSplit = "0.5,0.5",
    # Comma-separated GPU device ids; an empty string omits --device entirely.
    [string]$Devices = "0,1",
    # 999 effectively means "offload all layers".
    [int]$GpuLayers = 999,
    [string]$CacheTypeK = "q4_0",
    [string]$CacheTypeV = "q4_0",
    # Optional constrained-decoding settings; empty string omits the flag.
    [string]$GrammarFile = "",
    [string]$JsonSchema = "",
    # Where the redeployed server will be reachable for the health poll.
    [string]$BaseUrl = "http://192.168.1.2:8071",
    [int]$TimeoutSec = 600,
    # FIX: backslash is NOT an escape character in PowerShell double-quoted
    # strings, so the original "\\" produced literal doubled backslashes
    # (C:\Windows\\System32\\OpenSSH\\ssh.exe). Use single backslashes.
    [string]$SshExe = "$env:SystemRoot\System32\OpenSSH\ssh.exe",
    [string]$SshUser = "rushabh",
    [string]$SshHost = "192.168.1.2",
    [int]$SshPort = 55555
)
|
||||
|
||||
$ErrorActionPreference = "Stop"
$ProgressPreference = "SilentlyContinue"

# Assemble the llama-server argument vector. Optional flags are appended
# only when their parameter is non-blank, so the server keeps its own
# defaults otherwise.
$commandArgs = @(
    "--model", $ModelPath,
    "--ctx-size", $CtxSize.ToString(),
    "--n-gpu-layers", $GpuLayers.ToString(),
    "--split-mode", "layer",
    "--tensor-split", $TensorSplit,
    "--batch-size", $BatchSize.ToString(),
    "--ubatch-size", $UBatchSize.ToString(),
    "--cache-type-k", $CacheTypeK,
    "--cache-type-v", $CacheTypeV,
    "--flash-attn", "on"
)

# --device goes first (prepended) to match the original ordering.
if (-not [string]::IsNullOrWhiteSpace($Devices)) {
    $commandArgs = @("--device", $Devices) + $commandArgs
}

if (-not [string]::IsNullOrWhiteSpace($GrammarFile)) {
    $commandArgs += @("--grammar-file", $GrammarFile)
}

if (-not [string]::IsNullOrWhiteSpace($JsonSchema)) {
    $commandArgs += @("--json-schema", $JsonSchema)
}

# Serialize as a JSON array for transport into the remote Python helper.
# FIX: use -InputObject instead of piping — the pipeline enumerates the
# array, and a one-element array would be emitted as a bare JSON string
# rather than a one-element JSON array.
$argJson = ConvertTo-Json -InputObject $commandArgs -Compress
|
||||
|
||||
# Remote helper 1: rewrite the "command": list inside the app's
# user_config.yaml in place, preserving every other line verbatim.
# NOTE(review): assumes the key appears at column 0 exactly as
# '"command":' and that list items start with "- " until the next
# top-level key beginning with '"' — confirm against the file on the NAS.
$py = @"
import json

path = r"/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"
# `$argJson` below is interpolated by PowerShell before this script is sent.
# NOTE(review): if an argument ever contains ''' (or an unescaped single
# quote survives ConvertTo-Json), this raw literal breaks — verify.
new_cmd = json.loads(r'''$argJson''')

def yaml_quote(value):
    # Single-quoted YAML scalar; the only escape needed is ' -> ''.
    text = str(value)
    return "'" + text.replace("'", "''") + "'"

lines = open(path, "r", encoding="utf-8").read().splitlines()
out = []
in_cmd = False
for line in lines:
    if line.startswith('"command":'):
        # Replace the entire existing list with the new arguments.
        out.append('"command":')
        for arg in new_cmd:
            out.append(f"- {yaml_quote(arg)}")
        in_cmd = True
        continue
    if in_cmd:
        # Drop old "- ..." entries until the next top-level key appears.
        if line.startswith('"') and not line.startswith('"command":'):
            in_cmd = False
            out.append(line)
    else:
        out.append(line)
open(path, "w", encoding="utf-8").write("\n".join(out) + "\n")
"@

# Run the helper as root on the NAS; sudo -n requires passwordless sudo.
$py | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -"
# FIX: native commands do not throw under $ErrorActionPreference = "Stop"
# in Windows PowerShell, so a failed remote edit was silently ignored.
if ($LASTEXITCODE -ne 0) {
    throw "Remote user_config.yaml update failed (ssh exit $LASTEXITCODE)"
}
|
||||
|
||||
# Remote helper 2: copy the new command into the rendered docker-compose
# and redeploy the app through the TrueNAS middleware.
# NOTE(review): the rendered docker-compose.yaml is read with json.load —
# TrueNAS appears to render it as JSON despite the .yaml name; confirm.
$pyCompose = @"
import json, subprocess, yaml

compose_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/templates/rendered/docker-compose.yaml"
user_config_path = "/mnt/.ix-apps/app_configs/llamacpp/versions/1.2.17/user_config.yaml"

with open(compose_path, "r", encoding="utf-8") as f:
    compose = json.load(f)
with open(user_config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

command = config.get("command")
if not command:
    raise SystemExit("command list missing from user_config")

compose["services"]["llamacpp"]["command"] = command

with open(compose_path, "w", encoding="utf-8") as f:
    json.dump(compose, f)

# Ask the middleware to apply the updated compose config and restart.
payload = {"custom_compose_config": compose}
subprocess.run(["midclt", "call", "app.update", "llamacpp", json.dumps(payload)], check=True)
"@

$pyCompose | & $SshExe -p $SshPort "$SshUser@$SshHost" "sudo -n python3 -" | Out-Null
# FIX: check the ssh/remote exit code explicitly — a failed redeploy was
# previously ignored and the script went straight to health polling.
if ($LASTEXITCODE -ne 0) {
    throw "Remote app redeploy failed (ssh exit $LASTEXITCODE)"
}
|
||||
|
||||
# Poll the server's /health endpoint until it reports ok or the timeout
# elapses. exit 0 on success; throw on timeout.
$deadline = (Get-Date).AddSeconds($TimeoutSec)
while ((Get-Date) -lt $deadline) {
    try {
        $resp = Invoke-RestMethod -Uri "$BaseUrl/health" -TimeoutSec 10
        if ($resp.status -eq "ok") {
            Write-Host "llamacpp healthy at $BaseUrl"
            exit 0
        }
    } catch {
        # Server not up yet (connection refused / 503 while loading) —
        # swallow and keep waiting.
    }
    # FIX: sleep on every iteration. The original slept only inside the
    # catch branch, so an endpoint that answered with a non-ok status
    # caused a tight busy-loop of requests with no delay.
    Start-Sleep -Seconds 5
}

throw "Timed out waiting for llama.cpp server at $BaseUrl"
|
||||
Reference in New Issue
Block a user