Initial commit
This commit is contained in:
116
scripts/deploy_truenas_wrapper.py
Normal file
116
scripts/deploy_truenas_wrapper.py
Normal file
@@ -0,0 +1,116 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import ssl
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import websockets
|
||||
|
||||
|
||||
async def _rpc_call(ws_url: str, api_key: str, method: str, params: Optional[list] = None, verify_ssl: bool = False) -> Any:
    """Invoke one TrueNAS middleware method over its DDP-style websocket.

    Opens a fresh connection, performs the "connect" handshake, authenticates
    with the API key, sends a single method call, and returns its result.

    Args:
        ws_url: Websocket endpoint, e.g. "wss://host/websocket".
        api_key: TrueNAS API key used with auth.login_with_api_key.
        method: Middleware method name to invoke.
        params: Positional parameters for the method (defaults to []).
        verify_ssl: When False and the URL is wss://, TLS verification is
            disabled (self-signed certificates are common on TrueNAS boxes).

    Returns:
        The "result" field of the matching reply message.

    Raises:
        RuntimeError: On handshake failure, auth failure, or a server-side
            error reply for this call.
    """
    context = None
    if ws_url.startswith("wss://") and not verify_ssl:
        # Accept self-signed certs: no hostname check, no chain verification.
        context = ssl.create_default_context()
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

    async with websockets.connect(ws_url, ssl=context) as sock:

        async def send(payload: Dict[str, Any]) -> None:
            await sock.send(json.dumps(payload))

        async def recv() -> Dict[str, Any]:
            return json.loads(await sock.recv())

        # Protocol handshake.
        await send({"msg": "connect", "version": "1", "support": ["1"]})
        hello = await recv()
        if hello.get("msg") != "connected":
            raise RuntimeError("failed to connect to TrueNAS websocket")

        # Authenticate; a falsy "result" means the key was rejected.
        await send({"id": 1, "msg": "method", "method": "auth.login_with_api_key", "params": [api_key]})
        login = await recv()
        if not login.get("result"):
            raise RuntimeError("API key authentication failed")

        # Issue the actual call and wait for the reply bearing our id,
        # skipping any unrelated messages the server pushes in between.
        call_id = 2
        await send({"id": call_id, "msg": "method", "method": method, "params": params or []})
        while True:
            reply = await recv()
            if reply.get("id") != call_id:
                continue
            if reply.get("msg") == "error":
                raise RuntimeError(reply.get("error"))
            return reply.get("result")
|
||||
|
||||
|
||||
def _parse_args() -> argparse.Namespace:
    """Define and parse the deployment CLI options."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--ws-url", required=True)
    parser.add_argument("--api-key", required=True)
    parser.add_argument("--api-user")
    parser.add_argument("--app-name", required=True)
    parser.add_argument("--image", required=True)
    parser.add_argument("--model-host-path", required=True)
    parser.add_argument("--llamacpp-base-url", required=True)
    parser.add_argument("--network", required=True)
    parser.add_argument("--api-port", type=int, default=9091)
    parser.add_argument("--ui-port", type=int, default=9092)
    parser.add_argument("--verify-ssl", action="store_true")
    return parser.parse_args()


def _build_env(args: argparse.Namespace) -> Dict[str, str]:
    """Environment variables injected into the wrapper container."""
    env = {
        "PORT_A": str(args.api_port),
        "PORT_B": str(args.ui_port),
        "LLAMACPP_BASE_URL": args.llamacpp_base_url,
        "MODEL_DIR": "/models",
        "TRUENAS_WS_URL": args.ws_url,
        "TRUENAS_API_KEY": args.api_key,
        # NOTE(review): hard-coded "llamacpp" even though --app-name is a
        # parameter; presumably this names the separate llama.cpp app the
        # wrapper manages, not this app itself — confirm it is intentional.
        "TRUENAS_APP_NAME": "llamacpp",
        # NOTE(review): always "false" even when --verify-ssl is passed —
        # confirm the wrapper container should not inherit that flag.
        "TRUENAS_VERIFY_SSL": "false",
    }
    if args.api_user:
        env["TRUENAS_API_USER"] = args.api_user
    return env


def _build_compose(args: argparse.Namespace) -> Dict[str, Any]:
    """Docker-compose document for the custom app deployed on TrueNAS."""
    return {
        "services": {
            "wrapper": {
                "image": args.image,
                "restart": "unless-stopped",
                "ports": [
                    f"{args.api_port}:{args.api_port}",
                    f"{args.ui_port}:{args.ui_port}",
                ],
                "environment": _build_env(args),
                "volumes": [
                    f"{args.model_host_path}:/models",
                    # Docker socket is mounted so the wrapper can control
                    # sibling containers on the host.
                    "/var/run/docker.sock:/var/run/docker.sock",
                ],
                "networks": ["llamacpp_net"],
            }
        },
        "networks": {
            "llamacpp_net": {"external": True, "name": args.network}
        },
    }


async def main() -> None:
    """Create or update the wrapper app on TrueNAS and print the outcome.

    Queries for an app with id == --app-name: if present, pushes the new
    compose config via app.update; otherwise creates a custom app via
    app.create. Prints a JSON summary (action, ports, middleware result).
    """
    args = _parse_args()
    compose = _build_compose(args)

    existing = await _rpc_call(args.ws_url, args.api_key, "app.query", [[["id", "=", args.app_name]]], args.verify_ssl)
    if existing:
        result = await _rpc_call(
            args.ws_url,
            args.api_key,
            "app.update",
            [args.app_name, {"custom_compose_config": compose}],
            args.verify_ssl,
        )
        action = "updated"
    else:
        create_payload = {
            "custom_app": True,
            "app_name": args.app_name,
            "custom_compose_config": compose,
        }
        result = await _rpc_call(args.ws_url, args.api_key, "app.create", [create_payload], args.verify_ssl)
        action = "created"

    print(json.dumps({"action": action, "api_port": args.api_port, "ui_port": args.ui_port, "result": result}, indent=2))
|
||||
|
||||
|
||||
# Script entry point: run the async deployment flow to completion.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
162
scripts/remote_wrapper_test.py
Normal file
162
scripts/remote_wrapper_test.py
Normal file
@@ -0,0 +1,162 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import requests
|
||||
|
||||
BASE = os.getenv("WRAPPER_BASE", "http://192.168.1.2:9000")
|
||||
UPSTREAM = os.getenv("LLAMACPP_BASE", "http://192.168.1.2:8071")
|
||||
RUNS = int(os.getenv("RUNS", "100"))
|
||||
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "4"))
|
||||
TIMEOUT = int(os.getenv("REQ_TIMEOUT", "300"))
|
||||
|
||||
|
||||
def _now():
|
||||
return datetime.utcnow().isoformat() + "Z"
|
||||
|
||||
|
||||
def _get_loaded_model_id():
    """Poll the upstream llama.cpp server until it reports a model.

    Retries every 5 seconds for up to 10 minutes. Returns the id of the
    first model listed by GET /v1/models; raises RuntimeError with the
    last failure reason once the deadline passes.
    """
    stop_at = time.time() + 600
    reason = None
    while time.time() < stop_at:
        try:
            response = requests.get(UPSTREAM + "/v1/models", timeout=30)
            response.raise_for_status()
            models = response.json().get("data") or []
            if models:
                return models[0].get("id")
            reason = "no models reported by upstream"
        except Exception as exc:  # best-effort poll: record and keep retrying
            reason = str(exc)
        time.sleep(5)
    raise RuntimeError(f"upstream not ready: {reason}")
|
||||
|
||||
|
||||
def _stream_ok(resp):
|
||||
got_data = False
|
||||
got_done = False
|
||||
for line in resp.iter_lines(decode_unicode=True):
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith("data:"):
|
||||
got_data = True
|
||||
if line.strip() == "data: [DONE]":
|
||||
got_done = True
|
||||
break
|
||||
return got_data, got_done
|
||||
|
||||
|
||||
def run_suite(model_id, idx):
    """Exercise each wrapper endpoint once and collect per-check outcomes.

    Returns a dict mapping check name to the HTTP status code, plus boolean
    "*_stream_ok" entries for the two streaming checks. ``idx`` is the run
    number; it is woven into prompts and varies the temperature slightly.
    """
    status = {}

    # Model listing and single-model lookup.
    resp = requests.get(BASE + "/v1/models", timeout=30)
    status["models"] = resp.status_code

    resp = requests.get(BASE + f"/v1/models/{model_id}", timeout=30)
    status["model_get"] = resp.status_code

    # Chat completions, non-streaming.
    chat_body = {
        "model": model_id,
        "messages": [{"role": "user", "content": f"Run {idx}: say ok."}],
        "max_tokens": MAX_TOKENS,
        "temperature": (idx % 5) / 10.0,
    }
    resp = requests.post(BASE + "/v1/chat/completions", json=chat_body, timeout=TIMEOUT)
    status["chat"] = resp.status_code

    # Chat completions, streaming (SSE).
    chat_stream_body = dict(chat_body)
    chat_stream_body["stream"] = True
    resp = requests.post(BASE + "/v1/chat/completions", json=chat_stream_body, stream=True, timeout=TIMEOUT)
    saw_data, saw_done = _stream_ok(resp)
    status["chat_stream"] = resp.status_code
    status["chat_stream_ok"] = saw_data and saw_done

    # Responses API, non-streaming.
    responses_body = {
        "model": model_id,
        "input": f"Run {idx}: say ok.",
        "max_output_tokens": MAX_TOKENS,
    }
    resp = requests.post(BASE + "/v1/responses", json=responses_body, timeout=TIMEOUT)
    status["responses"] = resp.status_code

    # Responses API, streaming.
    responses_stream_body = {
        "model": model_id,
        "input": f"Run {idx}: say ok.",
        "stream": True,
    }
    resp = requests.post(BASE + "/v1/responses", json=responses_stream_body, stream=True, timeout=TIMEOUT)
    saw_data, saw_done = _stream_ok(resp)
    status["responses_stream"] = resp.status_code
    status["responses_stream_ok"] = saw_data and saw_done

    # Embeddings (best effort; the status code is recorded but not part of
    # the pass/fail set the caller checks).
    embeddings_body = {"model": model_id, "input": f"Run {idx}"}
    resp = requests.post(BASE + "/v1/embeddings", json=embeddings_body, timeout=TIMEOUT)
    status["embeddings"] = resp.status_code

    # Raw proxy passthrough to the upstream server.
    resp = requests.post(BASE + "/proxy/llamacpp/v1/chat/completions", json=chat_body, timeout=TIMEOUT)
    status["proxy"] = resp.status_code

    return status
|
||||
|
||||
|
||||
def main():
    """Run the endpoint suite RUNS times and write a JSON report.

    Waits for the upstream model to load, runs `run_suite` once per
    iteration, records per-run status/timing (or the error), writes the
    full report to reports/remote_wrapper_test.json, and prints a compact
    pass count.
    """
    report = {
        "started_at": _now(),
        "base": BASE,
        "upstream": UPSTREAM,
        "runs": RUNS,
        "max_tokens": MAX_TOKENS,
        "results": [],
    }

    model_id = _get_loaded_model_id()
    report["model_id"] = model_id

    # Checks that must all return HTTP 200 for a run to count as "ok".
    core_checks = ("models", "model_get", "chat", "chat_stream", "responses", "responses_stream", "proxy")

    for run_no in range(1, RUNS + 1):
        t0 = time.time()
        try:
            status = run_suite(model_id, run_no)
            http_ok = all(status.get(name) == 200 for name in core_checks)
            streams_ok = status.get("chat_stream_ok") and status.get("responses_stream_ok")
            entry = {
                "run": run_no,
                "ok": http_ok and streams_ok,
                "stream_ok": streams_ok,
                "status": status,
                "elapsed_s": round(time.time() - t0, 2),
            }
        except Exception as exc:
            # A failed run is recorded, not fatal — the suite keeps going.
            entry = {
                "run": run_no,
                "ok": False,
                "stream_ok": False,
                "error": str(exc),
                "elapsed_s": round(time.time() - t0, 2),
            }
        report["results"].append(entry)
        print(f"Run {run_no}/{RUNS} done")

    report["finished_at"] = _now()

    os.makedirs("reports", exist_ok=True)
    out_path = os.path.join("reports", "remote_wrapper_test.json")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)

    # Print a compact summary
    passed = sum(1 for entry in report["results"] if entry.get("ok"))
    print(f"OK {passed}/{RUNS}")
|
||||
|
||||
|
||||
# Script entry point: run the remote wrapper test suite.
if __name__ == "__main__":
    main()
|
||||
29
scripts/update_llamacpp_flags.ps1
Normal file
29
scripts/update_llamacpp_flags.ps1
Normal file
@@ -0,0 +1,29 @@
|
||||
<#
Fetch llama.cpp server documentation from GitHub and extract every "--flag"
token it mentions.

Outputs:
  $OutDocs  - concatenated markdown of each source (or a failure marker)
  $OutFlags - sorted, de-duplicated list of --flag tokens found in the docs
#>
param(
    # PowerShell escapes with backtick, not backslash: the original "\\"
    # defaults contained two literal backslashes.
    [string]$OutDocs = "reports\llamacpp_docs.md",
    [string]$OutFlags = "reports\llamacpp_flags.txt"
)

# Set-Content does not create missing directories — ensure they exist first.
foreach ($path in @($OutDocs, $OutFlags)) {
    $dir = Split-Path -Parent $path
    if ($dir -and -not (Test-Path $dir)) {
        New-Item -ItemType Directory -Force -Path $dir | Out-Null
    }
}

# NOTE(review): llama.cpp has reorganized its repo over time (e.g.
# examples/server moved); dead URLs are tolerated and recorded below.
$urls = @(
    "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/examples/server/README.md",
    "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/examples/server/README-llama-server.md",
    "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/README.md"
)

$out = @()
foreach ($u in $urls) {
    try {
        $content = Invoke-WebRequest -Uri $u -UseBasicParsing -TimeoutSec 30
        $out += "# Source: $u"
        $out += $content.Content
    } catch {
        # Record the failure but keep going with the remaining sources.
        $out += "# Source: $u"
        $out += "(failed to fetch)"
    }
}

$out | Set-Content -Encoding UTF8 $OutDocs

# Extract every "--flag" token. The class is letters/digits/hyphen only;
# the original "\\-" also put a literal backslash into the class by accident.
$docs = Get-Content $OutDocs -Raw
$flags = [regex]::Matches($docs, "--[a-zA-Z0-9-]+") | ForEach-Object { $_.Value }
$flags = $flags | Sort-Object -Unique
$flags | Set-Content -Encoding UTF8 $OutFlags
|
||||
Reference in New Issue
Block a user