Initial commit
This commit is contained in:
116
scripts/deploy_truenas_wrapper.py
Normal file
116
scripts/deploy_truenas_wrapper.py
Normal file
@@ -0,0 +1,116 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import ssl
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import websockets
|
||||
|
||||
|
||||
async def _rpc_call(ws_url: str, api_key: str, method: str, params: Optional[list] = None, verify_ssl: bool = False) -> Any:
    """Invoke one TrueNAS middleware method over its DDP-style websocket.

    Opens a fresh connection, performs the "connect" handshake, authenticates
    with the API key, sends a single method call, and returns its result.

    Args:
        ws_url: Websocket endpoint, e.g. "wss://host/websocket".
        api_key: TrueNAS API key used with auth.login_with_api_key.
        method: Middleware method name to invoke.
        params: Positional parameters for the method (defaults to []).
        verify_ssl: When False and the URL is wss://, TLS verification is
            disabled (self-signed certificates are common on TrueNAS boxes).

    Returns:
        The "result" field of the matching reply message.

    Raises:
        RuntimeError: On handshake failure, auth failure, or a server-side
            error reply for this call.
    """
    context = None
    if ws_url.startswith("wss://") and not verify_ssl:
        # Accept self-signed certs: no hostname check, no chain verification.
        context = ssl.create_default_context()
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

    async with websockets.connect(ws_url, ssl=context) as sock:

        async def send(payload: Dict[str, Any]) -> None:
            await sock.send(json.dumps(payload))

        async def recv() -> Dict[str, Any]:
            return json.loads(await sock.recv())

        # Protocol handshake.
        await send({"msg": "connect", "version": "1", "support": ["1"]})
        hello = await recv()
        if hello.get("msg") != "connected":
            raise RuntimeError("failed to connect to TrueNAS websocket")

        # Authenticate; a falsy "result" means the key was rejected.
        await send({"id": 1, "msg": "method", "method": "auth.login_with_api_key", "params": [api_key]})
        login = await recv()
        if not login.get("result"):
            raise RuntimeError("API key authentication failed")

        # Issue the actual call and wait for the reply bearing our id,
        # skipping any unrelated messages the server pushes in between.
        call_id = 2
        await send({"id": call_id, "msg": "method", "method": method, "params": params or []})
        while True:
            reply = await recv()
            if reply.get("id") != call_id:
                continue
            if reply.get("msg") == "error":
                raise RuntimeError(reply.get("error"))
            return reply.get("result")
|
||||
|
||||
|
||||
def _parse_args() -> argparse.Namespace:
    """Define and parse the deployment CLI options."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--ws-url", required=True)
    parser.add_argument("--api-key", required=True)
    parser.add_argument("--api-user")
    parser.add_argument("--app-name", required=True)
    parser.add_argument("--image", required=True)
    parser.add_argument("--model-host-path", required=True)
    parser.add_argument("--llamacpp-base-url", required=True)
    parser.add_argument("--network", required=True)
    parser.add_argument("--api-port", type=int, default=9091)
    parser.add_argument("--ui-port", type=int, default=9092)
    parser.add_argument("--verify-ssl", action="store_true")
    return parser.parse_args()


def _build_env(args: argparse.Namespace) -> Dict[str, str]:
    """Environment variables injected into the wrapper container."""
    env = {
        "PORT_A": str(args.api_port),
        "PORT_B": str(args.ui_port),
        "LLAMACPP_BASE_URL": args.llamacpp_base_url,
        "MODEL_DIR": "/models",
        "TRUENAS_WS_URL": args.ws_url,
        "TRUENAS_API_KEY": args.api_key,
        # NOTE(review): hard-coded "llamacpp" even though --app-name is a
        # parameter; presumably this names the separate llama.cpp app the
        # wrapper manages, not this app itself — confirm it is intentional.
        "TRUENAS_APP_NAME": "llamacpp",
        # NOTE(review): always "false" even when --verify-ssl is passed —
        # confirm the wrapper container should not inherit that flag.
        "TRUENAS_VERIFY_SSL": "false",
    }
    if args.api_user:
        env["TRUENAS_API_USER"] = args.api_user
    return env


def _build_compose(args: argparse.Namespace) -> Dict[str, Any]:
    """Docker-compose document for the custom app deployed on TrueNAS."""
    return {
        "services": {
            "wrapper": {
                "image": args.image,
                "restart": "unless-stopped",
                "ports": [
                    f"{args.api_port}:{args.api_port}",
                    f"{args.ui_port}:{args.ui_port}",
                ],
                "environment": _build_env(args),
                "volumes": [
                    f"{args.model_host_path}:/models",
                    # Docker socket is mounted so the wrapper can control
                    # sibling containers on the host.
                    "/var/run/docker.sock:/var/run/docker.sock",
                ],
                "networks": ["llamacpp_net"],
            }
        },
        "networks": {
            "llamacpp_net": {"external": True, "name": args.network}
        },
    }


async def main() -> None:
    """Create or update the wrapper app on TrueNAS and print the outcome.

    Queries for an app with id == --app-name: if present, pushes the new
    compose config via app.update; otherwise creates a custom app via
    app.create. Prints a JSON summary (action, ports, middleware result).
    """
    args = _parse_args()
    compose = _build_compose(args)

    existing = await _rpc_call(args.ws_url, args.api_key, "app.query", [[["id", "=", args.app_name]]], args.verify_ssl)
    if existing:
        result = await _rpc_call(
            args.ws_url,
            args.api_key,
            "app.update",
            [args.app_name, {"custom_compose_config": compose}],
            args.verify_ssl,
        )
        action = "updated"
    else:
        create_payload = {
            "custom_app": True,
            "app_name": args.app_name,
            "custom_compose_config": compose,
        }
        result = await _rpc_call(args.ws_url, args.api_key, "app.create", [create_payload], args.verify_ssl)
        action = "created"

    print(json.dumps({"action": action, "api_port": args.api_port, "ui_port": args.ui_port, "result": result}, indent=2))
|
||||
|
||||
|
||||
# Script entry point: run the async deployment flow to completion.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
162
scripts/remote_wrapper_test.py
Normal file
162
scripts/remote_wrapper_test.py
Normal file
@@ -0,0 +1,162 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import requests
|
||||
|
||||
BASE = os.getenv("WRAPPER_BASE", "http://192.168.1.2:9000")
|
||||
UPSTREAM = os.getenv("LLAMACPP_BASE", "http://192.168.1.2:8071")
|
||||
RUNS = int(os.getenv("RUNS", "100"))
|
||||
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "4"))
|
||||
TIMEOUT = int(os.getenv("REQ_TIMEOUT", "300"))
|
||||
|
||||
|
||||
def _now():
|
||||
return datetime.utcnow().isoformat() + "Z"
|
||||
|
||||
|
||||
def _get_loaded_model_id():
    """Poll the upstream llama.cpp server until it reports a model.

    Retries every 5 seconds for up to 10 minutes. Returns the id of the
    first model listed by GET /v1/models; raises RuntimeError with the
    last failure reason once the deadline passes.
    """
    stop_at = time.time() + 600
    reason = None
    while time.time() < stop_at:
        try:
            response = requests.get(UPSTREAM + "/v1/models", timeout=30)
            response.raise_for_status()
            models = response.json().get("data") or []
            if models:
                return models[0].get("id")
            reason = "no models reported by upstream"
        except Exception as exc:  # best-effort poll: record and keep retrying
            reason = str(exc)
        time.sleep(5)
    raise RuntimeError(f"upstream not ready: {reason}")
|
||||
|
||||
|
||||
def _stream_ok(resp):
|
||||
got_data = False
|
||||
got_done = False
|
||||
for line in resp.iter_lines(decode_unicode=True):
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith("data:"):
|
||||
got_data = True
|
||||
if line.strip() == "data: [DONE]":
|
||||
got_done = True
|
||||
break
|
||||
return got_data, got_done
|
||||
|
||||
|
||||
def run_suite(model_id, idx):
    """Exercise each wrapper endpoint once and collect per-check outcomes.

    Returns a dict mapping check name to the HTTP status code, plus boolean
    "*_stream_ok" entries for the two streaming checks. ``idx`` is the run
    number; it is woven into prompts and varies the temperature slightly.
    """
    status = {}

    # Model listing and single-model lookup.
    resp = requests.get(BASE + "/v1/models", timeout=30)
    status["models"] = resp.status_code

    resp = requests.get(BASE + f"/v1/models/{model_id}", timeout=30)
    status["model_get"] = resp.status_code

    # Chat completions, non-streaming.
    chat_body = {
        "model": model_id,
        "messages": [{"role": "user", "content": f"Run {idx}: say ok."}],
        "max_tokens": MAX_TOKENS,
        "temperature": (idx % 5) / 10.0,
    }
    resp = requests.post(BASE + "/v1/chat/completions", json=chat_body, timeout=TIMEOUT)
    status["chat"] = resp.status_code

    # Chat completions, streaming (SSE).
    chat_stream_body = dict(chat_body)
    chat_stream_body["stream"] = True
    resp = requests.post(BASE + "/v1/chat/completions", json=chat_stream_body, stream=True, timeout=TIMEOUT)
    saw_data, saw_done = _stream_ok(resp)
    status["chat_stream"] = resp.status_code
    status["chat_stream_ok"] = saw_data and saw_done

    # Responses API, non-streaming.
    responses_body = {
        "model": model_id,
        "input": f"Run {idx}: say ok.",
        "max_output_tokens": MAX_TOKENS,
    }
    resp = requests.post(BASE + "/v1/responses", json=responses_body, timeout=TIMEOUT)
    status["responses"] = resp.status_code

    # Responses API, streaming.
    responses_stream_body = {
        "model": model_id,
        "input": f"Run {idx}: say ok.",
        "stream": True,
    }
    resp = requests.post(BASE + "/v1/responses", json=responses_stream_body, stream=True, timeout=TIMEOUT)
    saw_data, saw_done = _stream_ok(resp)
    status["responses_stream"] = resp.status_code
    status["responses_stream_ok"] = saw_data and saw_done

    # Embeddings (best effort; the status code is recorded but not part of
    # the pass/fail set the caller checks).
    embeddings_body = {"model": model_id, "input": f"Run {idx}"}
    resp = requests.post(BASE + "/v1/embeddings", json=embeddings_body, timeout=TIMEOUT)
    status["embeddings"] = resp.status_code

    # Raw proxy passthrough to the upstream server.
    resp = requests.post(BASE + "/proxy/llamacpp/v1/chat/completions", json=chat_body, timeout=TIMEOUT)
    status["proxy"] = resp.status_code

    return status
|
||||
|
||||
|
||||
def main():
    """Run the endpoint suite RUNS times and write a JSON report.

    Waits for the upstream model to load, runs `run_suite` once per
    iteration, records per-run status/timing (or the error), writes the
    full report to reports/remote_wrapper_test.json, and prints a compact
    pass count.
    """
    report = {
        "started_at": _now(),
        "base": BASE,
        "upstream": UPSTREAM,
        "runs": RUNS,
        "max_tokens": MAX_TOKENS,
        "results": [],
    }

    model_id = _get_loaded_model_id()
    report["model_id"] = model_id

    # Checks that must all return HTTP 200 for a run to count as "ok".
    core_checks = ("models", "model_get", "chat", "chat_stream", "responses", "responses_stream", "proxy")

    for run_no in range(1, RUNS + 1):
        t0 = time.time()
        try:
            status = run_suite(model_id, run_no)
            http_ok = all(status.get(name) == 200 for name in core_checks)
            streams_ok = status.get("chat_stream_ok") and status.get("responses_stream_ok")
            entry = {
                "run": run_no,
                "ok": http_ok and streams_ok,
                "stream_ok": streams_ok,
                "status": status,
                "elapsed_s": round(time.time() - t0, 2),
            }
        except Exception as exc:
            # A failed run is recorded, not fatal — the suite keeps going.
            entry = {
                "run": run_no,
                "ok": False,
                "stream_ok": False,
                "error": str(exc),
                "elapsed_s": round(time.time() - t0, 2),
            }
        report["results"].append(entry)
        print(f"Run {run_no}/{RUNS} done")

    report["finished_at"] = _now()

    os.makedirs("reports", exist_ok=True)
    out_path = os.path.join("reports", "remote_wrapper_test.json")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)

    # Print a compact summary
    passed = sum(1 for entry in report["results"] if entry.get("ok"))
    print(f"OK {passed}/{RUNS}")
|
||||
|
||||
|
||||
# Script entry point: run the remote wrapper test suite.
if __name__ == "__main__":
    main()
|
||||
29
scripts/update_llamacpp_flags.ps1
Normal file
29
scripts/update_llamacpp_flags.ps1
Normal file
@@ -0,0 +1,29 @@
|
||||
<#
Fetch llama.cpp server documentation from GitHub and extract every "--flag"
token it mentions.

Outputs:
  $OutDocs  - concatenated markdown of each source (or a failure marker)
  $OutFlags - sorted, de-duplicated list of --flag tokens found in the docs
#>
param(
    # PowerShell escapes with backtick, not backslash: the original "\\"
    # defaults contained two literal backslashes.
    [string]$OutDocs = "reports\llamacpp_docs.md",
    [string]$OutFlags = "reports\llamacpp_flags.txt"
)

# Set-Content does not create missing directories — ensure they exist first.
foreach ($path in @($OutDocs, $OutFlags)) {
    $dir = Split-Path -Parent $path
    if ($dir -and -not (Test-Path $dir)) {
        New-Item -ItemType Directory -Force -Path $dir | Out-Null
    }
}

# NOTE(review): llama.cpp has reorganized its repo over time (e.g.
# examples/server moved); dead URLs are tolerated and recorded below.
$urls = @(
    "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/examples/server/README.md",
    "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/examples/server/README-llama-server.md",
    "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/README.md"
)

$out = @()
foreach ($u in $urls) {
    try {
        $content = Invoke-WebRequest -Uri $u -UseBasicParsing -TimeoutSec 30
        $out += "# Source: $u"
        $out += $content.Content
    } catch {
        # Record the failure but keep going with the remaining sources.
        $out += "# Source: $u"
        $out += "(failed to fetch)"
    }
}

$out | Set-Content -Encoding UTF8 $OutDocs

# Extract every "--flag" token. The class is letters/digits/hyphen only;
# the original "\\-" also put a literal backslash into the class by accident.
$docs = Get-Content $OutDocs -Raw
$flags = [regex]::Matches($docs, "--[a-zA-Z0-9-]+") | ForEach-Object { $_.Value }
$flags = $flags | Sort-Object -Unique
$flags | Set-Content -Encoding UTF8 $OutFlags
|
||||
Reference in New Issue
Block a user