Initial commit
This commit is contained in:
61
tests/conftest.py
Normal file
61
tests/conftest.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
import respx
|
||||
|
||||
from app.api_app import create_api_app
|
||||
from app.ui_app import create_ui_app
|
||||
|
||||
|
||||
@pytest.fixture()
def agents_config(tmp_path: Path) -> Path:
    """Write a minimal agents-config JSON into *tmp_path* and return its path."""
    config_path = tmp_path / "agents_config.json"
    config = {
        "image": "ghcr.io/ggml-org/llama.cpp:server-cuda",
        "container_name": "ix-llamacpp-llamacpp-1",
        "host_port": 8071,
        "container_port": 8080,
        "web_ui_url": "http://0.0.0.0:8071/",
        "model_host_path": str(tmp_path),
        "model_container_path": str(tmp_path),
        "models": [],
        "network": "ix-llamacpp_default",
        "subnets": ["172.16.18.0/24"],
        "gpu_count": 2,
        "gpu_name": "NVIDIA RTX 5060 Ti",
    }
    config_path.write_text(json.dumps(config), encoding="utf-8")
    return config_path
|
||||
|
||||
|
||||
@pytest.fixture()
def model_dir(tmp_path: Path) -> Path:
    """Populate *tmp_path* with two dummy .gguf model files and return it."""
    for filename, contents in (("model-a.gguf", "x"), ("model-b.gguf", "y")):
        (tmp_path / filename).write_text(contents, encoding="utf-8")
    return tmp_path
|
||||
|
||||
|
||||
@pytest.fixture()
def api_client(monkeypatch: pytest.MonkeyPatch, agents_config: Path, model_dir: Path):
    """TestClient for the API app, with env vars pointed at the tmp fixtures."""
    env = {
        "AGENTS_CONFIG_PATH": str(agents_config),
        "MODEL_DIR": str(model_dir),
        "LLAMACPP_BASE_URL": "http://llama.test",
    }
    for key, value in env.items():
        monkeypatch.setenv(key, value)
    return TestClient(create_api_app())
|
||||
|
||||
|
||||
@pytest.fixture()
def ui_client(monkeypatch: pytest.MonkeyPatch, agents_config: Path, model_dir: Path):
    """TestClient for the UI app, with env vars pointed at the tmp fixtures."""
    env = {
        "AGENTS_CONFIG_PATH": str(agents_config),
        "MODEL_DIR": str(model_dir),
    }
    for key, value in env.items():
        monkeypatch.setenv(key, value)
    return TestClient(create_ui_app())
|
||||
|
||||
|
||||
@pytest.fixture()
def respx_mock():
    # Yield a respx router so tests can stub outbound HTTP calls.
    # assert_all_called=False: a test may register routes it never hits.
    with respx.mock(assert_all_called=False) as mock:
        yield mock
|
||||
77
tests/test_chat_completions.py
Normal file
77
tests/test_chat_completions.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import json
|
||||
import pytest
|
||||
import httpx
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", list(range(120)))
def test_chat_completions_non_stream(api_client, respx_mock, case):
    """A non-streaming chat completion is proxied and its content returned."""
    models_json = {"data": [{"id": "model-a.gguf"}]}
    completion_json = {"id": f"chatcmpl-{case}", "choices": [{"message": {"content": "ok"}}]}
    respx_mock.get("http://llama.test/v1/models").mock(
        return_value=httpx.Response(200, json=models_json)
    )
    respx_mock.post("http://llama.test/v1/chat/completions").mock(
        return_value=httpx.Response(200, json=completion_json)
    )

    request_body = {
        "model": "model-a.gguf",
        "messages": [{"role": "user", "content": f"hello {case}"}],
        "temperature": (case % 10) / 10,
    }
    response = api_client.post("/v1/chat/completions", json=request_body)
    assert response.status_code == 200
    assert response.json()["choices"][0]["message"]["content"] == "ok"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", list(range(120)))
def test_chat_completions_stream(api_client, respx_mock, case):
    """A streaming chat completion yields SSE "data:" lines."""
    respx_mock.get("http://llama.test/v1/models").mock(
        return_value=httpx.Response(200, json={"data": [{"id": "model-a.gguf"}]})
    )

    def sse_reply(request):
        # Minimal single-chunk SSE body.
        return httpx.Response(
            200,
            content=b"data: {\"id\": \"chunk\"}\n\n",
            headers={"Content-Type": "text/event-stream"},
        )

    respx_mock.post("http://llama.test/v1/chat/completions").mock(side_effect=sse_reply)

    request_body = {
        "model": "model-a.gguf",
        "messages": [{"role": "user", "content": f"hello {case}"}],
        "stream": True,
    }
    with api_client.stream("POST", "/v1/chat/completions", json=request_body) as response:
        assert response.status_code == 200
        streamed = b"".join(response.iter_bytes())
        assert b"data:" in streamed
|
||||
|
||||
|
||||
def test_chat_completions_tools_normalize(api_client, respx_mock):
    """A flat-format tool is normalized into nested {"function": ...} upstream."""
    respx_mock.get("http://llama.test/v1/models").mock(
        return_value=httpx.Response(200, json={"data": [{"id": "model-a.gguf"}]})
    )

    def verify_upstream(request):
        # The wrapper must have nested the flat tool under "function".
        body = request.json()
        sent_tools = body.get("tools") or []
        assert sent_tools
        assert sent_tools[0].get("function", {}).get("name") == "format_final_json_response"
        return httpx.Response(200, json={"id": "chatcmpl-tools", "choices": [{"message": {"content": "ok"}}]})

    respx_mock.post("http://llama.test/v1/chat/completions").mock(side_effect=verify_upstream)

    flat_tool = {
        "type": "function",
        "name": "format_final_json_response",
        "parameters": {"type": "object"},
    }
    request_body = {
        "model": "model-a.gguf",
        "messages": [{"role": "user", "content": "hello"}],
        "tools": [flat_tool],
        "tool_choice": {"type": "function", "name": "format_final_json_response"},
    }

    response = api_client.post("/v1/chat/completions", json=request_body)
    assert response.status_code == 200
|
||||
14
tests/test_embeddings.py
Normal file
14
tests/test_embeddings.py
Normal file
@@ -0,0 +1,14 @@
|
||||
import pytest
|
||||
import httpx
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", list(range(120)))
def test_embeddings(api_client, respx_mock, case):
    """Embeddings requests are forwarded and the upstream payload returned."""
    respx_mock.post("http://llama.test/v1/embeddings").mock(
        return_value=httpx.Response(200, json={"data": [{"embedding": [0.1, 0.2]}]})
    )
    response = api_client.post(
        "/v1/embeddings",
        json={"model": "model-a.gguf", "input": f"text-{case}"},
    )
    assert response.status_code == 200
    assert "data" in response.json()
|
||||
24
tests/test_models.py
Normal file
24
tests/test_models.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", list(range(120)))
def test_list_models_cases(api_client, case):
    """GET /v1/models returns an OpenAI-style list envelope."""
    response = api_client.get("/v1/models", headers={"x-case": str(case)})
    assert response.status_code == 200
    body = response.json()
    assert body["object"] == "list"
    assert isinstance(body["data"], list)
|
||||
|
||||
|
||||
# Fix: the original built the parameter list as [f"model-a.gguf" for _ in
# range(120)] — an f-string with no placeholders (ruff F541) in a pointless
# comprehension. A repeated-literal list is equivalent and explicit.
@pytest.mark.parametrize("model_id", ["model-a.gguf"] * 120)
def test_get_model_ok(api_client, model_id):
    """GET /v1/models/{id} for a known model returns 200 with a matching id."""
    resp = api_client.get(f"/v1/models/{model_id}")
    assert resp.status_code == 200
    payload = resp.json()
    assert payload["id"] == model_id
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_id", [f"missing-{i}" for i in range(120)])
def test_get_model_not_found(api_client, model_id):
    """GET /v1/models/{id} for an unknown model returns 404."""
    response = api_client.get(f"/v1/models/{model_id}")
    assert response.status_code == 404
|
||||
12
tests/test_proxy.py
Normal file
12
tests/test_proxy.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import pytest
|
||||
import httpx
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", list(range(120)))
def test_proxy_passthrough(api_client, respx_mock, case):
    """Arbitrary paths under /proxy/llamacpp/ are forwarded verbatim."""
    respx_mock.post("http://llama.test/test/path").mock(
        return_value=httpx.Response(200, content=f"ok-{case}".encode())
    )
    response = api_client.post("/proxy/llamacpp/test/path", content=b"hello")
    assert response.status_code == 200
    assert response.content.startswith(b"ok-")
|
||||
283
tests/test_remote_wrapper.py
Normal file
283
tests/test_remote_wrapper.py
Normal file
@@ -0,0 +1,283 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import ssl
|
||||
import time
|
||||
from typing import Dict, List
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
import websockets
|
||||
|
||||
# Endpoints and credentials for the live deployment under test; all are
# overridable via environment so the suite can target other hosts.
WRAPPER_BASE = os.getenv("WRAPPER_BASE", "http://192.168.1.2:9093")
UI_BASE = os.getenv("UI_BASE", "http://192.168.1.2:9094")
TRUENAS_WS_URL = os.getenv("TRUENAS_WS_URL", "wss://192.168.1.2/websocket")
# Empty key -> TrueNAS-dependent tests skip (see _rpc_call).
TRUENAS_API_KEY = os.getenv("TRUENAS_API_KEY", "")
TRUENAS_APP_NAME = os.getenv("TRUENAS_APP_NAME", "llamacpp")
# Optional: pin a specific model id; otherwise tests use the first listed.
MODEL_REQUEST = os.getenv("MODEL_REQUEST", "")
|
||||
|
||||
|
||||
async def _rpc_call(method: str, params: List | None = None):
    """Call a TrueNAS middleware *method* over its DDP-style websocket.

    Skips the calling test when TRUENAS_API_KEY is unset. Returns the RPC
    "result" field; raises RuntimeError on connect, auth, or method errors.
    """
    if not TRUENAS_API_KEY:
        pytest.skip("TRUENAS_API_KEY not set")
    # The appliance presents a self-signed certificate, so disable TLS checks.
    ssl_ctx = ssl.create_default_context()
    ssl_ctx.check_hostname = False
    ssl_ctx.verify_mode = ssl.CERT_NONE
    async with websockets.connect(TRUENAS_WS_URL, ssl=ssl_ctx) as ws:
        # Protocol handshake must precede everything else.
        await ws.send(json.dumps({"msg": "connect", "version": "1", "support": ["1"]}))
        connected = json.loads(await ws.recv())
        if connected.get("msg") != "connected":
            raise RuntimeError("failed to connect")
        # Authenticate with the API key (message id 1).
        await ws.send(json.dumps({"id": 1, "msg": "method", "method": "auth.login_with_api_key", "params": [TRUENAS_API_KEY]}))
        auth = json.loads(await ws.recv())
        if not auth.get("result"):
            raise RuntimeError("auth failed")
        # Issue the actual call (message id 2) and wait for its reply,
        # skipping any unrelated server-pushed messages.
        await ws.send(json.dumps({"id": 2, "msg": "method", "method": method, "params": params or []}))
        while True:
            raw = json.loads(await ws.recv())
            if raw.get("id") != 2:
                continue
            if raw.get("msg") == "error":
                raise RuntimeError(raw.get("error"))
            return raw.get("result")
|
||||
|
||||
|
||||
def _get_models() -> List[str]:
    """Return the model ids advertised by the wrapper's /v1/models endpoint."""
    _wait_for_http(WRAPPER_BASE + "/health")
    response = requests.get(WRAPPER_BASE + "/v1/models", timeout=30)
    response.raise_for_status()
    entries = response.json().get("data") or []
    ids: List[str] = []
    for entry in entries:
        model_id = entry.get("id")
        if model_id:
            ids.append(model_id)
    return ids
|
||||
|
||||
|
||||
def _assert_chat_ok(resp_json: Dict) -> str:
|
||||
choices = resp_json.get("choices") or []
|
||||
assert choices, "no choices"
|
||||
message = choices[0].get("message") or {}
|
||||
text = message.get("content") or ""
|
||||
assert text.strip(), "empty content"
|
||||
return text
|
||||
|
||||
|
||||
def _wait_for_http(url: str, timeout_s: float = 90) -> None:
    """Poll *url* every 2s until it returns 200 or *timeout_s* elapses.

    Raises RuntimeError with the last observed failure when the deadline passes.
    """
    deadline = time.time() + timeout_s
    failure = None
    while time.time() < deadline:
        try:
            probe = requests.get(url, timeout=5)
        except Exception as exc:
            failure = str(exc)
        else:
            if probe.status_code == 200:
                return
            failure = f"status {probe.status_code}"
        time.sleep(2)
    raise RuntimeError(f"service not ready: {url} ({failure})")
|
||||
|
||||
|
||||
def _post_with_retry(url: str, payload: Dict, timeout_s: float = 300, retries: int = 6, delay_s: float = 5.0):
    """POST *payload* to *url*, retrying transient failures.

    Retries non-200 responses and connection-level errors up to *retries*
    times, sleeping *delay_s* between attempts. Returns the first 200
    response; otherwise returns the last non-200 response, or re-raises the
    last RequestException when no response was ever received.
    """
    last = None
    for attempt in range(retries):
        try:
            resp = requests.post(url, json=payload, timeout=timeout_s)
            if resp.status_code == 200:
                return resp
            last = resp
        except requests.exceptions.RequestException as exc:
            last = exc
        # Fix: the original slept unconditionally, adding a pointless
        # delay_s pause after the final attempt before reporting failure.
        if attempt < retries - 1:
            time.sleep(delay_s)
    if isinstance(last, Exception):
        raise last
    return last
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_active_model_and_multi_gpu_flags():
    """The running app command pins a model and splits tensors across GPUs."""
    cfg = await _rpc_call("app.config", [TRUENAS_APP_NAME])
    command = cfg.get("command") or []
    for flag in ("--model", "--tensor-split", "--split-mode"):
        assert flag in command
    split = command[command.index("--tensor-split") + 1]
    assert "," in split, f"tensor-split missing commas: {split}"
|
||||
|
||||
|
||||
def test_models_listed():
    """The wrapper advertises at least one model."""
    assert _get_models(), "no models discovered"
|
||||
|
||||
|
||||
def test_chat_completions_switch_and_prompts():
    """Repeated chat completions succeed against the selected model."""
    available = _get_models()
    assert available, "no models"
    if MODEL_REQUEST:
        assert MODEL_REQUEST in available, f"MODEL_REQUEST not found: {MODEL_REQUEST}"
    target = MODEL_REQUEST if MODEL_REQUEST else available[0]
    request_body = {
        "model": target,
        "messages": [{"role": "user", "content": "Say OK."}],
        "max_tokens": 12,
        "temperature": 0,
    }
    for _ in range(3):
        response = _post_with_retry(WRAPPER_BASE + "/v1/chat/completions", request_body)
        assert response.status_code == 200
        _assert_chat_ok(response.json())
|
||||
|
||||
|
||||
def test_tools_flat_format():
    """The wrapper accepts flat-format tools without rejecting the request."""
    available = _get_models()
    assert available, "no models"
    if MODEL_REQUEST:
        assert MODEL_REQUEST in available, f"MODEL_REQUEST not found: {MODEL_REQUEST}"
    target = MODEL_REQUEST if MODEL_REQUEST else available[0]
    flat_tool = {
        "type": "function",
        "name": "format_final_json_response",
        "description": "format output",
        "parameters": {
            "type": "object",
            "properties": {"ok": {"type": "boolean"}},
            "required": ["ok"],
        },
    }
    request_body = {
        "model": target,
        "messages": [{"role": "user", "content": "Say OK and do not call tools."}],
        "tools": [flat_tool],
        "max_tokens": 12,
    }
    response = _post_with_retry(WRAPPER_BASE + "/v1/chat/completions", request_body)
    assert response.status_code == 200
    _assert_chat_ok(response.json())
|
||||
|
||||
|
||||
def test_functions_payload_normalized():
    """Legacy "functions" payloads are accepted (normalized internally)."""
    available = _get_models()
    assert available, "no models"
    if MODEL_REQUEST:
        assert MODEL_REQUEST in available, f"MODEL_REQUEST not found: {MODEL_REQUEST}"
    target = MODEL_REQUEST if MODEL_REQUEST else available[0]
    legacy_function = {
        "name": "format_final_json_response",
        "description": "format output",
        "parameters": {
            "type": "object",
            "properties": {"ok": {"type": "boolean"}},
            "required": ["ok"],
        },
    }
    request_body = {
        "model": target,
        "messages": [{"role": "user", "content": "Say OK and do not call tools."}],
        "functions": [legacy_function],
        "max_tokens": 12,
    }
    response = _post_with_retry(WRAPPER_BASE + "/v1/chat/completions", request_body)
    assert response.status_code == 200
    _assert_chat_ok(response.json())
|
||||
|
||||
|
||||
def test_return_format_json():
    """return_format=json yields content that parses as a JSON object."""
    available = _get_models()
    assert available, "no models"
    if MODEL_REQUEST:
        assert MODEL_REQUEST in available, f"MODEL_REQUEST not found: {MODEL_REQUEST}"
    target = MODEL_REQUEST if MODEL_REQUEST else available[0]
    request_body = {
        "model": target,
        "messages": [{"role": "user", "content": "Return JSON with key ok true."}],
        "return_format": "json",
        "max_tokens": 32,
        "temperature": 0,
    }
    response = _post_with_retry(WRAPPER_BASE + "/v1/chat/completions", request_body)
    assert response.status_code == 200
    reply_text = _assert_chat_ok(response.json())
    assert isinstance(json.loads(reply_text), dict)
|
||||
|
||||
|
||||
def test_responses_endpoint():
    """/v1/responses returns non-empty output text."""
    available = _get_models()
    assert available, "no models"
    if MODEL_REQUEST:
        assert MODEL_REQUEST in available, f"MODEL_REQUEST not found: {MODEL_REQUEST}"
    target = MODEL_REQUEST if MODEL_REQUEST else available[0]
    request_body = {
        "model": target,
        "input": "Say OK.",
        "max_output_tokens": 16,
    }
    response = _post_with_retry(WRAPPER_BASE + "/v1/responses", request_body)
    assert response.status_code == 200
    output = response.json().get("output") or []
    assert output, "responses output empty"
    content = output[0].get("content") or []
    text = content[0].get("text") if content else ""
    assert text and text.strip()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_model_switch_applied_to_truenas():
    """Switching via the UI updates --model in the TrueNAS app command."""
    available = _get_models()
    assert available, "no models"
    target = MODEL_REQUEST or available[0]
    assert target in available, f"MODEL_REQUEST not found: {target}"
    switch_resp = requests.post(
        UI_BASE + "/ui/api/switch-model",
        json={"model_id": target, "warmup_prompt": "warmup"},
        timeout=600,
    )
    assert switch_resp.status_code == 200
    cfg = await _rpc_call("app.config", [TRUENAS_APP_NAME])
    command = cfg.get("command") or []
    assert "--model" in command
    model_path = command[command.index("--model") + 1]
    assert model_path.endswith(target)
|
||||
|
||||
|
||||
def test_invalid_model_rejected():
    """An unknown model id yields 404 from chat completions."""
    assert _get_models(), "no models"
    request_body = {
        "model": "modelx-q8:4b",
        "messages": [{"role": "user", "content": "Say OK."}],
        "max_tokens": 8,
        "temperature": 0,
    }
    response = requests.post(
        WRAPPER_BASE + "/v1/chat/completions", json=request_body, timeout=60
    )
    assert response.status_code == 404
|
||||
|
||||
|
||||
def test_llamacpp_logs_streaming():
    """Log fetch returns non-empty text and the SSE log stream responds."""
    # Poll the snapshot endpoint up to 5 times; logs may lag container start.
    logs = ""
    for _ in range(5):
        try:
            resp = requests.get(UI_BASE + "/ui/api/llamacpp-logs", timeout=10)
            if resp.status_code == 200:
                logs = resp.json().get("logs") or ""
                if logs.strip():
                    break
        except requests.exceptions.ReadTimeout:
            pass
        time.sleep(2)
    assert logs.strip(), "no logs returned"

    # Force a log line before streaming.
    try:
        requests.get(WRAPPER_BASE + "/proxy/llamacpp/health", timeout=5)
    except Exception:
        pass

    # Stream endpoint may not emit immediately, so validate that the endpoint responds.
    # (5, 5) = (connect, read) timeouts so a silent stream can't hang the test.
    with requests.get(UI_BASE + "/ui/api/llamacpp-logs/stream", stream=True, timeout=(5, 5)) as resp:
        assert resp.status_code == 200
|
||||
55
tests/test_responses.py
Normal file
55
tests/test_responses.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import json
|
||||
import pytest
|
||||
import httpx
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", list(range(120)))
def test_responses_non_stream(api_client, respx_mock, case):
    """/v1/responses wraps a chat completion into a response object."""
    respx_mock.get("http://llama.test/v1/models").mock(
        return_value=httpx.Response(200, json={"data": [{"id": "model-a.gguf"}]})
    )
    respx_mock.post("http://llama.test/v1/chat/completions").mock(
        return_value=httpx.Response(200, json={"choices": [{"message": {"content": f"reply-{case}"}}]})
    )

    request_body = {
        "model": "model-a.gguf",
        "input": f"prompt-{case}",
        "max_output_tokens": 32,
    }
    response = api_client.post("/v1/responses", json=request_body)
    assert response.status_code == 200
    body = response.json()
    assert body["object"] == "response"
    assert body["output"][0]["content"][0]["text"].startswith("reply-")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", list(range(120)))
def test_responses_stream(api_client, respx_mock, case):
    """Streaming /v1/responses re-emits upstream chunks as response events."""
    respx_mock.get("http://llama.test/v1/models").mock(
        return_value=httpx.Response(200, json={"data": [{"id": "model-a.gguf"}]})
    )

    def fake_upstream(request):
        # One chat-completion chunk followed by the SSE terminator.
        chunk = {
            "id": "chunk",
            "object": "chat.completion.chunk",
            "choices": [{"delta": {"content": f"hi-{case}"}, "index": 0, "finish_reason": None}],
        }
        body = f"data: {json.dumps(chunk)}\n\n".encode() + b"data: [DONE]\n\n"
        return httpx.Response(200, content=body, headers={"Content-Type": "text/event-stream"})

    respx_mock.post("http://llama.test/v1/chat/completions").mock(side_effect=fake_upstream)

    request_body = {
        "model": "model-a.gguf",
        "input": f"prompt-{case}",
        "stream": True,
    }
    with api_client.stream("POST", "/v1/responses", json=request_body) as response:
        assert response.status_code == 200
        streamed = b"".join(response.iter_bytes())
        for marker in (
            b"event: response.created",
            b"event: response.output_text.delta",
            b"event: response.completed",
        ):
            assert marker in streamed
|
||||
54
tests/test_truenas_switch.py
Normal file
54
tests/test_truenas_switch.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from app.truenas_middleware import TrueNASConfig, switch_model
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("case", list(range(120)))
async def test_switch_model_updates_command(monkeypatch, case):
    """switch_model rewrites --model in the compose command via app.update."""
    # Compose config as app.config would return it, with an old model pinned.
    compose = {
        "services": {
            "llamacpp": {
                "command": [
                    "--model",
                    "/models/old.gguf",
                    "--ctx-size",
                    "2048",
                ]
            }
        }
    }

    captured = {}

    async def fake_rpc_call(cfg, method, params=None):
        # Serve the compose config and capture the app.update payload instead
        # of talking to a real TrueNAS middleware.
        if method == "app.config":
            return {"custom_compose_config": compose}
        if method == "app.update":
            captured["payload"] = params[1]
            return {"state": "RUNNING"}
        raise AssertionError(f"unexpected method {method}")

    monkeypatch.setattr("app.truenas_middleware._rpc_call", fake_rpc_call)

    cfg = TrueNASConfig(
        ws_url="ws://truenas.test/websocket",
        api_key="key",
        api_user=None,
        app_name="llamacpp",
        verify_ssl=False,
    )

    await switch_model(
        cfg,
        f"/models/new-{case}.gguf",
        {"n_gpu_layers": "999"},
        "--flash-attn on",
    )

    # The captured update payload must carry the new model path in command.
    assert "custom_compose_config" in captured["payload"]
    cmd = captured["payload"]["custom_compose_config"]["services"]["llamacpp"]["command"]
    assert "--model" in cmd
    idx = cmd.index("--model")
    assert cmd[idx + 1].endswith(f"new-{case}.gguf")
|
||||
48
tests/test_ui.py
Normal file
48
tests/test_ui.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
UI_BASE = os.getenv("UI_BASE", "http://192.168.1.2:9094")
|
||||
|
||||
def _wait_for_http(url: str, timeout_s: float = 90) -> None:
    """Poll *url* every 2s until it returns 200 or *timeout_s* elapses.

    Raises RuntimeError with the last observed failure when the deadline passes.
    """
    deadline = time.time() + timeout_s
    failure = None
    while time.time() < deadline:
        try:
            probe = requests.get(url, timeout=5)
        except Exception as exc:
            failure = str(exc)
        else:
            if probe.status_code == 200:
                return
            failure = f"status {probe.status_code}"
        time.sleep(2)
    raise RuntimeError(f"service not ready: {url} ({failure})")
|
||||
|
||||
|
||||
def test_ui_index_contains_expected_elements():
    """The index page serves the expected UI skeleton."""
    _wait_for_http(UI_BASE + "/health")
    response = requests.get(UI_BASE + "/", timeout=30)
    assert response.status_code == 200
    html = response.text
    needles = (
        "Model Manager",
        "id=\"download-form\"",
        "id=\"models-list\"",
        "id=\"logs-output\"",
        "id=\"theme-toggle\"",
    )
    for needle in needles:
        assert needle in html
|
||||
|
||||
|
||||
def test_ui_assets_available():
    """Static CSS/JS assets are served and contain expected hooks."""
    css_resp = requests.get(UI_BASE + "/ui/styles.css", timeout=30)
    assert css_resp.status_code == 200
    assert "data-theme" in css_resp.text

    js_resp = requests.get(UI_BASE + "/ui/app.js", timeout=30)
    assert js_resp.status_code == 200
    script = js_resp.text
    for needle in ("themeToggle", "localStorage", "logs-output"):
        assert needle in script
|
||||
Reference in New Issue
Block a user