Initial commit
scripts/remote_wrapper_test.py (new file, 162 lines)
@@ -0,0 +1,162 @@
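"""Smoke test for the remote OpenAI-compatible wrapper.

Repeatedly exercises /v1/models, /v1/chat/completions (plain and streaming),
/v1/responses (plain and streaming), /v1/embeddings, and the llama.cpp proxy
route, then writes a JSON report to reports/remote_wrapper_test.json.

Example invocation (host/port values are illustrative, not a fixed deployment):

    WRAPPER_BASE=http://192.168.1.2:9000 RUNS=10 python scripts/remote_wrapper_test.py
"""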
import json
import os
import time
from datetime import datetime

import requests

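# Wrapper and upstream endpoints plus test knobs; every value can be
# overridden via the environment.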
BASE = os.getenv("WRAPPER_BASE", "http://192.168.1.2:9000")
UPSTREAM = os.getenv("LLAMACPP_BASE", "http://192.168.1.2:8071")
RUNS = int(os.getenv("RUNS", "100"))
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "4"))
TIMEOUT = int(os.getenv("REQ_TIMEOUT", "300"))


def _now():
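    # ISO-8601 UTC timestamp. Note: datetime.utcnow() is deprecated as of
    # Python 3.12; datetime.now(timezone.utc) is the modern replacement.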
    return datetime.utcnow().isoformat() + "Z"


def _get_loaded_model_id():
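    """Poll the upstream /v1/models endpoint for up to 10 minutes and
    return the id of the first model it reports."""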
    deadline = time.time() + 600
    last_error = None
    while time.time() < deadline:
        try:
            resp = requests.get(UPSTREAM + "/v1/models", timeout=30)
            resp.raise_for_status()
            data = resp.json().get("data") or []
            if data:
                return data[0].get("id")
            last_error = "no models reported by upstream"
        except Exception as exc:
            last_error = str(exc)
        time.sleep(5)
    raise RuntimeError(f"upstream not ready: {last_error}")


def _stream_ok(resp):
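    """Scan an SSE body for any `data:` line and the `data: [DONE]` sentinel."""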
    got_data = False
    got_done = False
    try:
        for line in resp.iter_lines(decode_unicode=True):
            if not line:
                continue
            if line.startswith("data:"):
                got_data = True
            if line.strip() == "data: [DONE]":
                got_done = True
                break
    finally:
        resp.close()  # release the pooled connection even when we break out early
    return got_data, got_done


def run_suite(model_id, idx):
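    """Exercise each wrapper endpoint once and record the HTTP status codes
    (plus stream-health booleans for the streaming routes)."""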
    results = {}

    # Models
    r = requests.get(BASE + "/v1/models", timeout=30)
    results["models"] = r.status_code

    r = requests.get(BASE + f"/v1/models/{model_id}", timeout=30)
    results["model_get"] = r.status_code

    # Chat completions non-stream
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": f"Run {idx}: say ok."}],
        "max_tokens": MAX_TOKENS,
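        # Cycle temperature through 0.0-0.4 across runs so successive
        # requests exercise slightly different sampling settings.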
"temperature": (idx % 5) / 10.0,
|
||||
}
|
||||
    r = requests.post(BASE + "/v1/chat/completions", json=payload, timeout=TIMEOUT)
    results["chat"] = r.status_code

    # Chat completions stream
    payload_stream = dict(payload)
    payload_stream["stream"] = True
    r = requests.post(BASE + "/v1/chat/completions", json=payload_stream, stream=True, timeout=TIMEOUT)
    ok_data, ok_done = _stream_ok(r)
    results["chat_stream"] = r.status_code
    results["chat_stream_ok"] = ok_data and ok_done

    # Responses non-stream
    payload_resp = {
        "model": model_id,
        "input": f"Run {idx}: say ok.",
        "max_output_tokens": MAX_TOKENS,
    }
    r = requests.post(BASE + "/v1/responses", json=payload_resp, timeout=TIMEOUT)
    results["responses"] = r.status_code

    # Responses stream
    payload_resp_stream = {
        "model": model_id,
        "input": f"Run {idx}: say ok.",
        "stream": True,
    }
    r = requests.post(BASE + "/v1/responses", json=payload_resp_stream, stream=True, timeout=TIMEOUT)
    ok_data, ok_done = _stream_ok(r)
    results["responses_stream"] = r.status_code
    results["responses_stream_ok"] = ok_data and ok_done

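    # Embeddings (best effort): excluded from the pass criterion in main(),
    # since the upstream may not have been started with embedding support.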
    payload_emb = {"model": model_id, "input": f"Run {idx}"}
    r = requests.post(BASE + "/v1/embeddings", json=payload_emb, timeout=TIMEOUT)
    results["embeddings"] = r.status_code

    # Proxy (raw passthrough route to the llama.cpp upstream)
    r = requests.post(BASE + "/proxy/llamacpp/v1/chat/completions", json=payload, timeout=TIMEOUT)
    results["proxy"] = r.status_code

    return results


def main():
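    """Wait for the upstream model, run the suite RUNS times, and write a
    JSON report under reports/."""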
    summary = {
        "started_at": _now(),
        "base": BASE,
        "upstream": UPSTREAM,
        "runs": RUNS,
        "max_tokens": MAX_TOKENS,
        "results": [],
    }

    model_id = _get_loaded_model_id()
    summary["model_id"] = model_id

    for i in range(1, RUNS + 1):
        start = time.time()
        try:
            results = run_suite(model_id, i)
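            # "ok" covers every endpoint except embeddings, which is
            # best-effort and deliberately left out of the pass criterion.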
            ok = all(
                results.get(key) == 200
                for key in ("models", "model_get", "chat", "chat_stream", "responses", "responses_stream", "proxy")
            )
            stream_ok = results.get("chat_stream_ok") and results.get("responses_stream_ok")
            summary["results"].append({
                "run": i,
                "ok": ok and stream_ok,
                "stream_ok": stream_ok,
                "status": results,
                "elapsed_s": round(time.time() - start, 2),
            })
        except Exception as exc:
            summary["results"].append({
                "run": i,
                "ok": False,
                "stream_ok": False,
                "error": str(exc),
                "elapsed_s": round(time.time() - start, 2),
            })
        print(f"Run {i}/{RUNS} done")

    summary["finished_at"] = _now()

    os.makedirs("reports", exist_ok=True)
    out_path = os.path.join("reports", "remote_wrapper_test.json")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    # Print a compact summary
    ok_count = sum(1 for r in summary["results"] if r.get("ok"))
    print(f"OK {ok_count}/{RUNS}")


if __name__ == "__main__":
    main()