Initial commit

This commit is contained in:
Rushabh Gosar
2026-01-07 16:54:39 -08:00
commit 5d1a0ee72b
53 changed files with 9885 additions and 0 deletions

View File

@@ -0,0 +1,162 @@
import json
import os
import time
from datetime import datetime
import requests
# --- Configuration (each value can be overridden through the environment) ---

# Root URL of the wrapper service whose endpoints are exercised by the suite.
BASE = os.environ.get("WRAPPER_BASE", "http://192.168.1.2:9000")

# Root URL of the upstream server that is polled for the loaded model id.
UPSTREAM = os.environ.get("LLAMACPP_BASE", "http://192.168.1.2:8071")

# Number of times the full suite is repeated.
RUNS = int(os.environ.get("RUNS", "100"))

# Token cap sent with the generation requests.
MAX_TOKENS = int(os.environ.get("MAX_TOKENS", "4"))

# Timeout (passed to `requests`) for the generation/embedding/proxy calls.
TIMEOUT = int(os.environ.get("REQ_TIMEOUT", "300"))
def _now():
    """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix.

    The output format is the naive ``isoformat()`` text followed by ``"Z"``
    (for example ``2026-01-07T16:54:39.123456Z``).
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timezone

    # `datetime.utcnow()` is deprecated since Python 3.12.  Take an aware UTC
    # timestamp and drop the tzinfo so `isoformat()` does not append "+00:00"
    # in front of the explicit "Z".
    current = datetime.now(timezone.utc).replace(tzinfo=None)
    return current.isoformat() + "Z"
def _get_loaded_model_id(timeout_s=600, poll_interval_s=5):
    """Poll the upstream server until it reports a model and return its id.

    Repeatedly requests ``UPSTREAM + "/v1/models"`` and returns the ``id`` of
    the first entry in the response's ``data`` list.

    Args:
        timeout_s: Total time to keep polling, in seconds (default 600).
        poll_interval_s: Pause between attempts, in seconds (default 5).

    Returns:
        The ``id`` value of the first model entry reported by the upstream.

    Raises:
        RuntimeError: If no usable model id was obtained before the deadline;
            the message carries the last observed error.
    """
    # Monotonic clock so the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout_s
    last_error = None
    while time.monotonic() < deadline:
        try:
            resp = requests.get(UPSTREAM + "/v1/models", timeout=30)
            resp.raise_for_status()
            data = resp.json().get("data") or []
            if data:
                model_id = data[0].get("id")
                if model_id:
                    return model_id
                # An entry without an id is useless to the callers; keep polling
                # instead of handing back None as if it were a model id.
                last_error = "upstream model entry has no id"
            else:
                last_error = "no models reported by upstream"
        except Exception as exc:
            # Deliberately broad: this is a readiness poll, so connection
            # errors, HTTP errors and malformed bodies all mean "not ready yet".
            last_error = str(exc)
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(poll_interval_s, remaining))
    raise RuntimeError(f"upstream not ready: {last_error}")
def _stream_ok(resp):
    """Consume a streamed response and report what was seen on it.

    Args:
        resp: A response object exposing ``iter_lines(decode_unicode=...)``
            and ``close()`` (a streamed ``requests`` response in this file).

    Returns:
        A ``(got_data, got_done)`` tuple of booleans: whether at least one
        ``data:`` line was received, and whether the ``[DONE]`` terminator
        was received.  Reading stops at the terminator.
    """
    got_data = False
    got_done = False
    try:
        for line in resp.iter_lines(decode_unicode=True):
            if not line:
                continue
            # `decode_unicode=True` still yields bytes when the response has
            # no known encoding; decode here so the prefix checks below do
            # not raise TypeError.
            if isinstance(line, bytes):
                line = line.decode("utf-8", errors="replace")
            if not line.startswith("data:"):
                continue
            got_data = True
            # The space after "data:" is optional, so compare the payload
            # only ("data: [DONE]" and "data:[DONE]" both terminate).
            if line[len("data:"):].strip() == "[DONE]":
                got_done = True
                break
    finally:
        # Stopping early leaves unread bytes on the connection; close the
        # response so the connection is released.  Already-parsed attributes
        # such as the status code remain readable afterwards.
        resp.close()
    return got_data, got_done
def run_suite(model_id, idx):
    """Exercise every wrapper endpoint once and collect the outcomes.

    Args:
        model_id: Model identifier placed in each request payload and in the
            ``/v1/models/{model_id}`` path.
        idx: Run number; embedded in the prompts and used to vary the chat
            temperature (``(idx % 5) / 10.0``).

    Returns:
        A dict with the HTTP status code of each call under the keys
        ``models``, ``model_get``, ``chat``, ``chat_stream``, ``responses``,
        ``responses_stream``, ``embeddings`` and ``proxy``, plus the booleans
        ``chat_stream_ok`` and ``responses_stream_ok`` which are true when the
        stream delivered both a ``data:`` line and the ``[DONE]`` terminator.

    Nothing is caught here: any exception raised by ``requests`` propagates
    to the caller.
    """
    results = {}
    prompt = f"Run {idx}: say ok."

    # Models
    r = requests.get(BASE + "/v1/models", timeout=30)
    results["models"] = r.status_code
    r = requests.get(BASE + f"/v1/models/{model_id}", timeout=30)
    results["model_get"] = r.status_code

    # Chat completions non-stream
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": MAX_TOKENS,
        "temperature": (idx % 5) / 10.0,
    }
    r = requests.post(BASE + "/v1/chat/completions", json=payload, timeout=TIMEOUT)
    results["chat"] = r.status_code

    # Chat completions stream.  The `with` block releases the connection even
    # when the stream is abandoned early or reading raises.
    payload_stream = dict(payload, stream=True)
    with requests.post(
        BASE + "/v1/chat/completions",
        json=payload_stream,
        stream=True,
        timeout=TIMEOUT,
    ) as r:
        ok_data, ok_done = _stream_ok(r)
        results["chat_stream"] = r.status_code
    results["chat_stream_ok"] = ok_data and ok_done

    # Responses non-stream
    payload_resp = {
        "model": model_id,
        "input": prompt,
        "max_output_tokens": MAX_TOKENS,
    }
    r = requests.post(BASE + "/v1/responses", json=payload_resp, timeout=TIMEOUT)
    results["responses"] = r.status_code

    # Responses stream.  Derived from the non-stream payload so the token cap
    # applies here too, as it does for every other generation request.
    payload_resp_stream = dict(payload_resp, stream=True)
    with requests.post(
        BASE + "/v1/responses",
        json=payload_resp_stream,
        stream=True,
        timeout=TIMEOUT,
    ) as r:
        ok_data, ok_done = _stream_ok(r)
        results["responses_stream"] = r.status_code
    results["responses_stream_ok"] = ok_data and ok_done

    # Embeddings (best effort): the status is recorded like the others.
    payload_emb = {"model": model_id, "input": f"Run {idx}"}
    r = requests.post(BASE + "/v1/embeddings", json=payload_emb, timeout=TIMEOUT)
    results["embeddings"] = r.status_code

    # Proxy: same chat payload sent through the pass-through route.
    r = requests.post(
        BASE + "/proxy/llamacpp/v1/chat/completions",
        json=payload,
        timeout=TIMEOUT,
    )
    results["proxy"] = r.status_code

    return results
def main():
    """Run the suite ``RUNS`` times and write a JSON report.

    Waits for the upstream to report a model, runs ``run_suite`` once per
    iteration, records per-run status and timing, writes the summary to
    ``reports/remote_wrapper_test.json`` and prints a one-line tally.

    A run counts as ok when every required endpoint returned HTTP 200 and
    both streaming checks succeeded.  The embeddings status is recorded but
    is not part of that verdict.  An exception inside a run is recorded as a
    failed run and does not stop the loop.
    """
    summary = {
        "started_at": _now(),
        "base": BASE,
        "upstream": UPSTREAM,
        "runs": RUNS,
        "max_tokens": MAX_TOKENS,
        "results": [],
    }
    model_id = _get_loaded_model_id()
    summary["model_id"] = model_id

    # Endpoints whose status must be 200 for a run to count as ok.
    required = (
        "models",
        "model_get",
        "chat",
        "chat_stream",
        "responses",
        "responses_stream",
        "proxy",
    )

    try:
        for i in range(1, RUNS + 1):
            # Monotonic clock: elapsed time must not be skewed by wall-clock
            # adjustments during a long run.
            start = time.monotonic()
            try:
                results = run_suite(model_id, i)
            except Exception as exc:
                entry = {
                    "run": i,
                    "ok": False,
                    "stream_ok": False,
                    "error": str(exc),
                }
            else:
                ok = all(results.get(key) == 200 for key in required)
                stream_ok = bool(
                    results.get("chat_stream_ok")
                    and results.get("responses_stream_ok")
                )
                entry = {
                    "run": i,
                    "ok": ok and stream_ok,
                    "stream_ok": stream_ok,
                    "status": results,
                }
            entry["elapsed_s"] = round(time.monotonic() - start, 2)
            summary["results"].append(entry)
            print(f"Run {i}/{RUNS} done")
    finally:
        # Written in `finally` so the runs completed so far are still saved
        # when the loop is interrupted (e.g. Ctrl-C); the interruption itself
        # continues to propagate afterwards.
        summary["finished_at"] = _now()
        os.makedirs("reports", exist_ok=True)
        out_path = os.path.join("reports", "remote_wrapper_test.json")
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(summary, f, indent=2)

    # Print a compact summary
    ok_count = sum(1 for r in summary["results"] if r.get("ok"))
    print(f"OK {ok_count}/{RUNS}")


# Run the suite only when executed as a script, not when imported.
if __name__ == "__main__":
    main()