Initial commit
This commit is contained in:
102
llamaCpp.Wrapper.app/stream_transform.py
Normal file
102
llamaCpp.Wrapper.app/stream_transform.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, AsyncIterator, Dict
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
def _sse_event(event: str, data: Dict[str, Any]) -> bytes:
|
||||
payload = json.dumps(data, separators=(",", ":"))
|
||||
return f"event: {event}\ndata: {payload}\n\n".encode("utf-8")
|
||||
|
||||
def _filter_headers(headers: Dict[str, str]) -> Dict[str, str]:
|
||||
drop = {"host", "content-length"}
|
||||
return {k: v for k, v in headers.items() if k.lower() not in drop}
|
||||
|
||||
|
||||
async def stream_chat_to_responses(
|
||||
base_url: str,
|
||||
headers: Dict[str, str],
|
||||
payload: Dict[str, Any],
|
||||
timeout_s: float,
|
||||
) -> AsyncIterator[bytes]:
|
||||
response_id = f"resp_{uuid.uuid4().hex}"
|
||||
created = int(time.time())
|
||||
model = payload.get("model") or "unknown"
|
||||
msg_id = f"msg_{uuid.uuid4().hex}"
|
||||
output_text = ""
|
||||
|
||||
response_stub = {
|
||||
"id": response_id,
|
||||
"object": "response",
|
||||
"created": created,
|
||||
"model": model,
|
||||
"output": [
|
||||
{
|
||||
"id": msg_id,
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "output_text", "text": ""}
|
||||
],
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
yield _sse_event("response.created", {"type": "response.created", "response": response_stub})
|
||||
|
||||
async with httpx.AsyncClient(base_url=base_url, timeout=timeout_s) as client:
|
||||
async with client.stream(
|
||||
"POST",
|
||||
"/v1/chat/completions",
|
||||
headers=_filter_headers(headers),
|
||||
json=payload,
|
||||
) as resp:
|
||||
resp.raise_for_status()
|
||||
buffer = ""
|
||||
async for chunk in resp.aiter_text():
|
||||
buffer += chunk
|
||||
while "\n\n" in buffer:
|
||||
block, buffer = buffer.split("\n\n", 1)
|
||||
lines = [line for line in block.splitlines() if line.startswith("data:")]
|
||||
if not lines:
|
||||
continue
|
||||
data_str = "\n".join(line[len("data:"):].strip() for line in lines)
|
||||
if data_str == "[DONE]":
|
||||
continue
|
||||
try:
|
||||
data = json.loads(data_str)
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
choices = data.get("choices") or []
|
||||
if not choices:
|
||||
continue
|
||||
delta = choices[0].get("delta") or {}
|
||||
text_delta = delta.get("content")
|
||||
if text_delta:
|
||||
output_text += text_delta
|
||||
yield _sse_event(
|
||||
"response.output_text.delta",
|
||||
{
|
||||
"type": "response.output_text.delta",
|
||||
"delta": text_delta,
|
||||
"item_id": msg_id,
|
||||
"output_index": 0,
|
||||
"content_index": 0,
|
||||
},
|
||||
)
|
||||
|
||||
yield _sse_event(
|
||||
"response.output_text.done",
|
||||
{
|
||||
"type": "response.output_text.done",
|
||||
"text": output_text,
|
||||
"item_id": msg_id,
|
||||
"output_index": 0,
|
||||
"content_index": 0,
|
||||
},
|
||||
)
|
||||
|
||||
response_stub["output"][0]["content"][0]["text"] = output_text
|
||||
yield _sse_event("response.completed", {"type": "response.completed", "response": response_stub})
|
||||
Reference in New Issue
Block a user