"""Tests for the /v1/responses endpoint, in both non-streaming and streaming modes.

The upstream llama.cpp-style server at http://llama.test is stubbed with respx.
``respx_mock`` is the fixture provided by respx's pytest plugin; ``api_client`` is
expected to be an httpx-compatible test client bound to the service under test.
"""

import json

import httpx
import pytest


@pytest.mark.parametrize("case", list(range(120)))
def test_responses_non_stream(api_client, respx_mock, case):
    # Stub the upstream model listing and chat completion endpoints.
    respx_mock.get("http://llama.test/v1/models").mock(
        return_value=httpx.Response(200, json={"data": [{"id": "model-a.gguf"}]})
    )
    respx_mock.post("http://llama.test/v1/chat/completions").mock(
        return_value=httpx.Response(
            200, json={"choices": [{"message": {"content": f"reply-{case}"}}]}
        )
    )

    payload = {
        "model": "model-a.gguf",
        "input": f"prompt-{case}",
        "max_output_tokens": 32,
    }
    resp = api_client.post("/v1/responses", json=payload)

    # The upstream chat completion should be translated into a Responses API object.
    assert resp.status_code == 200
    data = resp.json()
    assert data["object"] == "response"
    assert data["output"][0]["content"][0]["text"].startswith("reply-")


@pytest.mark.parametrize("case", list(range(120)))
def test_responses_stream(api_client, respx_mock, case):
    respx_mock.get("http://llama.test/v1/models").mock(
        return_value=httpx.Response(200, json={"data": [{"id": "model-a.gguf"}]})
    )

    def stream_response(request):
        # Upstream replies with a single SSE chat.completion.chunk followed by the
        # [DONE] sentinel.
        payload = {
            "id": "chunk",
            "object": "chat.completion.chunk",
            "choices": [
                {"delta": {"content": f"hi-{case}"}, "index": 0, "finish_reason": None}
            ],
        }
        content = f"data: {json.dumps(payload)}\n\n".encode()
        content += b"data: [DONE]\n\n"
        return httpx.Response(
            200, content=content, headers={"Content-Type": "text/event-stream"}
        )

    respx_mock.post("http://llama.test/v1/chat/completions").mock(
        side_effect=stream_response
    )

    payload = {
        "model": "model-a.gguf",
        "input": f"prompt-{case}",
        "stream": True,
    }
    with api_client.stream("POST", "/v1/responses", json=payload) as resp:
        assert resp.status_code == 200
        body = b"".join(resp.iter_bytes())

    # The upstream chunk stream should be re-emitted as Responses API SSE events.
    assert b"event: response.created" in body
    assert b"event: response.output_text.delta" in body
    assert b"event: response.completed" in body