"""Async helpers that forward HTTP requests to an upstream server using httpx."""

import logging
from typing import AsyncIterator, Dict, Optional

import httpx

log = logging.getLogger("llamacpp_client")

# Request headers that must not be copied onto the upstream request.
#   * "host" and "content-length" describe the *incoming* request; httpx derives
#     the correct values for the outgoing one (the body may be re-encoded).
#   * The remaining names are hop-by-hop headers: they are only meaningful for a
#     single transport connection and must not be forwarded by an intermediary.
#     Forwarding "transfer-encoding" in particular can clash with the
#     Content-Length httpx computes for the re-sent body.
_DROPPED_REQUEST_HEADERS = frozenset(
    {
        "host",
        "content-length",
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailer",
        "trailers",
        "transfer-encoding",
        "upgrade",
    }
)


def _filter_headers(headers: Dict[str, str]) -> Dict[str, str]:
    """Return a copy of *headers* without entries that must not be forwarded.

    Header names are compared case-insensitively; the original spelling of the
    names that are kept is preserved. The input mapping is not modified.

    Args:
        headers: Headers of the incoming request.

    Returns:
        A new dict containing every header whose lower-cased name is not in
        ``_DROPPED_REQUEST_HEADERS``.
    """
    return {
        name: value
        for name, value in headers.items()
        if name.lower() not in _DROPPED_REQUEST_HEADERS
    }


async def proxy_json(
    base_url: str,
    path: str,
    method: str,
    headers: Dict[str, str],
    payload: Optional[dict],
    timeout_s: float,
) -> httpx.Response:
    """Send one request with an optional JSON body and return the response.

    A fresh ``httpx.AsyncClient`` is created for the call and closed before
    returning.

    Args:
        base_url: Base URL the client is configured with.
        path: Request path, passed to ``client.request`` as the URL.
        method: HTTP method name.
        headers: Incoming headers; filtered through ``_filter_headers``.
        payload: Object passed as ``json=``; may be ``None``.
        timeout_s: Timeout value handed to the client.

    Returns:
        The ``httpx.Response`` produced by ``client.request``. No status check
        is performed here.
    """
    async with httpx.AsyncClient(base_url=base_url, timeout=timeout_s) as client:
        return await client.request(
            method,
            path,
            headers=_filter_headers(headers),
            json=payload,
        )


async def proxy_raw(
    base_url: str,
    path: str,
    method: str,
    headers: Dict[str, str],
    body: Optional[bytes],
    timeout_s: float,
) -> httpx.Response:
    """Send one request with an optional raw byte body and return the response.

    Identical to ``proxy_json`` except that the body is passed unchanged via
    ``content=`` instead of being JSON-encoded.

    Args:
        base_url: Base URL the client is configured with.
        path: Request path, passed to ``client.request`` as the URL.
        method: HTTP method name.
        headers: Incoming headers; filtered through ``_filter_headers``.
        body: Bytes passed as ``content=``; may be ``None``.
        timeout_s: Timeout value handed to the client.

    Returns:
        The ``httpx.Response`` produced by ``client.request``. No status check
        is performed here.
    """
    async with httpx.AsyncClient(base_url=base_url, timeout=timeout_s) as client:
        return await client.request(
            method,
            path,
            headers=_filter_headers(headers),
            content=body,
        )


async def proxy_stream(
    base_url: str,
    path: str,
    method: str,
    headers: Dict[str, str],
    payload: Optional[dict],
    timeout_s: float,
) -> AsyncIterator[bytes]:
    """Send a JSON request and yield the response body chunk by chunk.

    This is an async generator: the request is only issued once iteration
    starts, and the client and response stay open until the generator is
    exhausted or closed.

    Args:
        base_url: Base URL the client is configured with.
        path: Request path, passed to ``client.stream`` as the URL.
        method: HTTP method name.
        headers: Incoming headers; filtered through ``_filter_headers``.
        payload: Object passed as ``json=``; may be ``None``.
        timeout_s: Timeout value handed to the client.

    Yields:
        Non-empty ``bytes`` chunks from ``resp.aiter_bytes()``.

    Raises:
        httpx.HTTPStatusError: Raised by ``resp.raise_for_status()`` before any
            chunk is yielded when the upstream status indicates failure.
    """
    async with httpx.AsyncClient(base_url=base_url, timeout=timeout_s) as client:
        async with client.stream(
            method,
            path,
            headers=_filter_headers(headers),
            json=payload,
        ) as resp:
            # Check the status up front so a failed request surfaces as an
            # exception instead of streaming an error body to the consumer.
            resp.raise_for_status()
            async for chunk in resp.aiter_bytes():
                # Skip empty chunks; only real data is passed on.
                if chunk:
                    yield chunk