import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional


@dataclass
class AgentsRuntime:
    image: Optional[str]
    container_name: Optional[str]
    host_port: Optional[int]
    container_port: Optional[int]
    web_ui_url: Optional[str]
    model_host_path: Optional[str]
    model_container_path: Optional[str]
    models: List[str]
    network: Optional[str]
    subnets: List[str]
    gpu_count: Optional[int]
    gpu_name: Optional[str]


@dataclass
class AppConfig:
    api_port: int
    ui_port: int
    base_url: str
    model_dir: str
    model_container_dir: str
    download_dir: str
    download_max_concurrent: int
    download_allowlist: List[str]
    restart_method: str
    restart_command: Optional[str]
    restart_url: Optional[str]
    reload_on_new_model: bool
    proxy_timeout_s: float
    switch_timeout_s: float
    gpu_count_runtime: Optional[int]
    llamacpp_args: Dict[str, str]
    llamacpp_extra_args: str
    truenas_api_key: Optional[str]
    truenas_api_user: Optional[str]
    truenas_app_name: str
    truenas_ws_url: Optional[str]
    truenas_verify_ssl: bool
    allowed_container: Optional[str]
    warmup_prompt_path: str
    llamacpp_container_name: Optional[str]
    model_aliases: Dict[str, str]
    agents: AgentsRuntime


def _load_agents_config(path: Path) -> AgentsRuntime:
    """Load the agents runtime description from JSON, or an empty one if missing."""
    if not path.exists():
        return AgentsRuntime(
            image=None,
            container_name=None,
            host_port=None,
            container_port=None,
            web_ui_url=None,
            model_host_path=None,
            model_container_path=None,
            models=[],
            network=None,
            subnets=[],
            gpu_count=None,
            gpu_name=None,
        )
    raw = json.loads(path.read_text(encoding="utf-8"))
    return AgentsRuntime(
        image=raw.get("image"),
        container_name=raw.get("container_name"),
        host_port=raw.get("host_port"),
        container_port=raw.get("container_port"),
        web_ui_url=raw.get("web_ui_url"),
        model_host_path=raw.get("model_host_path"),
        model_container_path=raw.get("model_container_path"),
        models=raw.get("models") or [],
        network=raw.get("network"),
        subnets=raw.get("subnets") or [],
        gpu_count=raw.get("gpu_count"),
        gpu_name=raw.get("gpu_name"),
    )


def _infer_gpu_count_runtime() -> Optional[int]:
    """Infer the visible GPU count from CUDA/NVIDIA device environment variables."""
    visible = os.getenv("CUDA_VISIBLE_DEVICES") or os.getenv("NVIDIA_VISIBLE_DEVICES")
    if visible and visible not in {"all", "void"}:
        parts = [p.strip() for p in visible.split(",") if p.strip()]
        if parts:
            return len(parts)
    return None


def _default_base_url(agents: AgentsRuntime) -> str:
    """Derive the llama.cpp base URL from the agents config, falling back to localhost."""
    if agents.container_name and agents.container_port:
        return f"http://{agents.container_name}:{agents.container_port}"
    if agents.host_port:
        return f"http://127.0.0.1:{agents.host_port}"
    return "http://127.0.0.1:8080"


def load_config() -> AppConfig:
    """Assemble the application config from environment variables and the agents file."""
    agents_path = Path(os.getenv("AGENTS_CONFIG_PATH", "app/agents_config.json"))
    agents = _load_agents_config(agents_path)

    api_port = int(os.getenv("PORT_A", "9093"))
    ui_port = int(os.getenv("PORT_B", "9094"))
    base_url = os.getenv("LLAMACPP_BASE_URL") or _default_base_url(agents)

    model_dir = os.getenv("MODEL_DIR") or agents.model_container_path or "/models"
    model_container_dir = os.getenv("MODEL_CONTAINER_DIR") or model_dir
    download_dir = os.getenv("MODEL_DOWNLOAD_DIR") or model_dir
    download_max = int(os.getenv("MODEL_DOWNLOAD_MAX_CONCURRENT", "2"))
    allowlist_raw = os.getenv("MODEL_DOWNLOAD_ALLOWLIST", "")
    allowlist = [item.strip() for item in allowlist_raw.split(",") if item.strip()]

    restart_method = os.getenv("LLAMACPP_RESTART_METHOD", "none").lower()
    restart_command = os.getenv("LLAMACPP_RESTART_COMMAND")
    restart_url = os.getenv("LLAMACPP_RESTART_URL")
    reload_on_new_model = os.getenv("RELOAD_ON_NEW_MODEL", "false").lower() in {"1", "true", "yes"}
    proxy_timeout_s = float(os.getenv("LLAMACPP_PROXY_TIMEOUT_S", "600"))
    switch_timeout_s = float(os.getenv("LLAMACPP_SWITCH_TIMEOUT_S", "300"))
    gpu_count_runtime = _infer_gpu_count_runtime()

    # Map individual LLAMACPP_* environment variables onto server argument names.
    llamacpp_args: Dict[str, str] = {}
    args_map = {
        "LLAMACPP_TENSOR_SPLIT": "tensor_split",
        "LLAMACPP_SPLIT_MODE": "split_mode",
        "LLAMACPP_N_GPU_LAYERS": "n_gpu_layers",
        "LLAMACPP_CTX_SIZE": "ctx_size",
        "LLAMACPP_BATCH_SIZE": "batch_size",
        "LLAMACPP_UBATCH_SIZE": "ubatch_size",
        "LLAMACPP_CACHE_TYPE_K": "cache_type_k",
        "LLAMACPP_CACHE_TYPE_V": "cache_type_v",
        "LLAMACPP_FLASH_ATTN": "flash_attn",
    }
    for env_key, arg_key in args_map.items():
        value = os.getenv(env_key)
        if value is not None and value != "":
            llamacpp_args[arg_key] = value
    llamacpp_extra_args = os.getenv("LLAMACPP_EXTRA_ARGS", "")

    truenas_api_key = os.getenv("TRUENAS_API_KEY")
    truenas_api_user = os.getenv("TRUENAS_API_USER")
    truenas_app_name = os.getenv("TRUENAS_APP_NAME", "llamacpp")
    truenas_ws_url = os.getenv("TRUENAS_WS_URL")
    truenas_api_url = os.getenv("TRUENAS_API_URL")
    # Derive the websocket URL from the HTTP API URL when only the latter is set.
    if not truenas_ws_url and truenas_api_url:
        if truenas_api_url.startswith("https://"):
            truenas_ws_url = "wss://" + truenas_api_url[len("https://"):].rstrip("/") + "/websocket"
        elif truenas_api_url.startswith("http://"):
            truenas_ws_url = "ws://" + truenas_api_url[len("http://"):].rstrip("/") + "/websocket"
    truenas_verify_ssl = os.getenv("TRUENAS_VERIFY_SSL", "false").lower() in {"1", "true", "yes"}

    allowed_container = os.getenv("LLAMACPP_TARGET_CONTAINER") or agents.container_name
    llamacpp_container_name = os.getenv("LLAMACPP_CONTAINER_NAME") or agents.container_name
    warmup_prompt_path = os.getenv("WARMUP_PROMPT_PATH", str(Path("trades_company_stock.txt").resolve()))

    # When targeting TrueNAS over websocket, the container path must be POSIX-style;
    # reset it if it looks like a Windows path (drive letter or backslashes).
    if truenas_ws_url and (":" in model_container_dir[:3] or "\\" in model_container_dir):
        model_container_dir = os.getenv("MODEL_CONTAINER_DIR") or "/models"

    # MODEL_ALIASES accepts either a JSON object or "alias=path,alias2=path2" pairs.
    aliases_raw = os.getenv("MODEL_ALIASES", "")
    model_aliases: Dict[str, str] = {}
    if aliases_raw:
        try:
            model_aliases = json.loads(aliases_raw)
        except json.JSONDecodeError:
            for item in aliases_raw.split(","):
                if "=" in item:
                    key, value = item.split("=", 1)
                    model_aliases[key.strip()] = value.strip()

    # With two or more GPUs, default to an even per-GPU tensor split by layer.
    gpu_count = gpu_count_runtime or agents.gpu_count
    if gpu_count and gpu_count >= 2:
        if "tensor_split" not in llamacpp_args:
            ratio = 1.0 / float(gpu_count)
            split = ",".join([f"{ratio:.2f}"] * gpu_count)
            llamacpp_args["tensor_split"] = split
        if "split_mode" not in llamacpp_args:
            llamacpp_args["split_mode"] = "layer"

    return AppConfig(
        api_port=api_port,
        ui_port=ui_port,
        base_url=base_url,
        model_dir=model_dir,
        model_container_dir=model_container_dir,
        download_dir=download_dir,
        download_max_concurrent=download_max,
        download_allowlist=allowlist,
        restart_method=restart_method,
        restart_command=restart_command,
        restart_url=restart_url,
        reload_on_new_model=reload_on_new_model,
        proxy_timeout_s=proxy_timeout_s,
        switch_timeout_s=switch_timeout_s,
        gpu_count_runtime=gpu_count_runtime,
        llamacpp_args=llamacpp_args,
        llamacpp_extra_args=llamacpp_extra_args,
        truenas_api_key=truenas_api_key,
        truenas_api_user=truenas_api_user,
        truenas_app_name=truenas_app_name,
        truenas_ws_url=truenas_ws_url,
        truenas_verify_ssl=truenas_verify_ssl,
        allowed_container=allowed_container,
        warmup_prompt_path=warmup_prompt_path,
        llamacpp_container_name=llamacpp_container_name,
        model_aliases=model_aliases,
        agents=agents,
    )
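

# Minimal usage sketch (an illustration, not part of the original module):
# running this file directly loads the config from the current environment and
# prints a few derived fields. The env values seeded below are hypothetical
# examples and only apply when the variables are not already set.
if __name__ == "__main__":
    os.environ.setdefault("PORT_A", "9093")
    os.environ.setdefault("LLAMACPP_N_GPU_LAYERS", "99")
    os.environ.setdefault("MODEL_ALIASES", "small=/models/small.gguf")

    cfg = load_config()
    print(f"base_url      = {cfg.base_url}")
    print(f"model_dir     = {cfg.model_dir}")
    print(f"llamacpp_args = {cfg.llamacpp_args}")
    print(f"model_aliases = {cfg.model_aliases}")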