Initial commit
This commit is contained in:
214
llamaCpp.Wrapper.app/config.py
Normal file
214
llamaCpp.Wrapper.app/config.py
Normal file
@@ -0,0 +1,214 @@
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class AgentsRuntime:
    """Runtime facts about the managed llama.cpp container, as read from
    the agents config JSON by ``_load_agents_config``.

    Every field may be absent from the JSON; the loader supplies
    ``None`` / empty-list defaults, hence the Optional types.
    """

    image: Optional[str]  # container image reference
    container_name: Optional[str]  # container name; used as base-URL host and as default target container
    host_port: Optional[int]  # port published on the host (base-URL fallback)
    container_port: Optional[int]  # service port inside the container
    web_ui_url: Optional[str]  # URL of the container's web UI, if any
    model_host_path: Optional[str]  # model directory on the host side — presumably a bind-mount source; confirm
    model_container_path: Optional[str]  # model directory inside the container (MODEL_DIR fallback)
    models: List[str]  # model identifiers listed in the config
    network: Optional[str]  # container network name — TODO confirm semantics
    subnets: List[str]  # associated subnets — TODO confirm semantics
    gpu_count: Optional[int]  # configured GPU count (fallback when runtime detection yields nothing)
    gpu_name: Optional[str]  # GPU model name; informational
|
||||
|
||||
|
||||
@dataclass
class AppConfig:
    """Fully-resolved application configuration.

    Built exclusively by ``load_config`` from environment variables, the
    agents config JSON, and hard-coded defaults; the env var backing each
    field is noted below.
    """

    api_port: int  # PORT_A (default 9093)
    ui_port: int  # PORT_B (default 9094)
    base_url: str  # LLAMACPP_BASE_URL, else derived via _default_base_url
    model_dir: str  # MODEL_DIR, else agents.model_container_path, else "/models"
    model_container_dir: str  # MODEL_CONTAINER_DIR, else model_dir
    download_dir: str  # MODEL_DOWNLOAD_DIR, else model_dir
    download_max_concurrent: int  # MODEL_DOWNLOAD_MAX_CONCURRENT (default 2)
    download_allowlist: List[str]  # MODEL_DOWNLOAD_ALLOWLIST, comma-separated
    restart_method: str  # LLAMACPP_RESTART_METHOD, lowercased (default "none")
    restart_command: Optional[str]  # LLAMACPP_RESTART_COMMAND
    restart_url: Optional[str]  # LLAMACPP_RESTART_URL
    reload_on_new_model: bool  # RELOAD_ON_NEW_MODEL in {1,true,yes}
    proxy_timeout_s: float  # LLAMACPP_PROXY_TIMEOUT_S (default 600)
    switch_timeout_s: float  # LLAMACPP_SWITCH_TIMEOUT_S (default 300)
    gpu_count_runtime: Optional[int]  # inferred from *_VISIBLE_DEVICES, may be None
    llamacpp_args: Dict[str, str]  # named llama.cpp flags (tensor_split, ctx_size, ...)
    llamacpp_extra_args: str  # LLAMACPP_EXTRA_ARGS, raw pass-through string
    truenas_api_key: Optional[str]  # TRUENAS_API_KEY
    truenas_api_user: Optional[str]  # TRUENAS_API_USER
    truenas_app_name: str  # TRUENAS_APP_NAME (default "llamacpp")
    truenas_ws_url: Optional[str]  # TRUENAS_WS_URL, or derived from TRUENAS_API_URL
    truenas_verify_ssl: bool  # TRUENAS_VERIFY_SSL in {1,true,yes}; default False
    allowed_container: Optional[str]  # LLAMACPP_TARGET_CONTAINER, else agents.container_name
    warmup_prompt_path: str  # WARMUP_PROMPT_PATH, default resolved at load time
    llamacpp_container_name: Optional[str]  # LLAMACPP_CONTAINER_NAME, else agents.container_name
    model_aliases: Dict[str, str]  # MODEL_ALIASES: JSON object or "a=b,c=d" pairs
    agents: AgentsRuntime  # the parsed agents config itself
|
||||
|
||||
|
||||
def _load_agents_config(path: Path) -> AgentsRuntime:
    """Build an ``AgentsRuntime`` from the JSON file at *path*.

    A missing file is treated exactly like an empty JSON object: every
    optional field comes back ``None`` and every list field empty.
    """
    data: Dict[str, object] = {}
    if path.exists():
        data = json.loads(path.read_text(encoding="utf-8"))
    return AgentsRuntime(
        image=data.get("image"),
        container_name=data.get("container_name"),
        host_port=data.get("host_port"),
        container_port=data.get("container_port"),
        web_ui_url=data.get("web_ui_url"),
        model_host_path=data.get("model_host_path"),
        model_container_path=data.get("model_container_path"),
        # "or []" normalizes both a missing key and an explicit null.
        models=data.get("models") or [],
        network=data.get("network"),
        subnets=data.get("subnets") or [],
        gpu_count=data.get("gpu_count"),
        gpu_name=data.get("gpu_name"),
    )
|
||||
|
||||
|
||||
def _infer_gpu_count_runtime() -> Optional[int]:
|
||||
visible = os.getenv("CUDA_VISIBLE_DEVICES") or os.getenv("NVIDIA_VISIBLE_DEVICES")
|
||||
if visible and visible not in {"all", "void"}:
|
||||
parts = [p.strip() for p in visible.split(",") if p.strip()]
|
||||
if parts:
|
||||
return len(parts)
|
||||
return None
|
||||
|
||||
|
||||
def _default_base_url(agents: AgentsRuntime) -> str:
|
||||
if agents.container_name and agents.container_port:
|
||||
return f"http://{agents.container_name}:{agents.container_port}"
|
||||
if agents.host_port:
|
||||
return f"http://127.0.0.1:{agents.host_port}"
|
||||
return "http://127.0.0.1:8080"
|
||||
|
||||
|
||||
def load_config() -> AppConfig:
    """Assemble the full application configuration.

    Sources, in rough precedence order: explicit environment variables,
    the agents config JSON (``AGENTS_CONFIG_PATH``), then hard-coded
    defaults. Reads PORT_A/PORT_B, LLAMACPP_*, MODEL_*, TRUENAS_*,
    RELOAD_ON_NEW_MODEL, MODEL_ALIASES and WARMUP_PROMPT_PATH.
    """
    # Container runtime facts (image, ports, model paths, GPU info).
    agents_path = Path(os.getenv("AGENTS_CONFIG_PATH", "app/agents_config.json"))
    agents = _load_agents_config(agents_path)

    # Listener ports for the API and UI servers.
    api_port = int(os.getenv("PORT_A", "9093"))
    ui_port = int(os.getenv("PORT_B", "9094"))

    # Upstream llama.cpp endpoint and model directories.
    base_url = os.getenv("LLAMACPP_BASE_URL") or _default_base_url(agents)
    model_dir = os.getenv("MODEL_DIR") or agents.model_container_path or "/models"
    model_container_dir = os.getenv("MODEL_CONTAINER_DIR") or model_dir

    # Model download location and concurrency cap.
    download_dir = os.getenv("MODEL_DOWNLOAD_DIR") or model_dir
    download_max = int(os.getenv("MODEL_DOWNLOAD_MAX_CONCURRENT", "2"))

    # Comma-separated allowlist; blank entries are dropped.
    allowlist_raw = os.getenv("MODEL_DOWNLOAD_ALLOWLIST", "")
    allowlist = [item.strip() for item in allowlist_raw.split(",") if item.strip()]

    # Server-restart strategy; "none" disables restarts.
    restart_method = os.getenv("LLAMACPP_RESTART_METHOD", "none").lower()
    restart_command = os.getenv("LLAMACPP_RESTART_COMMAND")
    restart_url = os.getenv("LLAMACPP_RESTART_URL")

    reload_on_new_model = os.getenv("RELOAD_ON_NEW_MODEL", "false").lower() in {"1", "true", "yes"}
    proxy_timeout_s = float(os.getenv("LLAMACPP_PROXY_TIMEOUT_S", "600"))
    switch_timeout_s = float(os.getenv("LLAMACPP_SWITCH_TIMEOUT_S", "300"))

    # GPU count visible to this process (from *_VISIBLE_DEVICES), if explicit.
    gpu_count_runtime = _infer_gpu_count_runtime()

    # Map LLAMACPP_* env vars onto named llama.cpp server arguments;
    # only non-empty values are forwarded.
    llamacpp_args: Dict[str, str] = {}
    args_map = {
        "LLAMACPP_TENSOR_SPLIT": "tensor_split",
        "LLAMACPP_SPLIT_MODE": "split_mode",
        "LLAMACPP_N_GPU_LAYERS": "n_gpu_layers",
        "LLAMACPP_CTX_SIZE": "ctx_size",
        "LLAMACPP_BATCH_SIZE": "batch_size",
        "LLAMACPP_UBATCH_SIZE": "ubatch_size",
        "LLAMACPP_CACHE_TYPE_K": "cache_type_k",
        "LLAMACPP_CACHE_TYPE_V": "cache_type_v",
        "LLAMACPP_FLASH_ATTN": "flash_attn",
    }
    for env_key, arg_key in args_map.items():
        value = os.getenv(env_key)
        if value is not None and value != "":
            llamacpp_args[arg_key] = value
    llamacpp_extra_args = os.getenv("LLAMACPP_EXTRA_ARGS", "")

    # TrueNAS middleware access.
    truenas_api_key = os.getenv("TRUENAS_API_KEY")
    truenas_api_user = os.getenv("TRUENAS_API_USER")
    truenas_app_name = os.getenv("TRUENAS_APP_NAME", "llamacpp")
    truenas_ws_url = os.getenv("TRUENAS_WS_URL")
    truenas_api_url = os.getenv("TRUENAS_API_URL")
    # Derive the websocket URL from an http(s) API URL when not set directly.
    if not truenas_ws_url and truenas_api_url:
        if truenas_api_url.startswith("https://"):
            truenas_ws_url = "wss://" + truenas_api_url[len("https://") :].rstrip("/") + "/websocket"
        elif truenas_api_url.startswith("http://"):
            truenas_ws_url = "ws://" + truenas_api_url[len("http://") :].rstrip("/") + "/websocket"
    truenas_verify_ssl = os.getenv("TRUENAS_VERIFY_SSL", "false").lower() in {"1", "true", "yes"}
    # Containers this app may target/manage; default to the agents config.
    allowed_container = os.getenv("LLAMACPP_TARGET_CONTAINER") or agents.container_name
    llamacpp_container_name = os.getenv("LLAMACPP_CONTAINER_NAME") or agents.container_name
    # Default warmup prompt path is resolved relative to the CWD at load time.
    warmup_prompt_path = os.getenv("WARMUP_PROMPT_PATH", str(Path("trades_company_stock.txt").resolve()))
    # NOTE(review): when TrueNAS is in play and model_container_dir looks like
    # a Windows-style path (drive letter in the first 3 chars, or backslashes),
    # fall back to MODEL_CONTAINER_DIR or /models — presumably guards against
    # a host path leaking into container-side config; confirm intent.
    if truenas_ws_url and (":" in model_container_dir[:3] or "\\" in model_container_dir):
        model_container_dir = os.getenv("MODEL_CONTAINER_DIR") or "/models"
    # MODEL_ALIASES: preferred form is a JSON object; on parse failure, fall
    # back to comma-separated "alias=target" pairs.
    aliases_raw = os.getenv("MODEL_ALIASES", "")
    model_aliases: Dict[str, str] = {}
    if aliases_raw:
        try:
            model_aliases = json.loads(aliases_raw)
        except json.JSONDecodeError:
            for item in aliases_raw.split(","):
                if "=" in item:
                    key, value = item.split("=", 1)
                    model_aliases[key.strip()] = value.strip()

    # Multi-GPU defaults: even tensor split across GPUs and layer split mode,
    # unless the corresponding args were set explicitly above.
    gpu_count = gpu_count_runtime or agents.gpu_count
    if gpu_count and gpu_count >= 2:
        if "tensor_split" not in llamacpp_args:
            ratio = 1.0 / float(gpu_count)
            split = ",".join([f"{ratio:.2f}"] * gpu_count)
            llamacpp_args["tensor_split"] = split
        if "split_mode" not in llamacpp_args:
            llamacpp_args["split_mode"] = "layer"

    return AppConfig(
        api_port=api_port,
        ui_port=ui_port,
        base_url=base_url,
        model_dir=model_dir,
        model_container_dir=model_container_dir,
        download_dir=download_dir,
        download_max_concurrent=download_max,
        download_allowlist=allowlist,
        restart_method=restart_method,
        restart_command=restart_command,
        restart_url=restart_url,
        reload_on_new_model=reload_on_new_model,
        proxy_timeout_s=proxy_timeout_s,
        switch_timeout_s=switch_timeout_s,
        gpu_count_runtime=gpu_count_runtime,
        llamacpp_args=llamacpp_args,
        llamacpp_extra_args=llamacpp_extra_args,
        truenas_api_key=truenas_api_key,
        truenas_api_user=truenas_api_user,
        truenas_app_name=truenas_app_name,
        truenas_ws_url=truenas_ws_url,
        truenas_verify_ssl=truenas_verify_ssl,
        allowed_container=allowed_container,
        warmup_prompt_path=warmup_prompt_path,
        llamacpp_container_name=llamacpp_container_name,
        model_aliases=model_aliases,
        agents=agents,
    )
|
||||
Reference in New Issue
Block a user