{
  "image": "ghcr.io/ggml-org/llama.cpp:server-cuda",
  "container_name": "ix-llamacpp-llamacpp-1",
  "host_port": 8071,
  "container_port": 8080,
  "web_ui_url": "http://0.0.0.0:8071/",
  "model_host_path": "/mnt/fast.storage.rushg.me/datasets/apps/llama-cpp.models",
  "model_container_path": "/models",
  "models": [
    "GPT-OSS",
    "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",
    "openassistant-llama2-13b-orca-8k-3319.Q5_K_M.gguf",
    "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
  ],
  "network": "ix-llamacpp_default",
  "subnets": [
    "172.16.18.0/24",
    "fdb7:86ec:b1dd:11::/64"
  ],
  "gpu_count": 2,
  "gpu_name": "NVIDIA RTX 5060 Ti",
  "gpu_vram_gb": 16,
  "gpu_note": "16 GB each, per nvidia-smi in prior runs"
}