# Docker Compose for LocalAI Distributed Mode
#
# Starts a full distributed stack: PostgreSQL, NATS, a LocalAI frontend,
# and one llama-cpp backend node.
#
# Model files are transferred from the frontend to backend nodes via HTTP
# — no shared volumes needed between frontend and backends.
#
# Usage:
#   docker compose -f docker-compose.distributed.yaml up
#
# See docs: https://localai.io/features/distributed-mode/
services:
  # --- Infrastructure ---
  postgres:
    image: quay.io/mudler/localrecall:v0.5.5-postgresql # PostgreSQL with pgvector
    environment:
      POSTGRES_DB: localai
      POSTGRES_USER: localai
      POSTGRES_PASSWORD: localai
    volumes:
      - postgres_data:/var/lib/postgresql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U localai"]
      interval: 5s
      timeout: 3s
      retries: 10

  nats:
    image: nats:2-alpine
    ports:
      - "4222:4222" # Client connections
      - "8222:8222" # HTTP monitoring (optional, useful for debugging)
    command: ["--js", "-m", "8222"] # Enable JetStream + monitoring

  # --- LocalAI Frontend ---
  # Stateless API server that routes requests to backend nodes.
  # Add more replicas behind a load balancer for HA.
  localai:
    # image: localai/localai:latest-cpu
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:24.04
    ports:
      - "8080:8080"
    environment:
      # Distributed mode
      LOCALAI_DISTRIBUTED: "true"
      LOCALAI_NATS_URL: "nats://nats:4222"
      LOCALAI_AGENT_POOL_EMBEDDING_MODEL: "granite-embedding-107m-multilingual"
      LOCALAI_AGENT_POOL_VECTOR_ENGINE: "postgres"
      LOCALAI_AGENT_POOL_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
      LOCALAI_REGISTRATION_TOKEN: "changeme" # Change this in production!
      # Auth (required for distributed mode — must use PostgreSQL)
      LOCALAI_AUTH: "true"
      LOCALAI_AUTH_DATABASE_URL: "postgresql://localai:localai@postgres:5432/localai?sslmode=disable"
      # Force pure-Go DNS resolver. The default cgo resolver follows the
      # container's nsswitch.conf and ends up forwarding to host
      # systemd-resolved (127.0.0.53), which isn't reachable from inside
      # the container — failing every postgres/nats hostname lookup at
      # boot. The pure-Go path reads /etc/resolv.conf directly and uses
      # Docker's embedded DNS at 127.0.0.11.
      GODEBUG: "netdns=go"
      # Paths
      MODELS_PATH: /models
    volumes:
      - frontend_models:/models
      - frontend_data:/data
    depends_on:
      postgres:
        condition: service_healthy
      nats:
        condition: service_started

  # --- Worker Node ---
  # A generic worker that self-registers with the frontend.
  # The same LocalAI image is used — no separate image needed.
  # The SmartRouter dynamically tells workers which backend to install via NATS.
  #
  # Model files are transferred from the frontend via HTTP file staging.
  # The worker has its own independent models volume.
  worker-1:
    # image: localai/localai:latest-cpu
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:24.04
    command:
      - worker
    # The image's default HEALTHCHECK targets the server's /readyz on 8080.
    # Workers don't run the OpenAI API server — their HTTP file transfer
    # server runs on the gRPC base port - 1 (50050 here) and exposes /readyz.
    # Override the env var so the inherited HEALTHCHECK probes the right port.
    environment:
      HEALTHCHECK_ENDPOINT: "http://localhost:50050/readyz"
      LOCALAI_SERVE_ADDR: "0.0.0.0:50051"
      LOCALAI_ADVERTISE_ADDR: "worker-1:50051"
      LOCALAI_ADVERTISE_HTTP_ADDR: "worker-1:50050"
      DEBUG: "true"
      LOCALAI_REGISTER_TO: "http://localai:8080"
      LOCALAI_NODE_NAME: "worker-1"
      LOCALAI_REGISTRATION_TOKEN: "changeme" # Must match frontend token
      LOCALAI_HEARTBEAT_INTERVAL: "10s"
      LOCALAI_NATS_URL: "nats://nats:4222"
      GODEBUG: "netdns=go" # See note in localai service
      MODELS_PATH: /models
    volumes:
      - worker_1_models:/models
    depends_on:
      localai:
        condition: service_started
      nats:
        condition: service_started

  # --- GPU Support (NVIDIA) ---
  # Uncomment the following and change the image to a CUDA variant
  # (e.g., localai/localai:latest-gpu-nvidia-cuda-12) to enable GPU.
  #
  # NVIDIA_DRIVER_CAPABILITIES must include `utility` so nvidia-smi / NVML
  # are available inside the container; without it the worker cannot report
  # free VRAM and the Nodes page will show 0 free / total used.
  # `init: true` avoids zombie-reap races that make nvidia-smi flaky.
  #
  #   init: true
  #   environment:
  #     NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           # Compose device reservations use `driver: nvidia` (the
  #           # `nvidia.com/gpu=...` form is CDI syntax, used with
  #           # `driver: cdi` + `device_ids` instead of `count`).
  #           - driver: nvidia
  #             count: all
  #             capabilities: [gpu, utility]

  # --- Shared Volume Mode (optional) ---
  # If all services run on the same Docker host, you can skip gRPC file transfer
  # by sharing a single models volume. Replace the volumes above with:
  #
  #   localai:
  #     volumes:
  #       - shared_models:/models
  #       - frontend_data:/data
  #
  #   backend-llama-cpp:
  #     volumes:
  #       - shared_models:/models
  #
  # Then add to the volumes section:
  #   shared_models:
  #
  # With shared volumes, model files are already available on the backend —
  # gRPC file staging becomes a no-op (paths match).

  # --- Adding More Workers ---
  # Copy the worker-1 service above and change:
  #   - Service name (e.g., worker-2)
  #   - LOCALAI_NODE_NAME (must be unique)
  #   - LOCALAI_ADVERTISE_ADDR (must match service name)
  #
  # Workers are generic — no backend type needed. The SmartRouter
  # will dynamically install the required backend via NATS when
  # a model request arrives.

  # --- Agent Worker ---
  # Dedicated process for agent chat execution.
  # Receives chat jobs from NATS, runs cogito LLM calls via the LocalAI API,
  # and publishes results back via NATS for SSE delivery.
  # No database access needed — config and skills are sent in the NATS payload.
  agent-worker-1:
    # image: localai/localai:latest-cpu
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - IMAGE_TYPE=core
        - BASE_IMAGE=ubuntu:24.04
    # Install Docker CLI and start agent-worker.
    # The Docker socket is mounted from the host so that MCP stdio servers
    # using "docker run" commands can spawn containers on the host Docker.
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        apt-get update -qq && apt-get install -y -qq docker.io >/dev/null 2>&1
        exec /entrypoint.sh agent-worker
    # The agent worker is NATS-only — no HTTP server to probe. Disable the
    # image's inherited HEALTHCHECK so the container doesn't show unhealthy.
    healthcheck:
      disable: true
    environment:
      LOCALAI_NATS_URL: "nats://nats:4222"
      LOCALAI_REGISTER_TO: "http://localai:8080"
      LOCALAI_NODE_NAME: "agent-worker-1"
      LOCALAI_REGISTRATION_TOKEN: "changeme" # Must match frontend token
      GODEBUG: "netdns=go" # See note in localai service
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      localai:
        condition: service_started
      nats:
        condition: service_started

volumes:
  postgres_data:
  frontend_models:
  frontend_data:
  worker_1_models: