-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdocker-compose.prod.yml
More file actions
103 lines (100 loc) · 2.58 KB
/
docker-compose.prod.yml
File metadata and controls
103 lines (100 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Arctic Text2SQL Agent - Production Environment Override
#
# Usage: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
#        (Requires Docker Compose v2; legacy docker-compose v1 cannot parse the `!reset` tag used below.)
#
# This file overrides the base docker-compose.yml for production deployments.
services:
  # API service: runs the published GHCR image selected by IMAGE_TAG.
  api:
    image: ghcr.io/sakeeb91/arctic-text2sql-agent:${IMAGE_TAG:-latest}
    # `!reset` removes any `build` section inherited from the base file so the
    # image above is used instead of a local build.
    build: !reset null
    environment:
      # Database
      - DATABASE_URL=${DATABASE_URL}
      # HuggingFace
      - HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN}
      - TEXT2SQL_MODEL=${TEXT2SQL_MODEL:-Snowflake/Arctic-Text2SQL-R1-7B}
      - MODEL_DEVICE=${MODEL_DEVICE:-cuda}
      # API - Production settings
      - API_HOST=0.0.0.0
      - API_PORT=8000
      - API_DEBUG=false
      - CORS_ORIGINS=${CORS_ORIGINS}
      # Agent
      - AGENT_MAX_STEPS=${AGENT_MAX_STEPS:-5}
      - AGENT_MIN_CONFIDENCE=${AGENT_MIN_CONFIDENCE:-0.8}
      - AGENT_VERBOSITY=0
      # Logging
      - LOG_LEVEL=INFO
      - LOG_FORMAT=json
      # Cache
      - REDIS_URL=redis://redis:6379/0
      # Security
      # `:?` makes Compose abort with this message when SECRET_KEY is unset or
      # empty, instead of silently starting the API with a blank secret.
      - "SECRET_KEY=${SECRET_KEY:?SECRET_KEY must be set for production}"
      # Rate Limiting
      - RATE_LIMIT_REQUESTS=${RATE_LIMIT_REQUESTS:-100}
      - RATE_LIMIT_WINDOW=${RATE_LIMIT_WINDOW:-60}
    volumes:
      # Production: no source mounts, only data volumes
      - app-data:/app/data
      - huggingface-cache:/home/appuser/.cache/huggingface
    deploy:
      mode: replicated
      replicas: ${REPLICAS:-2}
      resources:
        limits:
          memory: 32G
        reservations:
          memory: 16G
      # Rolling update: one task at a time, new task started before the old
      # one is stopped, automatic rollback if the update fails.
      update_config:
        parallelism: 1
        delay: 30s
        failure_action: rollback
        order: start-first
      rollback_config:
        parallelism: 1
        delay: 10s
      # Restart on any exit, at most 3 attempts within a 120s window.
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      labels:
        - "environment=production"
        - "version=${IMAGE_TAG:-latest}"
    # Cap json-file logs at 5 files of 100 MB each.
    logging:
      driver: json-file
      options:
        max-size: "100m"
        max-file: "5"

  # Database service: credentials and database name come from the environment.
  db:
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      # Fail fast at `up` time rather than starting with an empty password.
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set for production}"
      POSTGRES_DB: ${POSTGRES_DB:-text2sql_prod}
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 2G
      labels:
        - "environment=production"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "3"

  # Redis service: only resource limits, labels and log rotation are overridden.
  redis:
    deploy:
      resources:
        limits:
          memory: 1G
        reservations:
          memory: 512M
      labels:
        - "environment=production"
    logging:
      driver: json-file
      options:
        max-size: "20m"
        max-file: "3"