-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdocker-compose.yml
185 lines (169 loc) · 4.93 KB
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# NOTE: the top-level "version" key is obsolete with Compose v2 and ignored.
version: '3.8'
# Shared base for all Spark services (master + workers): same image build,
# same jobs bind-mount, same network.
x-spark-common: &spark-common
  build:
    # Fixed: no space is allowed before the ":" of a mapping key.
    context: .
    # NOTE(review): "DockerFile2" — verify the casing matches the file on disk;
    # builds are case-sensitive on Linux hosts.
    dockerfile: DockerFile2
  volumes:
    - ./jobs:/opt/bitnami/spark/jobs
  networks:
    - YNS_NETWORK
# Shared base for the Airflow webserver and scheduler: common image build,
# env file, bind mounts, Postgres dependency, and network.
x-airflow-common: &airflow-common
  build:
    context: .
    dockerfile: Dockerfile
  env_file:
    - airflow.env
  volumes:
    - ./jobs:/opt/airflow/jobs
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./schema:/opt/airflow/schema
  depends_on:
    # NOTE: a service that spells out its own depends_on (e.g. webserver)
    # replaces this list entirely — YAML merge keys are shallow.
    - postgres
  networks:
    - YNS_NETWORK
services:
  # Coordination service for the (ZooKeeper-mode) Kafka broker below.
  zookeeper:
    image: zookeeper:3.8.1 # Updated to a reliable version
    container_name: zookeeper
    ports:
      - "2181:2181"
    environment:
      # NOTE(review): ZOOKEEPER_*-style names are read by confluentinc/cp-zookeeper;
      # the official "zookeeper" image uses ZOO_*-prefixed vars (e.g. ZOO_TICK_TIME)
      # — verify these are actually applied. 2181 is the image's default client
      # port either way.
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000
    networks:
      - YNS_NETWORK
kafka:
image: bitnami/kafka:latest
container_name: kafka
depends_on:
- zookeeper
ports:
- "9092:9092"
environment:
KAFKA_BROKER_ID: 1
KAFKA_ADVERTISED_LISTENERS: INSIDE://kafka:9093,OUTSIDE://localhost:9092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
KAFKA_LISTENERS: INSIDE://0.0.0.0:9093,OUTSIDE://0.0.0.0:9092
KAFKA_LISTENER_NAME: INSIDE
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_LOG_RETENTION_HOURS: 168
KAFKA_LOG_RETENTION_BYTES: 1073741824
CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka:9093
CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1
CONFLUENT_METRICS_ENABLE: 'true'
CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous'
networks:
- YNS_NETWORK
  # Confluent Schema Registry, backed by the Kafka broker above.
  schema-registry:
    # NOTE(review): unpinned "latest" — consider pinning for reproducible deploys.
    image: confluentinc/cp-schema-registry:latest
    hostname: schema-registry
    container_name: schema-registry
    depends_on:
      - kafka
    ports:
      - "8081:8081"
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
      # Uses the broker's internal (INSIDE) listener on the shared network.
      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'kafka:9093'
      SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
    networks:
      - YNS_NETWORK
    healthcheck:
      # Plain GET against the REST root; curl -f fails on HTTP >= 400.
      test: [ "CMD", "curl", "-f", "http://schema-registry:8081" ]
      interval: 30s
      timeout: 10s
      retries: 5
  # Confluent Control Center UI (http://localhost:9021) for monitoring the
  # broker and schema registry.
  control-center:
    # NOTE(review): unpinned "latest" — consider pinning for reproducible deploys.
    image: confluentinc/cp-enterprise-control-center:latest
    container_name: control-center
    ports:
      - "9021:9021"
    environment:
      # Connects over the broker's internal (INSIDE) listener.
      CONTROL_CENTER_BOOTSTRAP_SERVERS: 'kafka:9093'
      # Replication/partition counts of 1 suit this single-broker setup only.
      CONTROL_CENTER_REPLICATION_FACTOR: 1
      CONTROL_CENTER_INTERNAL_TOPICS_PARTITIONS: 1
      CONTROL_CENTER_MONITORING_INTERCEPTOR_TOPIC_PARTITIONS: 1
      CONFLUENT_METRICS_TOPIC_REPLICATION: 1
      CONFLUENT_METRICS_ENABLE: 'true'
      CONTROL_CENTER_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081'
      PORT: 9021
    networks:
      - YNS_NETWORK
    depends_on:
      - kafka
      - schema-registry
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:9021/health" ]
      interval: 30s
      timeout: 10s
      retries: 5
cassandra_db:
image: cassandra:latest
container_name: cassandra
hostname: cassandra
ports:
- "9042:9042"
environment:
- MAX_HEAP_SIZE=2G # Increase heap size
- HEAP_NEWSIZE=500M # Increase young generation heap size
- CASSANDRA_USERNAME=cassandra
- CASSANDRA_PASSWORD=cassandra
- JVM_OPTS=-Xms2G -Xmx2G # Set minimum and maximum heap size for JVM
- GC_GRACE_SECONDS=864000 # Increase GC grace seconds to 10 days
- CONFLUENT_SUPPORT_CUSTOMER_ID=anonymous
volumes:
- ./cassandra_data:/var/lib/cassandr
networks:
- YNS_NETWORK
  # Standalone Spark master: cluster web UI on host port 9090, master RPC on 7077.
  # Inherits build, volumes, and network from the &spark-common anchor.
  spark-master:
    <<: *spark-common
    command: bin/spark-class org.apache.spark.deploy.master.Master
    ports:
      - "9090:8080"
      - "7077:7077"
  # Two identical Spark workers registering with the master above.
  # NOTE(review): the definitions are duplicated verbatim — consider a shared
  # worker anchor or `deploy.replicas` to avoid drift.
  spark-worker:
    <<: *spark-common
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
    depends_on:
      - spark-master
    environment:
      SPARK_MODE: worker
      SPARK_WORKER_CORES: 2
      SPARK_WORKER_MEMORY: 1g
      SPARK_MASTER_URL: spark://spark-master:7077
  spark-worker-2:
    <<: *spark-common
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
    depends_on:
      - spark-master
    environment:
      SPARK_MODE: worker
      SPARK_WORKER_CORES: 2
      SPARK_WORKER_MEMORY: 1g
      SPARK_MASTER_URL: spark://spark-master:7077
  # Metadata database for Airflow (credentials must match airflow.env).
  # Not published to the host; reachable in-network as "postgres".
  postgres:
    image: postgres:14.0
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      # Host bind mount so the Airflow metadata DB survives container recreation.
      - ./pg_data:/var/lib/postgresql/data
    networks:
      - YNS_NETWORK
  # Airflow UI on http://localhost:8080. Inherits build/env/volumes/network
  # from the &airflow-common anchor.
  webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - "8080:8080"
    # This explicit list replaces the depends_on merged in from *airflow-common
    # (YAML merge keys are shallow), so postgres must be repeated here.
    depends_on:
      - scheduler
      - postgres
  # Runs DB migration and (re-)creates the admin user, then starts the scheduler.
  # NOTE(review): admin credentials are hard-coded here — move them to airflow.env
  # or a secret store. Also verify `airflow users create` exits 0 when the user
  # already exists, otherwise container restarts will fail at the && chain.
  scheduler:
    <<: *airflow-common
    command: bash -c "airflow db migrate && airflow users create --username admin --firstname YNS --lastname Bousetta --role Admin --email [email protected] --password admin && airflow scheduler"
# Dedicated bridge network shared by every service in this stack.
networks:
  YNS_NETWORK:
    driver: bridge