距离部署已经过去很久了，很多细节已经记不清了，但还是大致做个备忘。
docker-compose编排
# Docker Compose stack for a Flink session cluster: one JobManager and one
# TaskManager, both built from local Dockerfiles and attached to a
# pre-existing network.
version: '3'

# The network is declared external, so it must already exist before
# `docker compose up` (Compose will not create it).
networks:
  flink-network:
    external: true

services:
  jobmanager:
    container_name: job_manager
    tty: true
    restart: always
    privileged: true
    build:
      context: ./dockerfile/flink-job
      dockerfile: Dockerfile
      args:
        # Resolved from the shell environment or the .env file.
        - TIMEZONE=${TIMEZONE}
    ports:
      # Published to the host on the same port (8081).
      - "8081:8081"
    volumes:
      # Bind mounts hide whatever the image ships at these paths, so the
      # host directories must contain the complete lib/ and conf/ contents.
      - ./lib:/opt/flink/lib/
      - ./conf:/opt/flink/conf
      # Shared with the taskmanager service (same host directory).
      - ./checkpoints:/opt/flink/checkpoints
      - ./log/job_manager:/opt/flink/log
      - ./flinkrecovery/:/opt/flink/flinkrecovery
      - ./jobs:/opt/flink/jobs
    command: jobmanager
    environment:
      # List-form entries must be KEY=VALUE. The previous `KEY:value` form is
      # read by Compose as a bare variable name with no value, so neither
      # variable was actually set in the container.
      - JOB_MANAGER_RPC_ADDRESS=jobmanager
      - TZ=Asia/Shanghai
    networks:
      - flink-network

  taskmanager:
    container_name: task_manager
    tty: true
    restart: always
    privileged: true
    build:
      context: ./dockerfile/flink-task
      dockerfile: Dockerfile
      args:
        - TIMEZONE=${TIMEZONE}
    volumes:
      - ./lib:/opt/flink/lib/
      - ./conf:/opt/flink/conf
      # Same host directory as the jobmanager, so file:// checkpoint paths
      # resolve to the same files in both containers.
      - ./checkpoints:/opt/flink/checkpoints
      - ./log/task_manager:/opt/flink/log
      - ./jobs:/opt/flink/jobs
    command: taskmanager
    depends_on:
      - jobmanager
    # Legacy option kept for backward compatibility; both services already
    # share flink-network, where the service name resolves via Docker DNS.
    links:
      - "jobmanager:jobmanager"
    environment:
      - JOB_MANAGER_RPC_ADDRESS=jobmanager
    networks:
      - flink-network
.env文件范例
# Not referenced by the docker-compose file in this memo (its build contexts
# are hard-coded to ./dockerfile/...); presumably a leftover - verify before
# relying on it.
DOCKER_FILE_PATH=./dockerfile
# Substituted for ${TIMEZONE}, which the compose file passes as a build arg
# to both the jobmanager and taskmanager images.
TIMEZONE=Asia/Shanghai
conf/config.yaml内容
# Flink cluster configuration (conf/config.yaml), written as nested YAML.
# Each leaf corresponds to the dotted Flink option formed by joining the keys
# on its path, e.g. blob.server.port.

blob:
  server:
    port: '6124'

state:
  checkpoints:
    num-retained: '3'
    dir: file:///opt/flink/checkpoints
  savepoints:
    # Savepoints are written to the same directory as checkpoints.
    dir: file:///opt/flink/checkpoints
  backend: filesystem

metrics:
  reporter:
    # "promgateway" is the reporter name; the keys below configure it.
    promgateway:
      port: '9091'
      interval: 30 SECONDS
      jobName: flink-metrics
      factory:
        class: org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporterFactory
      # Legacy reporter-class key carried over from the original file. It is
      # a sibling of `factory`, not a second `factory.class`.
      # NOTE(review): likely redundant next to factory.class - confirm
      # against the Flink version in use before removing.
      class: org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporter
      randomJobNameSuffix: 'true'
      deleteOnShutdown: 'false'
      # Placeholder: replace with the external IP of the Prometheus
      # PushGateway.
      # NOTE(review): newer Flink releases configure the gateway through a
      # single `hostUrl` option instead of host/port - confirm for the
      # deployed version.
      host: 外网ip

rest:
  flamegraph:
    enabled: true

high-availability:
  type: zookeeper
  cluster-id: /flink-ha
  # NOTE(review): make sure this path is on storage that survives container
  # re-creation and is reachable by every JobManager.
  storageDir: file:///opt/flink/ha/
  zookeeper:
    client:
      # The Flink option is high-availability.zookeeper.client.acl; the
      # previous `client: open` did not correspond to any option.
      acl: open
    # Placeholder host names: replace with the real ZooKeeper ensemble.
    quorum: host1:2181,host2:2181,host3:2181

jobmanager:
  rpc:
    address: jobmanager
  memory:
    process:
      size: 2000m

taskmanager:
  memory:
    jvm-metaspace:
      size: 500m
    process:
      size: 4000m
  numberOfTaskSlots: '20'

query:
  server:
    port: '6125'

execution:
  checkpointing:
    mode: EXACTLY_ONCE
    # Plain numbers here carry no unit suffix.
    # NOTE(review): presumably interpreted as milliseconds - confirm.
    interval: '5000'
    timeout: '600000'
    externalized-checkpoint-retention: RETAIN_ON_CANCELLATION
    min-pause: '500'
    max-concurrent-checkpoints: '1'
conf/zoo.cfg
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial synchronization phase can take
initLimit=10
# The number of ticks that can pass between sending a request and getting an acknowledgement
syncLimit=5
# The directory where the snapshot is stored.
# dataDir=/tmp/zookeeper
# The port at which the clients will connect
clientPort=2181
# ZooKeeper quorum peers, one line per ensemble member:
# server.<id>=<host>:<peer-port>:<leader-election-port>
server.1=host1:2888:3888
server.2=host2:2888:3888
server.3=host3:2888:3888
# The client connection string (a list of host:clientPort pairs) is a Flink
# setting - high-availability.zookeeper.quorum in the Flink configuration -
# and is not a ZooKeeper server option. The stray `zookeeper.quorum: ...`
# entry that used to follow here was removed: it repeated host1 and listed
# ports 2182/2183, which contradict clientPort=2181 above.
conf/masters
外网ip:8081
conf/workers
ip1
ip2
ip3