kylin-3.1.2

This document lists the configuration parameters of Apache Kylin in detail, including environment, metadata, server, web, REST client, public, cube migration, source, storage, job, engine, cube, and dictionary settings. It covers everything from the HDFS working directory to query timeouts, and is intended to help administrators tune the performance and stability of a Kylin deployment.
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# The commented values below take effect as the default settings
# Uncomment and override them if necessary

kylin.env.hdfs-working-dir=/kylin
kylin.env.zookeeper-base-path=/kylin
kylin.server.cluster-servers=localhost:7070
kylin.env.hadoop-conf-dir=/usr/local/src/bigdata/hadoop-3.2.2/etc/hadoop
#
#### METADATA | ENV ###
#
## The metadata store in hbase
#kylin.metadata.url=kylin_metadata@hbase
#
## metadata cache sync retry times
#kylin.metadata.sync-retries=3
#
## Working folder in HDFS, better be qualified absolute path, make sure user has the right permission to this directory
#kylin.env.hdfs-working-dir=/kylin
#
## DEV|QA|PROD. DEV turns on some dev features; QA and PROD have no functional difference.
#kylin.env=QA
#
## kylin zk base path
#kylin.env.zookeeper-base-path=/kylin
#
#### SERVER | WEB | RESTCLIENT ###
#
## Kylin server mode, valid value [all, query, job]
#kylin.server.mode=all
#
## List of web servers in use, this enables one web server instance to sync up with other servers.
#kylin.server.cluster-servers=localhost:7070
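#
## Example (illustrative, hostnames are placeholders): a two-node deployment
## where one instance serves queries and another runs build jobs, with both
## listed in cluster-servers so cache sync works across them.
##   On the query node:  kylin.server.mode=query
##   On the job node:    kylin.server.mode=job
##   On both nodes:      kylin.server.cluster-servers=query-node:7070,job-node:7070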
#
## Display timezone on UI, format like [GMT+N or GMT-N]
#kylin.web.timezone=
#
## Timeout value for the queries submitted through the Web UI, in milliseconds
#kylin.web.query-timeout=300000
#
#kylin.web.cross-domain-enabled=true
#
##allow user to export query result
#kylin.web.export-allow-admin=true
#kylin.web.export-allow-other=true
#
## Hide measures in measure list of cube designer, separate by comma
#kylin.web.hide-measures=RAW
#
##max connections of one route
#kylin.restclient.connection.default-max-per-route=20
#
##max connections of one rest-client
#kylin.restclient.connection.max-total=200
#
#### PUBLIC CONFIG ###
#kylin.engine.default=2
#kylin.storage.default=2
#kylin.web.hive-limit=20
#kylin.web.help.length=4
#kylin.web.help.0=start|Getting Started|http://kylin.apache.org/docs/tutorial/kylin_sample.html
#kylin.web.help.1=odbc|ODBC Driver|http://kylin.apache.org/docs/tutorial/odbc.html
#kylin.web.help.2=tableau|Tableau Guide|http://kylin.apache.org/docs/tutorial/tableau_91.html
#kylin.web.help.3=onboard|Cube Design Tutorial|http://kylin.apache.org/docs/howto/howto_optimize_cubes.html
#kylin.web.link-streaming-guide=http://kylin.apache.org/
#kylin.htrace.show-gui-trace-toggle=false
#kylin.web.link-hadoop=
#kylin.web.link-diagnostic=
#kylin.web.contact-mail=
#kylin.server.external-acl-provider=
#
#### CUBE MIGRATION
#kylin.cube.migration.enabled=false
#
## Default time filter for job list, 0->current day, 1->last one day, 2->last one week, 3->last one year, 4->all
#kylin.web.default-time-filter=1
#
#### SOURCE ###
#
## Hive client, valid value [cli, beeline]
#kylin.source.hive.client=cli
#
## Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH
#kylin.source.hive.beeline-shell=beeline
#
## Parameters for beeline client, only necessary if hive client is beeline
##kylin.source.hive.beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
#
## While hive client uses above settings to read hive table metadata,
## table operations can go through a separate SparkSQL command line, given SparkSQL connects to the same Hive metastore.
#kylin.source.hive.enable-sparksql-for-table-ops=false
##kylin.source.hive.sparksql-beeline-shell=/path/to/spark-client/bin/beeline
##kylin.source.hive.sparksql-beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
#
#kylin.source.hive.keep-flat-table=false
#
## Hive database name for putting the intermediate flat tables
#kylin.source.hive.database-for-flat-table=default
#
## Whether redistribute the intermediate flat table before building
#kylin.source.hive.redistribute-flat-table=true
## Define how to access hive metadata
## When deploying Kylin on AWS EMR with Glue as the external metastore, use gluecatalog instead
#kylin.source.hive.metadata-type=hcatalog
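#
## Example (illustrative): the alternative value mentioned above, for an EMR
## deployment whose Hive metastore is AWS Glue.
##kylin.source.hive.metadata-type=gluecatalog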
#
#### STORAGE ###
#
## The storage for final cube file in hbase
#kylin.storage.url=hbase
#
## The prefix of hbase table
#kylin.storage.hbase.table-name-prefix=KYLIN_
#
## The namespace for hbase storage
#kylin.storage.hbase.namespace=default
#
## Compression codec for htable, valid value [none, snappy, lzo, gzip, lz4]
#kylin.storage.hbase.compression-codec=none
#
## HBase cluster FileSystem serving hbase, in the format hdfs://hbase-cluster:8020
## Leave empty if hbase running on same cluster with hive and mapreduce
##kylin.storage.hbase.cluster-fs=
#
## The cut size for hbase region, in GB.
#kylin.storage.hbase.region-cut-gb=5
#
## The hfile size in GB; a smaller hfile gives the hfile-converting MR job more reducers and makes it faster.
## Set 0 to disable this optimization.
#kylin.storage.hbase.hfile-size-gb=2
#
#kylin.storage.hbase.min-region-count=1
#kylin.storage.hbase.max-region-count=500
#
## Optional information about the owner of the kylin platform; it can be your team's email
## Currently it is attached to each kylin htable as an attribute
#kylin.storage.hbase.owner-tag=whoami@kylin.apache.org
#
#kylin.storage.hbase.coprocessor-mem-gb=3
#
## By default kylin can spill query's intermediate results to disks when it's consuming too much memory.
## Set it to false if you want query to abort immediately in such condition.
#kylin.storage.partition.aggr-spill-enabled=true
#
## The maximum number of bytes each coprocessor is allowed to scan.
## To allow arbitrary large scan, you can set it to 0.
#kylin.storage.partition.max-scan-bytes=3221225472
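#
## For reference, the default 3221225472 bytes is 3 GiB (3 * 1024^3).
## Example (illustrative): raising the per-coprocessor limit to 10 GiB.
##kylin.storage.partition.max-scan-bytes=10737418240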
#
## The default coprocessor timeout is (hbase.rpc.timeout * 0.9) / 1000 seconds,
## You can set it to a smaller value. 0 means use default.
## kylin.storage.hbase.coprocessor-timeout-seconds=0
#
## Clean the real storage after a delete operation
## If you want the underlying storage (e.g. the htable of a deleted segment) removed as well, set it to true
#kylin.storage.clean-after-delete-operation=false
#
#### JOB ###
#
## Max job retry on error, default 0: no retry
#kylin.job.retry=0
#
## Max count of concurrent jobs running
#kylin.job.max-concurrent-jobs=10
#
## The percentage of the sampling, default 100%
#kylin.job.sampling-percentage=100
#
## If true, will send email notification on job complete
##kylin.job.notification-enabled=true
##kylin.job.notification-mail-enable-starttls=true
##kylin.job.notification-mail-host=smtp.office365.com
##kylin.job.notification-mail-port=587
##kylin.job.notification-mail-username=kylin@example.com
##kylin.job.notification-mail-password=mypassword
##kylin.job.notification-mail-sender=kylin@example.com
#kylin.job.scheduler.provider.100=org.apache.kylin.job.impl.curator.CuratorScheduler
#kylin.job.scheduler.default=0
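#
## Example (illustrative): switch to the Curator-based scheduler registered as
## provider 100 above, so that job scheduling fails over between server instances.
##kylin.job.scheduler.default=100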
#
#### ENGINE ###
#
## Time interval to check hadoop job status
#kylin.engine.mr.yarn-check-interval-seconds=10
#
#kylin.engine.mr.reduce-input-mb=500
#
#kylin.engine.mr.max-reducer-number=500
#
#kylin.engine.mr.mapper-input-rows=1000000
#
## Enable dictionary building in MR reducer
#kylin.engine.mr.build-dict-in-reducer=true
#
## Number of reducers for fetching UHC column distinct values
#kylin.engine.mr.uhc-reducer-count=3
#
## Whether using an additional step to build UHC dictionary
#kylin.engine.mr.build-uhc-dict-in-additional-step=false
#
#
#### CUBE | DICTIONARY ###
#
#kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler
#kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor
#
## 'auto', 'inmem', 'layer' or 'random' for testing 
#kylin.cube.algorithm=layer
#
## A smaller threshold prefers layer, a larger threshold prefers in-mem
#kylin.cube.algorithm.layer-or-inmem-threshold=7
#
## Automatically use the inmem algorithm when:
## 1. the job is a cube planner optimization job
## 2. there are no source records
#kylin.cube.algorithm.inmem-auto-optimize=true
#
#kylin.cube.aggrgroup.max-combination=32768
#
#kylin.snapshot.max-mb=300
#
#kylin.cube.cubeplanner.enabled=true
#kylin.cube.cubeplanner.enabled-for-existing-cube=true
#kylin.cube.cubeplanner.expansion-threshold=15.0
#kylin.cube.cubeplanner.recommend-cache-max-size=200
#kylin.cube.cubeplanner.mandatory-rollup-threshold=1000
#kylin.cube.cubeplanner.algorithm-threshold-greedy=8
#kylin.cube.cubeplanner.algorithm-threshold-genetic=23
#
#
#### QUERY ###
#
## Controls the maximum number of bytes a query is allowed to scan storage.
## The default value 0 means no limit.
## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor.
#kylin.query.max-scan-bytes=0
#
#kylin.query.cache-enabled=true
#
## Controls extra properties for the Calcite jdbc driver
## All extra properties should be under the prefix "kylin.query.calcite.extras-props."
## case sensitive, default: true; to enable case-insensitive matching set it to false
## @see org.apache.calcite.config.CalciteConnectionProperty.CASE_SENSITIVE
#kylin.query.calcite.extras-props.caseSensitive=true
## how to handle unquoted identifiers, default: TO_UPPER, available options: UNCHANGED, TO_UPPER, TO_LOWER
## @see org.apache.calcite.config.CalciteConnectionProperty.UNQUOTED_CASING
#kylin.query.calcite.extras-props.unquotedCasing=TO_UPPER
## quoting method, default: DOUBLE_QUOTE, available options: DOUBLE_QUOTE, BACK_TICK, BRACKET
## @see org.apache.calcite.config.CalciteConnectionProperty.QUOTING
#kylin.query.calcite.extras-props.quoting=DOUBLE_QUOTE
## change SqlConformance from DEFAULT to LENIENT to enable group by ordinal
## @see org.apache.calcite.sql.validate.SqlConformance.SqlConformanceEnum
#kylin.query.calcite.extras-props.conformance=LENIENT
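#
## Example (illustrative): one common pairing of the two properties above to
## make identifier matching case-insensitive; verify against your Kylin version.
##kylin.query.calcite.extras-props.caseSensitive=false
##kylin.query.calcite.extras-props.unquotedCasing=UNCHANGED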
#
## TABLE ACL
#kylin.query.security.table-acl-enabled=true
#
## Usually should not modify this
#kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor
#
#kylin.query.escape-default-keyword=false
#
## Usually should not modify this
#kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack
#
#### SECURITY ###
#
## Spring security profile, options: testing, ldap, saml
## with the "testing" profile, users can log in with the pre-defined name/pwd ADMIN/KYLIN
#kylin.security.profile=testing
#
## Admin roles in LDAP, for ldap and saml
#kylin.security.acl.admin-role=admin
#
## LDAP authentication configuration
#kylin.security.ldap.connection-server=ldap://ldap_server:389
#kylin.security.ldap.connection-username=
#kylin.security.ldap.connection-password=
## When using a custom CA certificate store for LDAPS-based user authentication, configure this item.
## Its value is added to the JVM parameter javax.net.ssl.trustStore.
#kylin.security.ldap.connection-truststore=
#
## LDAP user account directory
#kylin.security.ldap.user-search-base=
#kylin.security.ldap.user-search-pattern=
#kylin.security.ldap.user-group-search-base=
#kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1}))
#
## LDAP service account directory
#kylin.security.ldap.service-search-base=
#kylin.security.ldap.service-search-pattern=
#kylin.security.ldap.service-group-search-base=
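#
## Example (illustrative): a minimal LDAP setup; the DNs, bind account, and
## search pattern below are placeholders to adapt to your directory layout.
##kylin.security.profile=ldap
##kylin.security.ldap.connection-server=ldap://ldap.example.com:389
##kylin.security.ldap.connection-username=cn=admin,dc=example,dc=com
##kylin.security.ldap.connection-password=secret
##kylin.security.ldap.user-search-base=ou=people,dc=example,dc=com
##kylin.security.ldap.user-search-pattern=(&(cn={0}))
##kylin.security.ldap.user-group-search-base=ou=group,dc=example,dc=com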
#
### SAML configurations for SSO
## SAML IDP metadata file location
#kylin.security.saml.metadata-file=classpath:sso_metadata.xml
#kylin.security.saml.metadata-entity-base-url=https://hostname/kylin
#kylin.security.saml.keystore-file=classpath:samlKeystore.jks
#kylin.security.saml.context-scheme=https
#kylin.security.saml.context-server-name=hostname
#kylin.security.saml.context-server-port=443
#kylin.security.saml.context-path=/kylin
#
#### SPARK ENGINE CONFIGS ###
#
## Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit
## This must contain site xmls of core, yarn, hive, and hbase in one folder
##kylin.env.hadoop-conf-dir=/etc/hadoop/conf
#
## Estimate the RDD partition numbers
#kylin.engine.spark.rdd-partition-cut-mb=10
#
## Minimal partition numbers of rdd
#kylin.engine.spark.min-partition=1
#
## Max partition numbers of rdd
#kylin.engine.spark.max-partition=5000
#
## Spark conf (default is in spark/conf/spark-defaults.conf)
#kylin.engine.spark-conf.spark.master=yarn
##kylin.engine.spark-conf.spark.submit.deployMode=cluster
#kylin.engine.spark-conf.spark.yarn.queue=default
#kylin.engine.spark-conf.spark.driver.memory=2G
#kylin.engine.spark-conf.spark.executor.memory=4G
#kylin.engine.spark-conf.spark.executor.instances=40
#kylin.engine.spark-conf.spark.yarn.executor.memoryOverhead=1024
#kylin.engine.spark-conf.spark.shuffle.service.enabled=true
#kylin.engine.spark-conf.spark.eventLog.enabled=true
#kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history
#kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history
#kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
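#
## Example (illustrative): a conservative sizing for a small YARN cluster;
## actual values depend on node memory and your executor-to-core ratio.
##kylin.engine.spark-conf.spark.executor.instances=4
##kylin.engine.spark-conf.spark.executor.cores=2
##kylin.engine.spark-conf.spark.executor.memory=2G
##kylin.engine.spark-conf.spark.yarn.executor.memoryOverhead=512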
#
#### Spark conf for specific job
#kylin.engine.spark-conf-mergedict.spark.executor.memory=6G
#kylin.engine.spark-conf-mergedict.spark.memory.fraction=0.2
#
## Manually upload the Spark assembly jar to HDFS, then set this property to avoid uploading the jar repeatedly at runtime
##kylin.engine.spark-conf.spark.yarn.archive=hdfs://namenode:8020/kylin/spark/spark-libs.jar
##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec
#
## uncomment for HDP
##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
##kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
#
#
#### FLINK ENGINE CONFIGS ###
#
### Flink conf (default is in flink/conf/flink-conf.yaml)
#kylin.engine.flink-conf.jobmanager.heap.size=2G
#kylin.engine.flink-conf.taskmanager.heap.size=4G
#kylin.engine.flink-conf.taskmanager.numberOfTaskSlots=1
#kylin.engine.flink-conf.taskmanager.memory.preallocate=false
#kylin.engine.flink-conf.job.parallelism=1
#kylin.engine.flink-conf.program.enableObjectReuse=false
#kylin.engine.flink-conf.yarn.queue=
#kylin.engine.flink-conf.yarn.nodelabel=
#
#### QUERY PUSH DOWN ###
#
##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.adhoc.PushDownRunnerJdbcImpl
#
##kylin.query.pushdown.update-enabled=false
##kylin.query.pushdown.jdbc.url=jdbc:hive2://sandbox:10000/default
##kylin.query.pushdown.jdbc.driver=org.apache.hive.jdbc.HiveDriver
##kylin.query.pushdown.jdbc.username=hive
##kylin.query.pushdown.jdbc.password=
#
##kylin.query.pushdown.jdbc.pool-max-total=8
##kylin.query.pushdown.jdbc.pool-max-idle=8
##kylin.query.pushdown.jdbc.pool-min-idle=0
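#
## Example (illustrative): route queries that no cube can answer to Hive via
## JDBC push down. The hostname and credentials below are placeholders.
##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.adhoc.PushDownRunnerJdbcImpl
##kylin.query.pushdown.jdbc.url=jdbc:hive2://hive-server:10000/default
##kylin.query.pushdown.jdbc.driver=org.apache.hive.jdbc.HiveDriver
##kylin.query.pushdown.jdbc.username=hive
##kylin.query.pushdown.jdbc.password=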
#
#### JDBC Data Source
##kylin.source.jdbc.connection-url=
##kylin.source.jdbc.driver=
##kylin.source.jdbc.dialect=
##kylin.source.jdbc.user=
##kylin.source.jdbc.pass=
##kylin.source.jdbc.sqoop-home=
##kylin.source.jdbc.filed-delimiter=|
#
#### Livy with Kylin
##kylin.engine.livy-conf.livy-enabled=false
##kylin.engine.livy-conf.livy-url=http://LivyHost:8998
##kylin.engine.livy-conf.livy-key.file=hdfs:///path-to-kylin-job-jar
##kylin.engine.livy-conf.livy-arr.jars=hdfs:///path-to-hadoop-dependency-jar
#
#
#### Realtime OLAP ###
#
## Where the local segment cache is located; for a relative path, the real path will be ${KYLIN_HOME}/${kylin.stream.index.path}
#kylin.stream.index.path=stream_index
#
## The timezone for Derived Time Columns like hour_start; try setting it to GMT+N, see KYLIN-4010 for details
#kylin.stream.event.timezone=
#
## Debug switch for printing realtime global dict encoding information, see KYLIN-4141 for details
#kylin.stream.print-realtime-dict-enabled=false
#
## Whether to enable the latest coordinator, see KYLIN-4167 for details
#kylin.stream.new.coordinator-enabled=true
#
## How to collect the receiver's metrics info
##kylin.stream.metrics.option=console/csv/jmx
#
## When enabling a streaming cube, whether to consume from the earliest or the latest offset
#kylin.stream.consume.offsets.latest=true
#
## The parallelism of scan in receiver side
#kylin.stream.receiver.use-threads-per-query=8
#
## How the coordinator/receiver registers itself into StreamMetadata; there are three options:
## 1. hostname:port, kylin uses the configured host and port as the currentNode;
## 2. port, kylin resolves the node's hostname and appends this port as the currentNode;
## 3. not set, kylin resolves the node's hostname and uses the default port (7070 for coordinator, 9090 for receiver) as the currentNode.
##kylin.stream.node=
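#
## Example (illustrative): option 1 above with a placeholder hostname, for a
## receiver instance.
##kylin.stream.node=receiver-host:9090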
#
## Automatically resubmit a job after it is discarded
#kylin.stream.auto-resubmit-after-discard-enabled=true
