简介
OpenMetadata是一个非常优秀的元数据采集框架,支持MySQL、Hive、Oracle、Postgres、Presto、ClickHouse等众多数据库,也支持Kafka、Kinesis等消息队列,以及Superset、Metabase等报表软件。可惜的是,1.2版本尚不支持Apache Doris。下面以新增Doris数据源为例,介绍如何在OpenMetadata中新增一个数据源。相关代码本人已提交到 https://github.com/open-metadata/OpenMetadata/pull/14087
代码开发
一、Json配置文件修改
定义Doris数据源
参考Mysql数据源定义增加openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/dorisConnection.json
{
  "$id": "https://open-metadata.org/schema/entity/services/connections/database/dorisConnection.json",
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "DorisConnection",
  "description": "Doris Database Connection Config",
  "type": "object",
  "javaType": "org.openmetadata.schema.services.connections.database.DorisConnection",
  "definitions": {
    "dorisType": {
      "description": "Service type.",
      "type": "string",
      "enum": ["Doris"],
      "default": "Doris"
    },
    "dorisScheme": {
      "description": "SQLAlchemy driver scheme options.",
      "type": "string",
      "enum": ["doris"],
      "default": "doris"
    }
  },
  "properties": {
    "type": {
      "title": "Service Type",
      "description": "Service Type",
      "$ref": "#/definitions/dorisType",
      "default": "Doris"
    },
    "scheme": {
      "title": "Connection Scheme",
      "description": "SQLAlchemy driver scheme options.",
      "$ref": "#/definitions/dorisScheme",
      "default": "doris"
    },
    "username": {
      "title": "Username",
      "description": "Username to connect to Doris. This user should have privileges to read all the metadata in Doris.",
      "type": "string"
    },
    "password": {
      "title": "Password",
      "description": "Password to connect to Doris.",
      "type": "string",
      "format": "password"
    },
    "hostPort": {
      "title": "Host and Port",
      "description": "Host and port of the Doris service.",
      "type": "string"
    },
    "databaseName": {
      "title": "Database Name",
      "description": "Optional name to give to the database in OpenMetadata. If left blank, we will use default as the database name.",
      "type": "string"
    },
    "databaseSchema": {
      "title": "Database Schema",
      "description": "Database Schema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single schema. When left blank, OpenMetadata Ingestion attempts to scan all the schemas.",
      "type": "string"
    },
    "sslCA": {
      "title": "SSL CA",
      "description": "Provide the path to ssl ca file",
      "type": "string"
    },
    "sslCert": {
      "title": "SSL Client Certificate File",
      "description": "Provide the path to ssl client certificate file (ssl_cert)",
      "type": "string"
    },
    "sslKey": {
      "title": "SSL Client Key File",
      "description": "Provide the path to ssl client key file (ssl_key)",
      "type": "string"
    },
    "connectionOptions": {
      "title": "Connection Options",
      "$ref": "../connectionBasicType.json#/definitions/connectionOptions"
    },
    "connectionArguments": {
      "title": "Connection Arguments",
      "$ref": "../connectionBasicType.json#/definitions/connectionArguments"
    },
    "supportsMetadataExtraction": {
      "title": "Supports Metadata Extraction",
      "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"
    },
    "supportsDBTExtraction": {
      "title": "Supports DBT Extraction",
      "$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction"
    },
    "supportsProfiler": {
      "title": "Supports Profiler",
      "$ref": "../connectionBasicType.json#/definitions/supportsProfiler"
    },
    "supportsQueryComment": {
      "title": "Supports Query Comment",
      "$ref": "../connectionBasicType.json#/definitions/supportsQueryComment"
    }
  },
  "additionalProperties": false,
  "required": ["hostPort", "username"]
}
添加Doris独有的数据类型
修改openmetadata-spec/src/main/resources/json/schema/entity/data/table.json
在definitions.dataType.enum中增加Doris的数据类型
"HLL",
"LARGEINT",
"QUANTILE_STATE",
"AGG_STATE",
"BITMAP"
databaseService.json中增加Doris数据源
修改openmetadata-spec/src/main/resources/json/schema/entity/services/databaseService.json
在definitions.databaseServiceType.enum中增加"Doris",在definitions.databaseServiceType.javaEnums中增加 { "name": "Doris" }
在definitions.databaseConnection.properties.config.oneOf中增加 { "$ref": "./connections/database/dorisConnection.json" }
testConnections增加doris
openmetadata-service/src/main/resources/json/data/testConnections/database/doris.json
修改完配置文件后,Python、Java、UI都需要重新编译(各端的模型代码都是根据这些JSON Schema生成的,不重新编译则新增的Doris类型不会生效)
二、修改前端文件
添加Doris logo
openmetadata-ui/src/main/resources/ui/src/assets/img/service-icon-doris.png
引用Doris logo
增加Doris数据源
指定dorisConnection.json文件
三、Python代码开发
增加pydoris依赖
在ingestion/setup.py中增加pydoris依赖
开发Python获取Doris元数据核心代码
在ingestion/src/metadata/ingestion/source/database下创建doris文件夹
将以下文件放到doris文件夹下
__init__.py、connection.py、metadata.py、queries.py、utils.py
四、测试
指定metadata文件地址,点击运行
metadata.yaml文件如下:
# OpenMetadata ingestion workflow: extract Doris metadata and push it to the
# OpenMetadata server through the REST sink.
source:
  type: doris
  serviceName: dorisss
  serviceConnection:
    config:
      type: Doris
      username: root
      # Quoted so YAML keeps the value a string (the schema declares
      # password as "type": "string"); unquoted 123456 parses as an integer.
      password: "123456"
      hostPort: 192.168.***.***:9030
      databaseName: default
      databaseSchema: test
      connectionOptions:
        charset: utf8
      # connectionArguments:
      #   key: value
  sourceConfig:
    config:
      type: DatabaseMetadata
      markDeletedTables: true
      includeTables: true
      includeViews: true
      # includeTags: true
      databaseFilterPattern:
        includes:
          - test
          # - database2
        # excludes:
        #   - database3
        #   - database4
      schemaFilterPattern:
        includes:
          - test
        # excludes:
        #   - schema3
        #   - schema4
      # tableFilterPattern:
      #   includes:
      #     - table1
      #     - table2
      #   excludes:
      #     - table3
      #     - table4
sink:
  type: metadata-rest
  config: {}
workflowConfig:
  loggerLevel: INFO # DEBUG, INFO, WARNING or ERROR
  openMetadataServerConfig:
    hostPort: "http://127.0.0.1:8585/api"
    authProvider: openmetadata
    securityConfig:
      jwtToken: "eyJraWQiOiJHY***"
    ## If SSL, fill the following
    # verifySSL: validate # or ignore
    # sslConfig:
    #   certificatePath: /local/path/to/certificate
输出Workflow finished successfully时表明任务运行成功
页面查看
