1. datax工具准备
本篇直接介绍贴几个datax写入数据的模板文件
起码准备一个datax工具,去人家官网下载一下吧,我这里贴上工具还的费劲网速下载耽误时间,不如各位老板去人家官网下载就好啦
现在阿里云datax工具上一篇文章有介绍这里不再废话叨叨啦:
https://blog.youkuaiyun.com/pengge2/article/details/114585863
为了避免有些老板急性子,懒得翻文章,这里还是再次贴一下下载地址吧:
准备datax工具
首先准备阿里的datax工具官网下载安装编译即可
https://github.com/alibaba/DataX
download 地址:http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
Write elasticsearch plugin模块需要单独编译才能使用,使用需要下拉elastic的源码进行编译如下:
下载二进制文件之后,使用命令为:
python /data/datax/datax/bin/datax.py test.json
python为linux系统默认2.7.5版本即可
datax py为启动脚本文件
test.json为写入数据对应的数据模板文件
2. 各种类型的写入数据
Datax工具使用
1. 准备datax工具
2. Drds写入drds 考虑的场景是百亿大数据量场景的迁移方案,会比navicat数据库迁移稍微快半小时
cat > drds-drds.json<<EOF
> {
> "job": {
> "setting": {
> "speed": {
> "channel": 8,
> "record": 1,
> "byte":-1
> },
> "errorLimit": {
> "record": 0,
> "percentage": 0.02
> }
> }
> },
> "content": [
> {
> "reader": {
> "name": "drdsreader",
> "parameter": {
> "username": "drds_crm_dev_jt_lydb",
> "password": "Wc123456",
> "column": [
> {
> "MEMBER_ID",
> "MOBILE",
> "CITY"
> }
> ]
> ],
> "connection":[
> {
> "table": [
> "ly_member_test_1"
> ],
> "jdbcUrl":[
> "jdbc:mysql://192.168.108.119:3306/drds_crm_dev_jt_lydb"
> ]
> }
> ]
> }
> }
> },
> "writer": {
> "name": "drdswriter",
> "parameter": {
> "username": "drds_crm_dev_jt_lydb",
> "password": "Wc123456",
> "column": [
> "MEMBER_ID",
> "MOBILE",
> "CITY"
> ],
> "connection": [
> {
> "jdbcUrl": "jdbc:mysql://192.168.108.119:3306/drds_crm_sit_jt_lydb?useUnicode=ture&characterEncoding=gbk",
> "table": [
> "ly_member_test_2"
> ]
> }
> ]
> }
> }
>
> }
>
> EOF
3. mysql(rds)写入es
cat > rds-es.json <<EOF
> {
> "job": {
> "setting": {
> "speed": {
> "channel":1
> }
> },
> "content": [
> {
> "reader": {
> "name": "mysqlreader",
> "parameter": {
> "username": "zabbix",
> "password": "zabbix",
> "connection": [
> {
> "querySql": [
> "select * from trends;"
> ],
> "jdbcUrl": [
> "jdbc:mysql://192.168.248.171:3306/zabbix"
> ]
> }
> ]
> }
> },
> "writer": {
> "name": "elasticsearchwriter",
> "parameter": {
> "endpoint": "http://192.168.248.175:9200",
> "accessId": "root",
> "accessKey": "root",
> "index": "test",
> "type": "default",
> "cleanup": true,
> "settings": {"index" :{"number_of_shards": 1, "number_of_replicas": 0}},
> "discovery": false,
> "batchSize": 1000,
> "splitter": ",",
> "column": [
> {"name": "cust_code","type":"text"},
> {"name": "member_id","type":"text"},
> .....中间有一坨99个字段懒得贴了太长老板们看的也费劲
> {"name": "statis_time","type":"text"}
> ]
> }
> }
> }
> ]
> }
> }
> EOF
4. drds写入es
cat > test << EOF
> {
> "job": {
> "setting": {
> },
> "content": [
> {
> "reader": {
> "name": "drdsreader",
> "parameter": {
> "username": "root",
> "password": "root",
> "where": "",
> "connection": [
> {
> "querySql": [
> "select db_id,on_line_flag from db_info where db_id < 10;"
> ],
> "jdbcUrl": [
> "jdbc:drds://localhost:3306/database"]
> }
> ]
> }
> },
> "writer": {
> "name": "elasticsearchwriter",
> "parameter": {
> "endpoint": "http://192.168.248.175:9200",
> "accessId": "root",
> "accessKey": "root",
> "index": "test",
> "type": "default",
> "cleanup": true,
> "settings": {"index" :{"number_of_shards": 1, "number_of_replicas": 0}},
> "discovery": false,
> "batchSize": 1000,
> "splitter": ",",
> "column": [
> {"name": "cust_code","type":"text"},
> {"name": "member_id","type":"text"},
.....中间有一坨99个字段
> {"name": "statis_time","type":"text"}
> ]
> }
> }
> }
> ]
> }
> }EOF
5. csv写入txt
cat >test << EOF
> {
> "setting": {},
> "job": {
> "setting": {
> "speed": {
> "channel": 2
> }
> },
> "content": [
> {
> "reader": {
> "name": "txtfilereader",
> "parameter": {
> "path": ["/data/datax/job/csv-txt/data/es.csv"],
> "encoding": "UTF-8",
> "column": ["*"],
> "fieldDelimiter": ","
> }
> },
> "writer": {
> "name": "txtfilewriter",
> "parameter": {
> "path": "/data/datax/job/csv-txt/result/pengge.txt",
> "fileName": "pengge",
> "writeMode": "truncate",
> "format": "yyyy-MM-dd"
> }
> }
> }
> ]
> }
> }
>
> EOF
6. hdfs写入es
cat > test << EOF
> {
> "job": {
> "setting": {
> "speed": {
> "channel": 8
> }
> },
> "content": [
> {
> "reader": {
> "name":"hdfsreader",
> "parameter": {
> "path":["/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000000_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000001_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000002_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000003_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000004_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000005_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000006_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000007_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000008_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000009_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000010_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000011_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000012_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000013_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000014_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000015_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000016_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000017_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000018_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000019_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000020_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000021_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000022_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000023_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000024_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000025_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000026_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000027_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000028_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000029_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000030_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000031_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000032_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000033_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000034_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000035_0","/inceptorsql6/user/hive/warehouse/bloc.db/aicrm/st_bloc_cust_info_index_yyyymmdd/month_id=202012/day_id=20201231/000036_0"],
> "defaultFS":"hdfs://nameservice1",
> "hadoopConfig":{
> "dfs.nameservices":"nameservice1",
> "dfs.namenode.rpc-address.nameservice1.nn1":"crmnn1:8020",
> "dfs.namenode.rpc-address.nameservice1.nn2":"crmnn2:8020",
> "dfs.ha.namenodes.nameservice1":"nn1,nn2",
> "dfs.client.failover.proxy.provider.nameservice1":"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
> },
> "column":["*"],
> "fileType":"orc",
> "encoding":"UTF-8",
> "fieldDelimiter":",",
> "haveKerberos":"true",
> "kerberosKeytabFilePath":"/home/aicrm/dacp/dacp-dp-3.0.0/aicrm.keytab",
> "kerberosPrincipal":"aicrm@CRMTDH"
> }
> },
> "writer": {
> "name": "elasticsearchwriter",
> "parameter": {
> "endpoint": "http://10.1.182.126:9200",
> "accessId": "admin",
> "accessKey": "123456",
> "index": "jt-cust-tag-v2",
> "type": "default",
> "cleanup":true,
> "settings": {
> "index": {
> "number_of_shards": 12,
> "number_of_replicas": 1
> }
> },
> "discovery": false,
> "batchSize": 2048,
> "trySize":50,
> "splitter": ",",
> "column": [
> {"name": "cust_code","type":"text"},
> {"name": "member_id","type":"text"},
.....中间有一坨99个字段
> {"name": "statis_time","type":"text"}
> ]
> }
> }
> }
> ]
> }
> }
> EOF
关键点就是对应的模板对应的写入字段,格式,逗号之类的调试问题,上面贴的json内容可以根据自己的运行环境大概的改吧改吧就得了
对了在贴一下在线的json文件格式校验的网站:
http://www.kjson.com
方便各位老板能够愉快的解决工作中一些调皮的"小问题"
好啦,祝愿各位观看的老板,天天开心,开开心心生活,快快乐乐的工作,迎接美好的每一天~~