迁移方式1:通过代码迁移。优点是不需要改动任何集群配置,也不需要重启集群:
from elasticsearch import Elasticsearch
from elasticsearch import helpers
def get_all_data(table, query_dsl, scroll='5m', timeout='1m'):
    """Start a scrolling search over ``table`` and return its result iterator.

    Args:
        table: Name of the index to read from (passed as ``index``).
        query_dsl: Query body handed to ``helpers.scan`` as ``query``.
        scroll: Passed through unchanged as the ``scroll`` argument.
        timeout: Passed through unchanged as the ``timeout`` argument.

    Returns:
        Whatever ``helpers.scan`` returns; the caller iterates it hit by hit.
    """
    # NOTE(review): "IP", PORT and "user:passwd" look like placeholders to be
    # filled in for the source cluster; PORT is not defined in this snippet.
    hosts = [{'host': "IP", 'port': PORT, "http_auth": "user:passwd"}]
    client = Elasticsearch(hosts)
    return helpers.scan(
        client=client,
        index=table,
        query=query_dsl,
        scroll=scroll,
        timeout=timeout,
    )
def batch_data(data_list, index_name, _id_name=None):
    """Bulk-write the documents in ``data_list`` into ``index_name``.

    Args:
        data_list: Documents to index; each one becomes the ``_source`` of
            one bulk action.
        index_name: Name of the index the documents are written to.
        _id_name: Optional key looked up in every document; when given, the
            value found under it is used as the document ``_id``. When it is
            falsy no ``_id`` is set on the action.

    Raises:
        KeyError: If ``_id_name`` is given but missing from a document
            (assuming the documents are dicts).
    """
    # NOTE(review): "IP", PORT and "user:passwd" look like placeholders to be
    # filled in for the target cluster; PORT is not defined in this snippet.
    es = Elasticsearch([{'host': "IP", 'port': PORT, "http_auth": "user:passwd"}])

    # Build the whole action list before sending anything, so a document
    # missing its id key fails before any write request is issued.
    actions = []
    for doc in data_list:
        action = {
            "_index": index_name,
            '_type': '_doc',
            "_source": doc,
        }
        if _id_name:
            action["_id"] = doc[_id_name]
        actions.append(action)

    helpers.bulk(es, actions)
# Migration driver: scan every document matched by ``query_dsl`` and re-write
# it in batches through ``batch_data``.
# NOTE(review): "index_name" is used for both the read and the write and looks
# like a placeholder; substitute the real source and target index names.
query_dsl = {
    "query": {
        "match_all": {}
    }
}
es_result = get_all_data("index_name", query_dsl)
result = []
for each in es_result:
    result.append(each.get("_source"))
    # Flush once the buffer holds more than 3000 documents so it never grows
    # without bound while scanning.
    if len(result) > 3000:
        batch_data(result, "index_name", _id_name="news_id")
        result.clear()
# Flush the final partial batch. Without this, whatever is still buffered when
# the scan ends (up to 3000 documents) would never be written.
if result:
    batch_data(result, "index_name", _id_name="news_id")
迁移方式2:Elasticdump
迁移方式3:reindex
需要在目标集群各节点的配置文件(elasticsearch.yml)中添加旧集群(源集群)的白名单;该配置为静态配置,修改后需要重启节点才能生效:
reindex.remote.whitelist: "otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*"
POST _reindex
{
"source": {
"remote": {
"host": "http://otherhost:9200",
"username": "user",
"password": "pass"
},
"index": "source",
"query": {
"match": {
"test": "data"
}
}
},
"dest": {
"index": "dest"
}
}