es的java bulk load,直接贴代码
/**
 * Command-line tool that bulk-indexes a tab-separated text file into an
 * Elasticsearch index through the transport client (TCP port 9300).
 *
 * <p>Arguments: cluster name, address of one cluster node, input file path,
 * index name, and the number of documents per bulk request. Each input line
 * supplies two tab-separated columns that are indexed as {@code field1} and
 * {@code field2} of a document of type {@code nature}.
 */
public class BulkLoad {

    /** Document type passed to {@code prepareIndex} for every line. */
    private static final String DOC_TYPE = "nature";

    /** Transport (TCP) port the client connects to. */
    private static final int TRANSPORT_PORT = 9300;

    /** Number of tab-separated columns read from each line. */
    private static final int REQUIRED_COLUMNS = 2;

    /**
     * Entry point: validates the arguments, connects to the cluster and imports the file.
     *
     * @param args cluster name, node address, file path, index name, batch size
     */
    public static void main(String[] args) {
        if (args.length != 5) {
            System.out.println("参数1 集群名字");
            System.out.println("参数2 集群其中一个节点的ip");
            System.out.println("参数3 需要导入的文件的位置");
            System.out.println("参数4 索引名字");
            System.out.println("参数5 批量导入的patch数");
            return;
        }
        String cluster = args[0];
        String ip = args[1];
        String path = args[2];
        String index = args[3];

        // The batch size is used as a modulus below, so zero or garbage must be rejected up front.
        int patch = parseBatchSize(args[4]);
        if (patch <= 0) {
            System.out.println("参数5 必须是大于0的整数: " + args[4]);
            return;
        }

        Settings settings = Settings.settingsBuilder()
                .put("cluster.name", cluster)
                .put("client.transport.sniff", true)
                .build();
        TransportClient client = TransportClient.builder().settings(settings).build();
        try {
            // Resolve the node first; if that fails there is nothing to import into.
            client.addTransportAddress(
                    new InetSocketTransportAddress(InetAddress.getByName(ip), TRANSPORT_PORT));
            importFile(client, path, index, patch);
        } catch (UnknownHostException e) {
            System.out.println("es集群主机名错误");
            e.printStackTrace();
        } catch (IOException e) {
            // Covers a missing input file as well as read errors.
            e.printStackTrace();
        } finally {
            client.close();
        }
    }

    /**
     * Parses the batch-size argument.
     *
     * @param raw the argument text
     * @return the parsed value, or -1 when {@code raw} is not an integer
     */
    private static int parseBatchSize(String raw) {
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Reads the file line by line and submits one bulk request per {@code patch} documents.
     * Blank lines are ignored; lines with too few columns are reported and skipped.
     *
     * @param client connected transport client
     * @param path   input file location
     * @param index  target index name
     * @param patch  documents per bulk request, greater than zero
     * @throws IOException if the file cannot be opened or read
     */
    private static void importFile(TransportClient client, String path, String index, int patch)
            throws IOException {
        long start = System.currentTimeMillis();
        int count = 0;
        int skipped = 0;
        try (BufferedReader br = new BufferedReader(new FileReader(new File(path)))) {
            BulkRequestBuilder bulkRequest = client.prepareBulk();
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                XContentBuilder source;
                try {
                    source = buildXContentBuilder(line);
                } catch (IllegalArgumentException e) {
                    skipped++;
                    System.err.println("跳过格式错误的行: " + e.getMessage());
                    continue;
                }
                bulkRequest.add(client.prepareIndex(index, DOC_TYPE).setSource(source));
                count++;
                // Submit once every `patch` documents.
                if (count % patch == 0) {
                    submit(bulkRequest);
                    // A fresh builder is required; the old one still holds the submitted actions.
                    bulkRequest = client.prepareBulk();
                    System.out.println("提交了" + count + "条");
                }
            }
            // Send whatever is left over from the last, partially filled batch.
            submit(bulkRequest);
        }
        long end = System.currentTimeMillis();
        if (skipped > 0) {
            System.out.println("跳过了" + skipped + "条格式错误的行");
        }
        System.out.println("插入完毕, 耗时" + (end - start) + "ms");
    }

    /**
     * Executes the bulk request unless it is empty, and reports item-level failures.
     *
     * @param bulkRequest the accumulated bulk request
     */
    private static void submit(BulkRequestBuilder bulkRequest) {
        // Skip empty requests: nothing to send after an exact multiple of the batch size.
        if (bulkRequest.numberOfActions() == 0) {
            return;
        }
        if (bulkRequest.execute().actionGet().hasFailures()) {
            System.err.println("本批次有文档写入失败");
        }
    }

    /**
     * Builds the JSON source of one document from a tab-separated line.
     *
     * @param line one input line; the first two columns become {@code field1} and {@code field2}
     * @return builder holding the document source
     * @throws IOException              if the JSON builder fails
     * @throws IllegalArgumentException if the line has fewer than two columns
     */
    private static XContentBuilder buildXContentBuilder(String line) throws IOException {
        // trim() also strips leading/trailing tabs, so an empty first or last
        // column shows up here as a missing column.
        String[] row = line.trim().split("\t");
        if (row.length < REQUIRED_COLUMNS) {
            throw new IllegalArgumentException(
                    "expected " + REQUIRED_COLUMNS + " tab-separated columns but found "
                            + row.length + ": " + line);
        }
        return jsonBuilder()
                .startObject()
                .field("field1", row[0])
                .field("field2", row[1])
                .endObject();
    }
}
/**
 * Entry point: bulk-indexes a tab-separated file into Elasticsearch over the
 * transport client (TCP port 9300).
 *
 * @param args cluster name, node address, file path, index name, batch size
 */
public static void main(String[] args) {
    if (args.length != 5) {
        System.out.println("参数1 集群名字");
        System.out.println("参数2 集群其中一个节点的ip");
        System.out.println("参数3 需要导入的文件的位置");
        System.out.println("参数4 索引名字");
        System.out.println("参数5 批量导入的patch数");
        return;
    }
    String cluster = args[0];
    String ip = args[1];
    String path = args[2];
    String index = args[3];

    // The batch size is used as a modulus below, so reject zero, negatives and non-numbers.
    int patch;
    try {
        patch = Integer.parseInt(args[4]);
    } catch (NumberFormatException e) {
        patch = 0;
    }
    if (patch <= 0) {
        System.out.println("参数5 必须是大于0的整数: " + args[4]);
        return;
    }

    Settings settings = Settings.settingsBuilder().put("cluster.name", cluster)
            .put("client.transport.sniff", true).build();
    TransportClient client = TransportClient.builder().settings(settings).build();
    BufferedReader br = null;
    try {
        // Both steps sit inside the try so that a bad host name or a missing
        // file stops the import instead of failing later.
        client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(ip), 9300));
        br = new BufferedReader(new FileReader(new File(path)));

        BulkRequestBuilder bulkRequest = client.prepareBulk();
        long start = System.currentTimeMillis();
        int count = 0;
        String line;
        while ((line = br.readLine()) != null) {
            // Blank lines (for example a trailing newline) carry no document.
            if (line.trim().isEmpty()) {
                continue;
            }
            bulkRequest.add(client.prepareIndex(index, "nature").setSource(buildXContentBuilder(line)));
            count++;
            // Submit once every `patch` documents.
            if (count % patch == 0) {
                executeBulk(bulkRequest);
                // Start a fresh builder; the old one still holds the submitted actions.
                bulkRequest = client.prepareBulk();
                System.out.println("提交了" + count + "条");
            }
        }
        // Flush the last, partially filled batch.
        executeBulk(bulkRequest);
        long end = System.currentTimeMillis();
        System.out.println("插入完毕, 耗时" + (end - start) + "ms");
    } catch (UnknownHostException e) {
        System.out.println("es集群主机名错误");
        e.printStackTrace();
    } catch (IOException e) {
        // Covers a missing input file as well as read errors.
        e.printStackTrace();
    } finally {
        client.close();
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Executes the bulk request unless it is empty, and reports item-level failures.
 *
 * @param bulkRequest the accumulated bulk request
 */
private static void executeBulk(BulkRequestBuilder bulkRequest) {
    // Skip empty requests: nothing to send after an exact multiple of the batch size.
    if (bulkRequest.numberOfActions() == 0) {
        return;
    }
    if (bulkRequest.execute().actionGet().hasFailures()) {
        System.err.println("本批次有文档写入失败");
    }
}
/**
 * Builds the JSON source of one document from a tab-separated line.
 *
 * @param line one input line; the first two columns become {@code field1} and {@code field2}
 * @return builder holding the document source
 * @throws IOException              if the JSON builder fails
 * @throws IllegalArgumentException if the line has fewer than two columns
 */
private static XContentBuilder buildXContentBuilder(String line) throws IOException {
    // trim() also strips leading/trailing tabs, so an empty first or last
    // column shows up here as a missing column.
    String[] row = line.trim().split("\t");
    if (row.length < 2) {
        // Fail with the offending line instead of a bare ArrayIndexOutOfBoundsException.
        throw new IllegalArgumentException(
                "expected 2 tab-separated columns but found " + row.length + ": " + line);
    }
    return jsonBuilder()
            .startObject()
            .field("field1", row[0])
            .field("field2", row[1])
            .endObject();
}
}
利用的是9300的tcp端口,相对于9200的http接口导入速度大概会提高10倍以上,每秒4w左右的索引速度。
注意点:bulkRequest.execute().actionGet()之后需要重新调用client.prepareBulk()创建新的BulkRequestBuilder,否则已经提交过的请求仍然留在原来的bulkRequest中,数据越积越多;也就是说每次bulk提交之后都要先"清空"再继续添加。