1. 增删改查
创建index
curl -XPUT http://10.11.8.32:9200/movielens
建立索引映射
curl -XPOST http://10.11.8.32:9200/movielens/movies/_mapping -d'
{
"properties": { "id": { "type": "integer" },
"title": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word"
},
"likes": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word"
}
}
}'
curl -XPOST http://10.11.8.32:9200/movielens/movies/_search -d'
{
"query": {
"more_like_this" : {
"fields" : ["title", "likes"],
"like_text" : "阿里、京东明争暗斗",
"min_term_freq" : 1,
"max_query_terms" : 12
}
}
}
'
curl -XGET http://10.11.8.32:9200/yelpindex/yelp?pretty
curl -XGET http://10.11.8.32:9200/yelpindex/yelp/90
curl -XDELETE 'http://10.11.8.32:9200/movielens'
2. 集群管理
检查健康状态
curl 'http://10.11.8.32:9200/_cat/health?v'
集群节点列表
curl 'http://10.11.8.32:9200/_cat/nodes?v'
查看索引
curl 'http://10.11.8.32:9200/_cat/indices?v'
3. ES与Hadoop
3.1
drop TABLE `news_info`;
CREATE TABLE `news_info`(
`id` int,
`title` string,
`likes` string)
row format delimited fields terminated by ',';
3.2
load data local inpath '/home/es/news.csv' into table news_info;
3.3
drop TABLE `ext_news_info`;
add jar /opt/cloudera/parcels/CDH/lib/hive/lib/elasticsearch-hadoop-7.0.0-alpha1-SNAPSHOT.jar;
CREATE EXTERNAL TABLE ext_news_info (
`id` int,
`title` string,
`likes` string)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES(
'es.nodes' = '10.11.8.32:9200',
'es.index.auto.create' = 'true',
'es.resource' = 'movielens/movies',
'es.mapping.id' = 'id',
'es.mapping.names' = 'title:title,
likes:likes');
3.4
INSERT overwrite TABLE ext_news_info
SELECT id,title
,likes
FROM news_info;
3.5
curl http://10.11.8.32:9200
curl -XGET http://10.11.8.32:9200/movielens/movies/1
4. ES与spark
elasticsearch-hadoop-6.1.2.zip
https://github.com/elastic/elasticsearch-hadoop --->
https://www.elastic.co/downloads/hadoop
ctdn-1:
cd /data/tools
unzip -e elasticsearch-hadoop-6.1.2.zip
[root@ctdn-1 dist]# cp elasticsearch-spark-20_2.11-6.1.2.jar /data/cloudera/parcels/SPARK2-2.1.0.cloudera1-1.cdh5.7.0.p0.120904/lib/spark2/jars
[root@ctdn-1 dist]# scp elasticsearch-spark-20_2.11-6.1.2.jar root@ctdn-2:/data/cloudera/parcels/SPARK2-2.1.0.cloudera1-1.cdh5.7.0.p0.120904/lib/spark2/jars
......
[root@ctdn-1 dist]# scp elasticsearch-spark-20_2.11-6.1.2.jar root@ctdn-6:/data/cloudera/parcels/SPARK2-2.1.0.cloudera1-1.cdh5.7.0.p0.120904/lib/spark2/jars
5. 中文分词器:
smartcn
bin/elasticsearch-plugin install analysis-smartcn
ik
bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.5.3/elasticsearch-analysis-ik-5.5.3.zip
使用ik中文分词
创建index
curl -XPUT http://localhost:9200/ikcase
建立索引映射
curl -XPOST http://localhost:9200/ikcase/fulltext/_mapping -d'
{
"properties": {
"content": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word"
}
}
}'
索引一些文档
curl -XPOST http://localhost:9200/ikcase/fulltext/1 -d'
{"content":"美国留给伊拉克的是个烂摊子吗"}
'
curl -XPOST http://localhost:9200/ikcase/fulltext/2 -d'
{"content":"公安部:各地校车将享最高路权"}
'
curl -XPOST http://localhost:9200/ikcase/fulltext/3 -d'
{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}
'
curl -XPOST http://localhost:9200/ikcase/fulltext/4 -d'
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
'
带有高亮效果的查询
curl -XPOST http://localhost:9200/ikcase/fulltext/_search -d'
{
"query" : { "match" : { "content" : "中国" }},
"highlight" : {
"pre_tags" : ["<tag1>", "<tag2>"],
"post_tags" : ["</tag1>", "</tag2>"],
"fields" : {
"content" : {}
}
}
}
'
6.
pyspark与elasticsearch的结合
http://blog.csdn.net/xmo_jiao/article/details/73251937
http://www.yiibai.com/elasticsearch/elasticsearch_query_dsl.html
https://www.elastic.co/guide/en/elasticsearch/hadoop/current/spark.html
morelikethis
https://github.com/alexksikes/mlt-play
http://10.11.8.32:5000/movielens/
https://www.elastic.co/guide/en/elasticsearch/reference/1.7/query-dsl-mlt-query.html
http://blog.csdn.net/xiao_jun_0820/article/details/51087214
图计算
https://github.com/o19s/elastic-graph-recommender