ElasticSearch学习
ElasticSearch用于数据检索,效率非常高,尤其是在大数据环境下,所以非常有必要学习!
1. 安装
这里我使用阿里云服务器,并且采用Docker 安装ES
安装elasticsearch
# 1.拉取镜像
docker pull elasticsearch:7.7.1
# 2.生成容器
docker run -d -p 9300:9300 -p 9200:9200 --name es -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -e "discovery.type=single-node" -v /root/es/plugins:/usr/share/elasticsearch/plugins -v /root/es/data:/usr/share/elasticsearch/data elasticsearch:7.7.1
安装kibana
# 1. Pull the Kibana image (its version must match the Elasticsearch version)
docker pull kibana:7.7.1
# 2. Start the Kibana container. Kibana 7.x reads ELASTICSEARCH_HOSTS; ELASTICSEARCH_URL was the 6.x name and is ignored here
docker run -d --name kibana -e ELASTICSEARCH_HOSTS=http://47.101.52.63:9200 -p 5601:5601 kibana:7.7.1
安装elasticsearch-head
# 1.下载镜像
docker pull mobz/elasticsearch-head:5
# 2.生成容器
docker run -d -p 9100:9100 --name es-head docker.io/mobz/elasticsearch-head:5
# 3.在这里可能会出现跨域拒绝访问问题
进入elasticsearch容器内部,修改配置文件elasticsearch.yml
docker ps -a #拿到运行容器elasticsearch 的 id
docker exec -it ******(容器id) /bin/bash
cd ./config
vi elasticsearch.yml
在elasticsearch.yml中添加:
http.cors.enabled: true
http.cors.allow-origin: "*"
然后重启容器
docker restart es
安装IK分词器
# 1. Download the IK analyzer release that matches the Elasticsearch version (7.7.1); the release tag in the URL must match the zip name
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.7.1/elasticsearch-analysis-ik-7.7.1.zip
# 2. Unzip it into the plugins/elasticsearch folder
yum install -y unzip # install unzip
unzip -d plugins/elasticsearch elasticsearch-analysis-ik-7.7.1.zip
# 3.添加自定义扩展词和停用词
cd plugins/elasticsearch/config
vim IKAnalyzer.cfg.xml
<properties>
<comment>IK Analyzer 扩展配置</comment>
<!--用户可以在这里配置自己的扩展字典 -->
<entry key="ext_dict">ext_dict.dic</entry>
<!--用户可以在这里配置自己的扩展停止词字典-->
<entry key="ext_stopwords">ext_stopwords.dic</entry>
</properties>
# 4.在ik分词器目录下config目录中创建ext_dict.dic文件 编码一定要为UTF-8才能生效
vim ext_dict.dic 加入扩展词即可
# 5. 在ik分词器目录下config目录中创建ext_stopwords.dic文件
vim ext_stopwords.dic 加入停用词即可
# 6. Commit this container as a new image (tagged with the Elasticsearch version that is actually installed, 7.7.1)
docker commit -a="zk" -m="with IKAnalyzer" b35d35f72b8d zk/elasticsearch:7.7.1
# 7. Create a container from the new image and mount the IK config directory as a volume.
#    Keep discovery.type=single-node as in the first run, and remove the earlier container named "es" first,
#    otherwise the container name and the published ports clash.
docker run -d --name es -p 9200:9200 -p 9300:9300 -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -e "discovery.type=single-node" -v /usr/local/IKAnalyzer:/usr/share/elasticsearch/plugins/elasticsearch/config zk/elasticsearch:7.7.1
2.项目实战(基于es的仿京东搜索)
-
爬虫
- 导入jsoup依赖
<!-- 解析网页 使用jsoup 爬虫--> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.10.2</version> </dependency>
- 编写测试,生成工具类
package com.ittao.utils;
import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Scrapes product listings from the JD.com search result page.
 */
public class HtmlParseUtil {

    /**
     * Fetches the JD search page for a keyword and extracts one {@link Content} per listed item.
     *
     * @param keyword search keyword; it is URL-encoded before being placed in the query string
     * @return the scraped items, or an empty list when the page has no {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        // Encode the keyword so spaces, '&', '#' and non-ASCII text cannot break the query string.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keyword, "UTF-8") + "&enc=utf-8";
        // Fetch and parse the page (30 second timeout).
        Document document = Jsoup.parse(new URL(url), 30000);

        List<Content> contents = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The result container is missing from the returned page; report "nothing found"
            // instead of failing with a NullPointerException.
            return contents;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            Content content = new Content();
            content.setTitle(item.getElementsByClass("p-name").eq(0).text());
            content.setImg(item.getElementsByTag("img").eq(0).attr("src"));
            content.setPrice(item.getElementsByClass("p-price").text());
            contents.add(content);
        }
        return contents;
    }
}
-
前后端分离实现
-
后端实现
整体结构
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.0.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>

    <groupId>com.ittao</groupId>
    <artifactId>elasticsearch_study</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>elasticsearch_study</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <!-- Keep this equal to the version of your Elasticsearch server; a mismatch can prevent the client from connecting -->
        <elasticsearch.version>7.7.1</elasticsearch.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <!-- HTML parsing for the crawler (jsoup) -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.61</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
config
package com.ittao.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Elasticsearch configuration class; registers the client as a Spring bean.
 * <p>
 * Created by 黄小涛, 2020-06-07.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Builds the high-level REST client for the node at 47.101.52.63:9200 (plain HTTP).
     *
     * @return the client instance exposed as the {@code restHighLevelClient} bean
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("47.101.52.63", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
entity
package com.ittao.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * A single product entry with its title, image and price, all held as strings.
 * <p>
 * Created by 黄小涛, 2020-06-08.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Content {

    /** Product title. */
    private String title;

    /** Product image reference. */
    private String img;

    /** Product price, kept as text. */
    private String price;
}
package com.ittao.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import org.springframework.stereotype.Component;

/**
 * Simple user bean with a name and an age; setters are chainable via Lombok's
 * {@code @Accessors(chain = true)} and the class is registered as a Spring component.
 * <p>
 * Created by 黄小涛, 2020-06-08.
 */
@Component
@Accessors(chain = true)
@NoArgsConstructor
@AllArgsConstructor
@Data
public class User {

    /** User name. */
    private String name;

    /** User age. */
    private int age;
}
service
package com.ittao.service.impl;

import com.alibaba.fastjson.JSON;
import com.ittao.entity.Content;
import com.ittao.service.ContentService;
import com.ittao.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.FuzzyQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Indexes scraped JD products into the {@code jd_goods} index and searches them by title.
 */
@Service
public class ContentServiceImpl implements ContentService {

    @Autowired
    @Qualifier("restHighLevelClient")
    private RestHighLevelClient client;

    /**
     * Scrapes JD for the keyword and bulk-indexes the results into {@code jd_goods}.
     *
     * @param keyword keyword to scrape
     * @return {@code true} when every item was indexed; {@code false} when nothing was scraped
     *         or the bulk response reports failures
     * @throws IOException if scraping or the bulk call fails
     */
    @Override
    public boolean addToEs(String keyword) throws IOException {
        // 1. Fetch the data to index.
        List<Content> contentList = HtmlParseUtil.parseJd(keyword);
        if (contentList.isEmpty()) {
            // A bulk request with no actions fails request validation; report "nothing indexed" instead.
            return false;
        }

        // 2. Build one bulk request holding an index action per product.
        BulkRequest request = new BulkRequest();
        for (Content content : contentList) {
            request.add(new IndexRequest("jd_goods")
                    .source(JSON.toJSONString(content), XContentType.JSON));
        }
        request.timeout(new TimeValue(2, TimeUnit.MINUTES));

        // 3. Execute and report whether every action succeeded.
        BulkResponse response = client.bulk(request, RequestOptions.DEFAULT);
        return !response.hasFailures();
    }

    /**
     * Searches {@code jd_goods} for documents whose title matches the keyword and returns one page,
     * with the title replaced by its highlighted version where a highlight exists.
     *
     * @param keyword  text matched against the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source maps of the hits on the requested page
     * @throws IOException if the search call fails
     */
    @Override
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
        if (pageNo <= 0) {
            pageNo = 1;
        }

        // 1. Create the search request.
        SearchRequest request = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // 2. Paging: "from" is the offset of the first hit, not a page number.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // 3. Highlight matches in the title with a red <span>.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        // Only highlight fields that the query actually matched.
        highlightBuilder.requireFieldMatch(true);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        // 4. Full-text match on the title.
        MatchQueryBuilder matchQuery = QueryBuilders.matchQuery("title", keyword);
        sourceBuilder.query(matchQuery);
        sourceBuilder.timeout(new TimeValue(1, TimeUnit.MINUTES));
        request.source(sourceBuilder);

        // 5. Execute the search.
        SearchResponse response = client.search(request, RequestOptions.DEFAULT);

        // 6. Swap each hit's title for its highlighted form.
        List<Map<String, Object>> mapList = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            HighlightField title = hit.getHighlightFields().get("title");
            if (title != null) {
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : title.getFragments()) {
                    highlighted.append(fragment);
                }
                sourceAsMap.put("title", highlighted.toString());
            }
            mapList.add(sourceAsMap);
        }
        return mapList;
    }
}
controller
package com.ittao.Controller;

import com.ittao.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * REST endpoints that delegate to {@link ContentService}; cross-origin requests are allowed.
 */
@CrossOrigin
@RestController
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * GET /addToEs/{keyword}: delegates to {@code ContentService.addToEs}.
     *
     * @param keyword keyword taken from the path
     * @return the boolean returned by the service
     * @throws IOException propagated from the service
     */
    @GetMapping("/addToEs/{keyword}")
    public boolean addToEs(@PathVariable("keyword") String keyword) throws IOException {
        boolean indexed = contentService.addToEs(keyword);
        return indexed;
    }

    /**
     * GET /searchPage/{keyword}/{pageNo}/{pageSize}: delegates to {@code ContentService.searchPage}.
     *
     * @param keyword  keyword taken from the path
     * @param pageNo   page number taken from the path
     * @param pageSize page size taken from the path
     * @return the list of result maps returned by the service
     * @throws IOException propagated from the service
     */
    @GetMapping("/searchPage/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String, Object>> searchPage(
            @PathVariable("keyword") String keyword,
            @PathVariable("pageNo") int pageNo,
            @PathVariable("pageSize") int pageSize) throws IOException {
        List<Map<String, Object>> page = contentService.searchPage(keyword, pageNo, pageSize);
        return page;
    }
}
utils
package com.ittao.utils;

import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes product listings from the JD.com search result page.
 */
public class HtmlParseUtil {

    /**
     * Fetches the JD search page for a keyword and extracts one {@link Content} per listed item.
     *
     * @param keyword search keyword; it is URL-encoded before being placed in the query string
     * @return the scraped items, or an empty list when the page has no {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        // Encode the keyword so spaces, '&', '#' and non-ASCII text cannot break the query string.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keyword, "UTF-8") + "&enc=utf-8";
        // Fetch and parse the page (30 second timeout).
        Document document = Jsoup.parse(new URL(url), 30000);

        List<Content> contents = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The result container is missing from the returned page; report "nothing found"
            // instead of failing with a NullPointerException.
            return contents;
        }

        Elements items = goodsList.getElementsByTag("li");
        for (Element item : items) {
            Content content = new Content();
            content.setTitle(item.getElementsByClass("p-name").eq(0).text());
            content.setImg(item.getElementsByTag("img").eq(0).attr("src"));
            content.setPrice(item.getElementsByClass("p-price").text());
            contents.add(content);
        }
        return contents;
    }
}
前端实现
首页
<!-- Landing page: JD logo plus links to the search page and the data-generation page. -->
<template>
  <div>
    <div class="logo">
      <el-link href="https://www.jd.com/" target="_blank">
        <el-image :src="logosrc" class="imge"></el-image>
      </el-link>
    </div>
    <div class="link">
      <h1>ElasticSearch的简单实战</h1>
      <p>第一个功能:从京东商城中爬取我们搜索的数据,存放到elasticsearch中</p>
      <p>第二个功能:从elasticsearch中根据关键字查询我们的数据,进行展示</p>
      <router-link to="/search"><p class="text">点我去搜索数据</p></router-link>
      <router-link to="/generateData"><p class="text">点我去爬取数据</p></router-link>
    </div>
  </div>
</template>
<script>
import Logosrc from "../assets/img/logo.png";

export default {
  name: "Home",
  data() {
    return {
      // Logo image shown at the top of the page.
      logosrc: Logosrc
    };
  },
  components: {},
  created() {},
  methods: {}
};
</script>
<style scoped>
.text {
  font-size: 20px;
}
.link {
  text-align: left;
  margin-left: 450px;
}
.logo {
  height: 200px;
}
</style>
查询页面
<template>
<div>
<el-container>
<el-header>
<el-row>
<el-row class="head">
<!-- logo图片部分 -->
<el-col :span="8">
<div class="logo">
<el-link href="https://www.jd.com/" target="_blank">
<el-image :src="logosrc" class="imge"></el-image>
</el-link>
</div>
</el-col>
<!-- 搜索框 -->
<el-col :span="4">
<div class="input">
<el-input placeholder="请输入搜索内容" v-model="input" clearable></el-input>
</div>
</el-col>
<el-col :span="1">
<div class="input">
<el-button type="danger" @click="searchData">搜索</el-button>
</div>
</el-col>
<el-col :span="4" :offset="6">
<div class="input">
<el-link href="/generateData">点我去爬取数据</el-link>
<el-link href="/">点我去首页</el-link>
</div>
</el-col>
</el-row>
</el-row>
</el-header>
<!-- 图片展示部分 -->
<div class="content">
<div class="row">
<ul>
<li v-for="(item,index) in dataList" :key="index">
<div class="col">
<div class="image">
<img height="220px" :src="item.img" />
</div>
<div class="p-price">
<strong>
<i>{{item.price}}</i>
</strong>
</div>
<div class="p-title">
<p class="p-title1" v-html="item.title"></p>
</div>
<div class="p-commit">
<strong>
<a target="_blank" href="https://www.jd.com/">1300+</a>条评价
</strong>
</div>
<div class="p-shop">
<a target="_blank" class="curr-shop hd-shopname" href="https://www.jd.com/" title="文轩网旗舰店">文轩网旗舰店</a>
</div>
</div>
</li>
</ul>
</div>
</div>
</el-container>
</div>
</template>
<script >
import Logosrc from "../assets/img/logo.png";
export default {
name: "Search",
data() {
return {
logosrc: Logosrc,
input: "",
dataList: []
};
},
components: {},
created() {},
methods: {
searchData() {
//获得搜索的关键字
console.log(this.input)
//发送axios请求
this.$http.get(`/searchPage/${this.input}/${1}/${20}`).then(res =>{
console.log(res.data)
this.dataList = res.data
if(this.dataList.length < 1){
alert("暂无数据请重新搜索或者去生成数据!")
}
})
}
}
};
</script>
<style scoped>
.el-header,
.el-footer {
text-align: center;
line-height: 80px;
}
.el-main {
text-align: center;
line-height: 800px;
}
body > .el-container {
margin-bottom: 40px;
}
.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
line-height: 260px;
}
.el-container:nth-child(7) .el-aside {
line-height: 320px;
}
.content {
/* border: 1px solid sandybrown; */
width: 100%;
height: 1200px;
margin: 50px auto;
}
.row {
/* border: 1px solid saddlebrown; */
width: 100%;
height: 400px;
float: left;
}
.col {
/* border: 1px solid tan; */
width: 25%;
height: 400px;
float: left;
}
.image {
text-align: left;
margin-left: 20px;
}
.p-price {
text-align: left;
margin-left: 20px;
color: red;
}
.p-title1{
font-size: 10px;
}
.p-commit {
text-align: left;
}
.p-shop {
text-align: left;
}
ul li {
list-style-type: none;
}
</style>
生成数据页面
<template>
<div>
<el-container>
<el-header>
<el-row>
<el-row class="head">
<!-- logo图片部分 -->
<el-col :span="8">
<div class="logo">
<el-link href="https://www.jd.com/" target="_blank">
<el-image :src="logosrc" class="imge"></el-image>
</el-link>
</div>
</el-col>
<!-- 搜索框 -->
<el-col :span="4">
<div class="input">
<el-input placeholder="请输入需要生成数据内容" v-model="input" clearable></el-input>
</div>
</el-col>
<el-col :span="1">
<div class="input">
<el-button type="danger" @click="generateData">生成</el-button>
</div>
</el-col>
<el-col :span="4" :offset="6">
<div class="input">
<el-link href="/search">点我去查询</el-link>
<el-link href="/">点我去首页</el-link>
</div>
</el-col>
</el-row>
</el-row>
</el-header>
</el-container>
</div>
</template>
<script >
import Logosrc from "../assets/img/logo.png";
export default {
name: "GenerateData",
data() {
return {
input: "",
logosrc: Logosrc
};
},
components: {},
created() {},
methods: {
generateData() {
this.$http.get(`/addToEs/${this.input}`).then(res => {
console.log(res.data);
if (res.data == true) {
this.$message({
type: "success",
message: "生成数据成功,你可以去查询啦!"
});
}
});
}
}
};
</script>
<style scoped>
.el-header,
.el-footer {
text-align: center;
line-height: 80px;
}
.el-main {
text-align: center;
line-height: 800px;
}
body > .el-container {
margin-bottom: 40px;
}
.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
line-height: 260px;
}
.el-container:nth-child(7) .el-aside {
line-height: 320px;
}
</style>
route中index.js
import Vue from 'vue'
import VueRouter from 'vue-router'
import Search from '../views/Search.vue'
import Home from '../views/Home.vue'
import GenerateData from '../views/GenerateData.vue'
// Register the router plugin before creating the router instance.
Vue.use(VueRouter)

// Route table: one entry per page component.
const routes = [
  { path: '/', name: 'Home', component: Home },
  { path: '/search', name: 'Search', component: Search },
  { path: '/generateData', name: 'GenerateData', component: GenerateData }
]

// History-mode router rooted at the app's base URL.
const router = new VueRouter({
  mode: 'history',
  base: process.env.BASE_URL,
  routes: routes
})

export default router
main.js
import Vue from 'vue'
import App from './App.vue'
import router from './router'
import store from './store'
import ElementUI from 'element-ui'
import 'element-ui/lib/theme-chalk/index.css'
import axios from 'axios'
Vue.config.productionTip = false
Vue.use(ElementUI)

// `baseURL` is prepended to every request `url` unless that `url` is absolute.
axios.defaults.baseURL = 'http://localhost:8989'
// Expose axios to all components as `this.$http`.
Vue.prototype.$http = axios

// Create the root instance and mount it on #app.
const app = new Vue({
  router,
  store,
  render: createElement => createElement(App)
})
app.$mount('#app')
3.总结
通过学习,对es的基本使用算是初步的掌握了.学习es的步骤如下
1.es的安装,尤其通过docker安装
2.es的简单restful api的使用,包括简单查询和复杂查询,通过kibana可视化界面操作
3.es的java客户端工具api学习,通过java代码去实现增删改查,其实本质上和查询语句类似,该有的方法都有。
4.最后通过es仿京东搜索的实战练习,具备了对es的基本运用能力。