TMDB数据导入elasticsearch7

目录

1、下载tmdb数据(下载地址:TMDB 5000 Movie Dataset | Kaggle)

2、下载两个文档(tmdb_5000_credits.csv和tmdb_5000_movies.csv)

3、项目POM文件

4、导入ES代码

5、新建ES索引


1、下载tmdb数据
下载地址:TMDB 5000 Movie Dataset | Kaggle

注意:注册账号需要翻墙才可以

2、下载两个文档(tmdb_5000_credits.csv和tmdb_5000_movies.csv)

    整合文档:用Excel将两个CSV文件中需要的字段整合到同一个CSV文件中(下文的导入代码读取的是 ./tmdb_5000_movies.csv,并从第21列读取演员信息的JSON数组)

3、项目POM文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the "dianping" Spring Boot project that imports TMDB CSV data into Elasticsearch 7. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.6.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.imooc</groupId>
    <artifactId>dianping</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>dianping</name>
    <description>dianping spring boot java project</description>
    <properties>
        <java.version>1.8</java.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <!-- The property name must be "maven.compiler.target"; the misspelled "maven.compile.target" is ignored by the compiler plugin. -->
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-aop</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Elasticsearch 7.3.0 core, transport client and Netty4 transport; versions are pinned explicitly. -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>7.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>transport</artifactId>
            <version>7.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch.plugin</groupId>
            <artifactId>transport-netty4-client</artifactId>
            <version>7.3.0</version>
        </dependency>

        <!-- fastjson parses the JSON cast column; opencsv reads the CSV file. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.58</version>
        </dependency>
        <dependency>
            <groupId>com.opencsv</groupId>
            <artifactId>opencsv</artifactId>
            <version>4.2</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

</project>

4、导入ES代码

package com.imooc.dianping.service.impl;


import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.imooc.dianping.service.SellerService;
import com.opencsv.CSVReader;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.List;

/**
 * Imports movie rows from a local CSV file into the Elasticsearch {@code movie} index.
 *
 * <p>The CSV is read from {@code ./tmdb_5000_movies.csv} (relative to the working directory).
 * The first line is treated as a header and skipped. Each remaining row becomes one document
 * whose id is the 1-based line number of the row.
 */
@Service
public class SellerServiceImpl implements SellerService {
    /** Zero-based CSV column holding the movie overview. */
    private static final int COL_OVERVIEW = 7;
    /** Zero-based CSV column holding the popularity score. */
    private static final int COL_POPULARITY = 8;
    /** Zero-based CSV column holding the release date. */
    private static final int COL_RELEASE_DATE = 11;
    /** Zero-based CSV column holding the tagline. */
    private static final int COL_TAGLINE = 16;
    /** Zero-based CSV column holding the title. */
    private static final int COL_TITLE = 17;
    /** Zero-based CSV column holding the cast as a JSON array of objects. */
    private static final int COL_CAST = 20;
    /**
     * Fallback release date for rows without one. Hyphen-separated so that it matches the
     * {@code yyyy-MM-dd} pattern declared for {@code release_date} in the index mapping.
     */
    private static final String DEFAULT_RELEASE_DATE = "1970-01-01";

    @Autowired
    TransportClient transportClient;

    /**
     * Reads every row of the CSV file, converts the usable rows into index requests and
     * submits them to Elasticsearch as a single asynchronous bulk request.
     *
     * <p>Rows that are too short, have an empty cast column, or have a cast column containing
     * {@code []} are skipped. If reading the file fails, the error is printed and nothing is
     * sent. Bulk failures are printed from the response listener.
     */
    @Override
    public void importData() {
        BulkRequest bulkRequest = new BulkRequest();
        int lineIndex = 0;
        // try-with-resources closes both readers even when readAll() or row parsing throws.
        try (InputStreamReader reader = new InputStreamReader(
                     new FileInputStream("./tmdb_5000_movies.csv"), Charset.forName("UTF-8"));
             CSVReader csvReader = new CSVReader(reader, ',')) {
            // Load the whole CSV file into memory.
            List<String[]> rows = csvReader.readAll();
            for (String[] records : rows) {
                lineIndex++;
                if (lineIndex == 1) {
                    // Header line.
                    continue;
                }
                System.out.println("第"+lineIndex+"行");
                IndexRequest request = toIndexRequest(records, lineIndex);
                if (request != null) {
                    bulkRequest.add(request);
                }
            }
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both cases.
            System.out.println("第---->"+lineIndex+"行");
            e.printStackTrace();
            return;
        }

        if (bulkRequest.numberOfActions() == 0) {
            // An empty bulk request fails validation, so there is nothing worth sending.
            System.out.println("No importable rows found; bulk request skipped");
            return;
        }

        // Send the collected documents to Elasticsearch.
        transportClient.bulk(bulkRequest, new ActionListener<BulkResponse>() {
            @Override
            public void onResponse(BulkResponse bulkItemResponses) {
                System.out.println(bulkItemResponses);
                if (bulkItemResponses.hasFailures()) {
                    // A bulk call can succeed as a whole while individual items are rejected.
                    System.err.println(bulkItemResponses.buildFailureMessage());
                }
            }

            @Override
            public void onFailure(Exception e) {
                // Report the failure instead of silently dropping it.
                System.err.println("Bulk import into index 'movie' failed");
                e.printStackTrace();
            }
        });
    }

    /**
     * Converts one CSV row into an index request for the {@code movie} index.
     *
     * @param records   the column values of the row
     * @param lineIndex the 1-based line number of the row, used as the document id
     * @return the index request, or {@code null} when the row must be skipped
     */
    private IndexRequest toIndexRequest(String[] records, int lineIndex) {
        if (records.length <= COL_CAST) {
            // Short or malformed row: the cast column is missing.
            return null;
        }
        String castJson = records[COL_CAST];
        if (StringUtils.isEmpty(castJson) || castJson.contains("[]")) {
            return null;
        }
        JSONArray castArray = JSONArray.parseArray(castJson);
        if (castArray == null || castArray.isEmpty()) {
            return null;
        }
        // Only the first cast entry is indexed.
        JSONObject firstCast = castArray.getJSONObject(0);
        JSONObject cast = new JSONObject();
        cast.put("character", firstCast.getString("character"));
        cast.put("name", firstCast.getString("name"));

        String date = records[COL_RELEASE_DATE];
        if (StringUtils.isEmpty(date)) {
            date = DEFAULT_RELEASE_DATE;
        }
        return new IndexRequest("movie", "_doc", String.valueOf(lineIndex)).source(XContentType.JSON,
                "title", records[COL_TITLE]
                , "tagline", records[COL_TAGLINE]
                , "release_date", date
                , "popularity", records[COL_POPULARITY]
                , "cast", cast
                , "overview", records[COL_OVERVIEW]);
    }
}

5、新建ES索引

PUT /movie
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "english"
      },
      "tagline": {
        "type": "text",
        "analyzer": "english"
      },
      "release_date": {
        "type": "date",
        "format": "8yyyy-MM-dd||yyyy-M-dd||yyyy-MM-d||yyyy-M-d"
      },
      "popularity": {
        "type": "double"
      },
      "cast": {
        "type": "object",
        "properties": {
          "character": {
            "type": "text",
            "analyzer": "standard"
          },
          "name": {
            "type": "text",
            "analyzer": "standard"
          }
        }
      },
      "overview": {
        "type": "text",
        "analyzer": "english"
      }
    }
  }
}

为了方便导入,源码和文档进行了整合,代码如下

abel/importData

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值