jsoup爬取驾考题库

为了给课程设计准备数据,使用 jsoup 爬取驾考题库,内容包括题目、选项、答案、解析和图片。

模型:Question 

/**
 * Persistent model for one scraped driving-test question.
 *
 * <p>All content fields are plain strings. The remaining members of the class
 * are elided in this excerpt (see the {@code ...} placeholder below).
 */
@Entity
public class Question {
    // Record identifier. NOTE(review): presumably the primary key; no @Id mapping is visible in this excerpt - confirm.
    private int id;
    // Sequence number of the question.
    private Integer num;
    // Question type tag.
    private String type;
    // Chapter the question belongs to.
    private String chapter;
    // Question text.
    private String question;
    // Answer options, kept in a single string.
    private String options;
    // Correct answer.
    private String answer;
    // Explanation of the answer.
    private String analysis;
    // Local path of the question's picture.
    private String picpath;
    // Remote URL of the question's picture.
    private String picurl;
...
}

目标网址:https://www.ybjk.com/tiku/02dec.htm

/**
 * Crawls the ybjk.com question bank page by page and stores every question.
 *
 * <p>Starting from the first question page of the configured subject, each page is
 * parsed into a {@code Question} (number, type, chapter, text, options, answer,
 * analysis, picture) and handed to {@code questionService.add}. The last link of the
 * navigation table is then followed to reach the next page. Crawling stops when that
 * link is missing, has an empty {@code href}, or points back to the current page.
 *
 * @param questionService service used to persist each parsed question
 * @throws IOException if a question page cannot be fetched
 */
public void getInfo(QuestionService questionService) throws IOException {
        final String baseUrl = "https://www.ybjk.com";
        final String imageDir = "H:/dirver_image/";
        // Subject code: "1" = subject one, "4" = subject four.
        // To crawl subject four, start from "https://www.ybjk.com/tiku/d704f.htm" with subject = "4".
        String url = "https://www.ybjk.com/tiku/02dec.htm";
        String subject = "1";
        int i = 1;
        while (!url.equals("")) {
            Document document = Jsoup.connect(url).timeout(4000).userAgent("Mozilla").get();

            // Running sequence number of the question.
            String num = String.valueOf(i++);
            // Chapter name: the fourth link of the breadcrumb navigation.
            String chapter = document.select(".h_Nav a").get(3).text();
            // Question text.
            String question = document.selectFirst("#WinContent div strong a").text();
            // Correct answer.
            String answer = document.selectFirst("#WinContent div ul li i u").text();

            // Derive the question type from the answer and build the option string.
            String type;
            String option;
            if (answer.equals("对") || answer.equals("错")) {
                // True/false question: fixed pair of options.
                type = subject + ":judge";
                option = "对:错";
            } else {
                // A one-character answer means single choice, anything longer is multiple choice.
                type = subject + (answer.length() == 1 ? ":one" : ":more");
                // Only the first four list items are options; each one is followed by ':'.
                Elements elements = document.select("#WinContent div ul li");
                StringBuilder options = new StringBuilder();
                int limit = Math.min(4, elements.size());
                for (int k = 0; k < limit; k++) {
                    options.append(elements.get(k).text()).append(":");
                }
                option = options.toString();
            }

            // Explanation of the answer: second paragraph of the note section.
            String analysis = document.select("#NoteContent p").get(1).text();

            // Picture: "0" marks "no picture" for both the URL and the local path.
            String picpath;
            String picurl;
            Element picElement = document.selectFirst("#WinContent div div img.min");
            if (picElement != null) {
                String picName = subject + "_" + num + ".gif";
                picurl = "https:" + picElement.attr("src");
                // Record where the image is written so the stored question can locate it.
                // writePicTo reports download failures itself, so the path is recorded either way.
                picpath = imageDir + picName;
                writePicTo(imageDir, picName, picurl);
            } else {
                picpath = "0";
                picurl = "0";
            }

            System.out.println("题号: " + num);
            System.out.println("类型: " + type);
            System.out.println("章节: " + chapter);
            System.out.println("题目: " + question);
            System.out.println("选项: " + option);
            System.out.println("答案: " + answer);
            System.out.println("题解: " + analysis);
            System.out.println("图片: " + picurl);
            System.out.println("图片本地地址: " + picpath);

            Question question1 = new Question();
            question1.setNum(Integer.parseInt(num));
            question1.setType(type);
            question1.setChapter(chapter);
            question1.setQuestion(question);
            question1.setOptions(option);
            question1.setAnswer(answer);
            question1.setAnalysis(analysis);
            question1.setPicpath(picpath);
            question1.setPicurl(picurl);
            // Persist the question.
            questionService.add(question1);

            // URL of the next question. An empty string ends the loop: prefixing the base URL
            // unconditionally would make the loop condition impossible to satisfy.
            Element nextLink = document.select("#WinContent div table tr td a").last();
            String href = nextLink == null ? "" : nextLink.attr("href");
            String nextUrl = href.isEmpty() ? "" : baseUrl + href;
            // A link back to the current page would otherwise repeat this page forever.
            url = nextUrl.equals(url) ? "" : nextUrl;
            System.out.println("url: " + url);
        }
    }


    /**
     * Downloads a picture and writes it to a local file.
     *
     * <p>Failures are reported with a stack trace and otherwise ignored, so one
     * broken picture does not abort the caller. Both streams are closed whether or
     * not the copy succeeds.
     *
     * @param path output directory, including the trailing separator
     * @param name file name of the picture, including its extension
     * @param url  address the picture is downloaded from
     */
    private void writePicTo(String path, String name, String url) {
        try {
            // Open the connection to the picture address.
            URLConnection connection = new URL(url).openConnection();
            // try-with-resources closes both streams, which the manual version never did.
            try (InputStream is = connection.getInputStream();
                 OutputStream os = new FileOutputStream(new File(path + name))) {
                System.out.println("保存图片到: " + path + name);
                // Copy the response body to the file in 1 KiB chunks.
                byte[] b = new byte[1024];
                int count;
                while ((count = is.read(b)) != -1) {
                    os.write(b, 0, count);
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值