Java 多线程爬取图书封面图片

学习笔记仅供参考

1. xiancheng.java

package com.example.util;

import com.example.service.bookservice;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;

import java.io.*;
import java.net.*;
import java.util.ArrayList;
import java.util.Random;

/**
 * Worker task that downloads book cover images through rotating HTTP proxies.
 *
 * <p>Each run performs {@code names.length} attempts. Every attempt picks a random
 * title from {@code names}, skips it when {@code C:/IMAGES/<title>.jpg} already
 * exists, searches the book site for the title, follows the first hit to its detail
 * page and stores the cover image. A failed attempt is logged and skipped; it never
 * terminates the task.
 *
 * <p>Instances are not thread-safe; submit one instance per pool task.
 */
public class xiancheng implements Runnable{
    /** Directory that receives the downloaded covers. */
    private static final String IMAGE_DIR = "C:/IMAGES/";
    private static final String SEARCH_URL_PREFIX = "http://book.ucdrs.superlib.net/search?sw=";
    private static final String SEARCH_URL_SUFFIX = "&allsw=&bCon=&ecode=utf-8&channel=search&Field=all";
    private static final String DETAIL_URL_PREFIX =
            "http://book.ucdrs.superlib.net/views/specific/2929/bookDetail.jsp?dxNumber=";
    private static final String DETAIL_REFERER = "http://book.ucdrs.superlib.net/antispiderShowVerify.ac";

    /** Number of books requested through one proxy before moving to the next one. */
    private static final int REQUESTS_PER_PROXY = 20;
    /** A fresh proxy list is requested each time the proxy index reaches a multiple of this. */
    private static final int PROXIES_PER_REFRESH = 20;
    private static final int PAGE_TIMEOUT_MS = 50000;
    private static final int IMAGE_CONNECT_TIMEOUT_MS = 5 * 1000;
    private static final int IMAGE_READ_TIMEOUT_MS = 50000;
    /** Pause before each image download so the site is not hammered. */
    private static final long DOWNLOAD_DELAY_MS = 1000;

    /** Browser identities; one is chosen at random for every book. */
    private static final String[] USER_AGENTS = {
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
            "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
            // The original literal was truncated ("W…"); restored to a well-formed Firefox 60 string.
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0"};

    /** Proxy addresses in {@code host:port} form; replaced by a fresh list during rotation. */
    ArrayList<String> ips;
    /** Book titles to look up. */
    String[] names;

    private final Random random = new Random();
    /** Index into {@link #ips} of the proxy currently in use. */
    private int proxyIndex;
    /** How many books have been requested through the current proxy. */
    private int requestsOnProxy;

    /**
     * @param ipss   initial proxy list, each entry formatted as {@code host:port}
     * @param namess book titles whose covers should be downloaded
     */
    public xiancheng(ArrayList<String>ipss,String[] namess){
        ips=ipss;
        names=namess;
    }

    /**
     * Runs {@code names.length} download attempts, each for a randomly chosen title.
     * Returns early when there is nothing to work with or when the thread is interrupted.
     */
    @Override
    public void run() {
        if (names == null || names.length == 0 || ips == null || ips.isEmpty()) {
            System.err.println(Thread.currentThread().getName() + ": no book names or proxies, nothing to do");
            return;
        }
        // Start at a random proxy so parallel tasks do not all begin on the same one.
        proxyIndex = random.nextInt(ips.size());
        requestsOnProxy = 0;

        for (int attempt = 0; attempt < names.length; attempt++) {
            if (Thread.currentThread().isInterrupted()) {
                return;
            }
            String bookname = names[random.nextInt(names.length)];
            if (bookname == null || bookname.isEmpty()) {
                continue;
            }
            File target = new File(IMAGE_DIR + bookname + ".jpg");
            if (target.exists()) {
                continue;
            }
            String proxyAddress = nextProxy();
            try {
                downloadCover(bookname, target, proxyAddress);
            } catch (InterruptedException e) {
                // Restore the flag so the owning executor can observe the interruption.
                Thread.currentThread().interrupt();
                return;
            } catch (IOException e) {
                System.err.println(Thread.currentThread().getName() + ": failed to download \"" + bookname
                        + "\" via " + proxyAddress + ": " + e);
            }
        }
    }

    /**
     * Returns the proxy to use for the next book and advances the rotation: after
     * {@link #REQUESTS_PER_PROXY} books the next proxy is selected, and when the index
     * reaches a multiple of {@link #PROXIES_PER_REFRESH} or runs past the end of the
     * list, a fresh list is requested and the index restarts at zero.
     */
    private String nextProxy() {
        String current = ips.get(proxyIndex);
        requestsOnProxy++;
        if (requestsOnProxy >= REQUESTS_PER_PROXY) {
            requestsOnProxy = 0;
            proxyIndex++;
            if (proxyIndex % PROXIES_PER_REFRESH == 0 || proxyIndex >= ips.size()) {
                refreshProxies();
                proxyIndex = 0;
            }
        }
        return current;
    }

    /** Replaces {@link #ips} with a fresh list; keeps the old list when the refresh fails or is empty. */
    private void refreshProxies() {
        try {
            ArrayList<String> fresh = new pachong().ips1();
            if (fresh != null && !fresh.isEmpty()) {
                ips = fresh;
            }
        } catch (IOException e) {
            System.err.println("Could not refresh the proxy list, reusing the previous one: " + e);
        }
    }

    /**
     * Searches for one title and stores its cover as {@code target}. The larger image
     * from the detail page is preferred; the search-result thumbnail is the fallback.
     * Returns without writing anything when the search yields no image.
     *
     * @throws IOException          if the proxy entry is malformed or no image could be downloaded
     * @throws InterruptedException if interrupted during the pause before the download
     */
    private void downloadCover(String bookname, File target, String proxyAddress)
            throws IOException, InterruptedException {
        if (proxyAddress == null) {
            throw new IOException("Proxy entry is null");
        }
        String[] hostAndPort = proxyAddress.trim().split(":");
        if (hostAndPort.length < 2) {
            throw new IOException("Malformed proxy entry: " + proxyAddress);
        }
        String proxyHost = hostAndPort[0];
        int proxyPort;
        try {
            proxyPort = Integer.parseInt(hostAndPort[1].trim());
        } catch (NumberFormatException e) {
            throw new IOException("Malformed proxy port in: " + proxyAddress, e);
        }
        System.err.println(proxyHost + ":" + proxyPort);

        String userAgent = USER_AGENTS[random.nextInt(USER_AGENTS.length)];
        String encodedName = URLEncoder.encode(bookname, "utf-8");

        // The proxy is configured per request: the JVM-wide http.proxyHost/http.proxyPort
        // properties are shared by all worker threads and would be overwritten concurrently.
        String searchUrl = SEARCH_URL_PREFIX + encodedName + SEARCH_URL_SUFFIX;
        Document searchPage = Jsoup.connect(searchUrl)
                .proxy(proxyHost, proxyPort)
                .timeout(PAGE_TIMEOUT_MS)
                .ignoreHttpErrors(true)
                .userAgent(userAgent)
                .get();

        Elements thumbnails = searchPage.select(".px14 img");
        System.out.println(Thread.currentThread().getName()+" 下载的:"+bookname+"个数为:"+thumbnails.size());
        if (thumbnails.isEmpty()) {
            return;
        }
        String thumbnailSrc = thumbnails.first().attr("src").trim();

        String no = "";
        Elements idFields = searchPage.select("#dxid0");
        if (!idFields.isEmpty()) {
            no = idFields.first().attr("value").trim();
            System.out.println(no);
        }
        System.out.println(thumbnailSrc);

        // NOTE(review): the encoded title is appended directly after the dxNumber value,
        // exactly as before - confirm that this is what the detail endpoint expects.
        String detailUrl = DETAIL_URL_PREFIX + no + encodedName;
        String detailSrc = "";
        try {
            Document detailPage = Jsoup.connect(detailUrl)
                    .proxy(proxyHost, proxyPort)
                    .timeout(PAGE_TIMEOUT_MS)
                    .ignoreHttpErrors(true)
                    .userAgent(userAgent)
                    .header("referer", DETAIL_REFERER)
                    .get();
            Elements covers = detailPage.select(".tubookimg img");
            if (!covers.isEmpty()) {
                detailSrc = covers.first().attr("src").trim();
            }
        } catch (IOException e) {
            System.err.println("Detail page unavailable for \"" + bookname + "\", using the thumbnail: " + e);
        }

        Thread.sleep(DOWNLOAD_DELAY_MS);

        Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyHost, proxyPort));
        if (!detailSrc.isEmpty()) {
            try {
                saveImage(new URL(new URL(detailUrl), detailSrc), proxy, target);
                return;
            } catch (IOException e) {
                System.err.println("Detail cover failed for \"" + bookname + "\", using the thumbnail: " + e);
            }
        }
        if (thumbnailSrc.isEmpty()) {
            throw new IOException("Search result image has no src attribute");
        }
        // Resolving against the page URL also handles relative src values.
        saveImage(new URL(new URL(searchUrl), thumbnailSrc), proxy, target);
    }

    /**
     * Downloads {@code imageUrl} through {@code proxy} into {@code target}. The bytes go
     * to a temporary file first and are renamed only after a complete transfer, so an
     * aborted download never leaves a truncated image that later runs would skip.
     */
    private void saveImage(URL imageUrl, Proxy proxy, File target) throws IOException {
        File directory = target.getAbsoluteFile().getParentFile();
        if (directory != null && !directory.exists()) {
            directory.mkdirs();
        }
        // A unique temporary name keeps two workers that picked the same title apart.
        File partial = File.createTempFile("cover", ".part", directory);
        HttpURLConnection conn = (HttpURLConnection) imageUrl.openConnection(proxy);
        boolean complete = false;
        try {
            conn.setRequestProperty("contentType", "GBK");
            conn.setConnectTimeout(IMAGE_CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(IMAGE_READ_TIMEOUT_MS);
            conn.setRequestMethod("GET");
            try (InputStream in = conn.getInputStream();
                 FileOutputStream out = new FileOutputStream(partial)) {
                byte[] buf = new byte[8192];
                int size;
                while ((size = in.read(buf)) != -1) {
                    out.write(buf, 0, size);
                }
            }
            complete = true;
        } finally {
            conn.disconnect();
            if (!complete) {
                partial.delete();
            }
        }
        if (!partial.renameTo(target)) {
            partial.delete();
            // The rename also fails when another worker stored the same cover first; that is fine.
            if (!target.exists()) {
                throw new IOException("Could not move the downloaded image to " + target);
            }
        }
    }
}

2. pachong.java

package com.example.util;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.example.service.bookservice;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;

import java.io.*;
import java.lang.reflect.Array;
import java.net.*;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

@Controller
/**
 * Entry point of the cover crawler: fetches proxy addresses from the proxy provider's
 * HTTP API and starts the {@link xiancheng} download workers.
 */
public class pachong {
    /** Proxy API URL up to and including {@code qty=}; the requested amount is appended. */
    private static final String PROXY_API_PREFIX =
            "http://ip3.11jsq.com/index.php/api/entry?method=proxyServer.generate_api_url&fetch_key=&groupid=0&city=&port=1&css=&specialTxt=3&specialJson=&usertype=2&pro=&packid=0&fa=0&time=101&qty=";
    private static final String PROXY_API_SUFFIX = "&format=txt&ss=1&dt=1&nofreq=hZvph5Mx47od";
    private static final int CONNECT_TIMEOUT_MS = 5 * 1000;
    private static final int READ_TIMEOUT_MS = 30 * 1000;
    private static final int POOL_SIZE = 30;
    private static final int WORKER_COUNT = 35;

    @Autowired
    bookservice bs;

    public static void main(String[] args) throws IOException {


    }

    /**
     * Requests 100 proxy addresses from the proxy API.
     *
     * @return the non-blank response lines, one address per entry
     * @throws IOException if the API cannot be reached or read
     */
    public ArrayList<String> ips() throws IOException {
        return fetchProxies(100);
    }

    /**
     * Requests 20 proxy addresses from the proxy API.
     *
     * @return the non-blank response lines, one address per entry
     * @throws IOException if the API cannot be reached or read
     */
    public ArrayList<String> ips1() throws IOException {
        return fetchProxies(20);
    }

    /**
     * Calls the proxy API for {@code qty} addresses and returns the plain-text response
     * split into lines. Blank lines are dropped, and every kept line is echoed to
     * standard output. The connection and reader are always released.
     */
    private ArrayList<String> fetchProxies(int qty) throws IOException {
        URL url = new URL(PROXY_API_PREFIX + qty + PROXY_API_SUFFIX);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        ArrayList<String> ipp = new ArrayList<String>();
        try {
            conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
            conn.setRequestProperty("contentType", "GBK");
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            // Without a read timeout a stalled API response would block the caller forever.
            conn.setReadTimeout(READ_TIMEOUT_MS);
            conn.setRequestMethod("GET");

            try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream(), "GBK"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    line = line.trim();
                    if (line.isEmpty()) {
                        continue;
                    }
                    ipp.add(line);
                    System.out.println(line);
                }
            }
        } finally {
            conn.disconnect();
        }
        return ipp;
    }

    /**
     * Fetches a proxy list and submits 35 {@link xiancheng} workers, all sharing that
     * list and {@code names}, to a pool of 30 threads. The method returns as soon as the
     * workers are queued; it does not wait for the downloads to finish.
     *
     * @param names book titles whose covers should be downloaded
     * @throws IOException if the initial proxy list cannot be fetched
     */
    public void xiancheng(String[] names) throws IOException, InterruptedException {
        // Fetch first: if this throws, no thread pool has been created yet.
        ArrayList<String> ips = ips();

        ExecutorService pool = Executors.newFixedThreadPool(POOL_SIZE);
        try {
            for (int i = 0; i < WORKER_COUNT; i++) {
                pool.execute(new xiancheng(ips, names));
            }
        } finally {
            // Queued workers still run to completion; the idle threads are then released
            // instead of lingering for the lifetime of the JVM.
            pool.shutdown();
        }
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值