import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
/**
 * Scrapes product thumbnail images from JD.com search result pages and saves them to a local
 * directory.
 *
 * <p>The search URL is hard-coded (the keyword parameter is the URL-encoded form of "笔记本");
 * result pages {@value #FIRST_PAGE} through {@value #LAST_PAGE} are fetched in order.
 *
 * @author caidingnu
 * @version 1.0.0
 * @create 2019-04-02 20:11
 */
public class Spider {
    /** Search URL up to and including the {@code page=} parameter name. */
    private static final String SEARCH_URL_PREFIX =
            "https://search.jd.com/Search?keyword=%E7%AC%94%E8%AE%B0%E6%9C%AC&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&page=";

    /** Remainder of the search URL that follows the page number. */
    private static final String SEARCH_URL_SUFFIX = "&s=56&click=0";

    /** First result page requested. */
    private static final int FIRST_PAGE = 1;

    /** Last result page requested (inclusive). */
    private static final int LAST_PAGE = 21;

    /** CSS selector for the product thumbnails on a result page. */
    private static final String IMAGE_SELECTOR = "img.err-product";

    /** Attribute of the thumbnail element that holds the protocol-relative image URL. */
    private static final String IMAGE_URL_ATTRIBUTE = "source-data-lazy-img";

    /** Directory (with trailing separator) that downloaded images are written to. */
    private static final String OUTPUT_DIR = "C:\\Users\\cdn\\Desktop\\新建文件夹\\";

    /** Last timestamp handed out by {@link #nextUniqueTimestamp()}; guarded by the class lock. */
    private static long lastTimestamp;

    /**
     * Fetches every configured search result page and downloads each product thumbnail whose
     * lazy-load attribute contains a {@code //...jpg} URL.
     *
     * @param args unused
     * @throws IOException if a result page cannot be fetched or an image cannot be downloaded;
     *     the first failure aborts the run
     */
    public static void main(String[] args) throws IOException {
        for (int page = FIRST_PAGE; page <= LAST_PAGE; page++) {
            Document doc = Jsoup.connect(SEARCH_URL_PREFIX + page + SEARCH_URL_SUFFIX).post();
            // Read the attribute from the parsed element instead of splitting the serialized
            // HTML on an exact tag string, which breaks on any change in attribute order/values.
            for (Element image : doc.select(IMAGE_SELECTOR)) {
                String lazySource = image.attr(IMAGE_URL_ATTRIBUTE);
                int start = lazySource.indexOf("//");
                int end = lazySource.indexOf("jpg");
                if (start < 0 || end < start) {
                    // No protocol-relative ".jpg" URL in this element; nothing to download.
                    continue;
                }
                // Keep everything from "//" through the first "jpg", dropping any suffix after it.
                String imageUrl = lazySource.substring(start, end + 3);
                download(imageUrl);
                System.out.println(imageUrl);
            }
        }
    }

    /**
     * Downloads one image over HTTP and stores it as {@code <timestamp>.jpg} in the output
     * directory.
     *
     * @param imageUrl protocol-relative image URL (starting with {@code //}); {@code "http:"} is
     *     prepended before the connection is opened
     * @throws IOException if the URL is malformed, the remote stream cannot be read, or the
     *     target file cannot be written
     */
    public static void download(String imageUrl) throws IOException {
        URL url = new URL("http:" + imageUrl);
        File target = new File(OUTPUT_DIR + nextUniqueTimestamp() + ".jpg");
        // try-with-resources closes both streams even when the copy fails midway.
        try (InputStream in = url.openStream();
                OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[8192];
            int length;
            while ((length = in.read(buffer)) != -1) {
                out.write(buffer, 0, length);
            }
        }
    }

    /**
     * Returns the current time in milliseconds, bumped when necessary so that every call within
     * this JVM yields a strictly larger value than the previous one. This prevents two downloads
     * that finish within the same millisecond from overwriting each other's file.
     */
    private static synchronized long nextUniqueTimestamp() {
        lastTimestamp = Math.max(System.currentTimeMillis(), lastTimestamp + 1);
        return lastTimestamp;
    }
}