天猫、淘宝商品详情、库存、价格抓包

本文介绍了如何抓取天猫和淘宝平台上的商品详情、库存及价格信息,包括抓包方法和可能遇到的挑战。适合对网络爬虫感兴趣的读者学习。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

如有侵权,请联系作者删除

水平有限,还望大牛指点

<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>

 

import com.sun.tools.doclets.formats.html.SourceToHTMLConverter;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created with Chenquan.
 * Description: 淘宝抓包
 * Date: 2018-12-13
 * Time: 15:12
 */
public class TaobaoCatch {

    /**
     * Program entry point: runs {@link #getAll(String)} for each requested item id.
     *
     * <p>Every command-line argument is treated as one item id (the product id that
     * appears in the product link) and is processed in the order given. When no
     * argument is supplied, the built-in sample id {@code 577996531297} is used, so
     * running the class without arguments behaves exactly as before.
     *
     * @param args item ids to process; may be empty
     */
    public static void main(String[] args) {

        // Disabled sample: query the H5 search API for a keyword and run getAll on
        // every item_id in the result list. The URL signature and the cookie are
        // bound to one session and must be replaced with your own before use.
        // NOTE: if this is re-enabled, resp stays null when execute() throws, so
        // resp.body() must be guarded against that case.
/*        int i = 0;

        String url = "https://acs.m.taobao.com/h5/mtop.taobao.wsearch.h5search/1.0/?jsv=2.3.16&appKey=12574478&t=1545023581359&sign=e3476c9041a75de0a9190da470204d93&api=mtop.taobao.wsearch.h5search&v=1.0&H5Request=true&ecode=1&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22q%22%3A%22%E4%BB%99%E6%B6%B5%E5%86%85%E8%A1%A3%22%2C%22search%22%3A%22%E6%8F%90%E4%BA%A4%22%2C%22tab%22%3A%22all%22%2C%22sst%22%3A%221%22%2C%22n%22%3A20%2C%22buying%22%3A%22buyitnow%22%2C%22m%22%3A%22api4h5%22%2C%22token4h5%22%3A%22%22%2C%22abtest%22%3A%221%22%2C%22wlsort%22%3A%221%22%2C%22page%22%3A1%7D";

        Connection con = Jsoup.connect(url);
        con.header("Cookie", "cna=TA+aFFGXQFUCAXQaRYGZVU8Q; t=efa81a9785cd86f885e13998b6d5f9cb; thw=cn; uc3=vt3=F8dByRzMU9X8Hvccr00%3D&id2=W8zLpWipxVFu&nk2=0PLo6GHZOM8%3D&lg2=V32FPkk%2Fw0dUvg%3D%3D; tracknick=%5Cu9648%5Cu94E81992; lgc=%5Cu9648%5Cu94E81992; _cc_=Vq8l%2BKCLiw%3D%3D; tg=0; enc=4rB%2FfKFx8DJKgPpoHlZjr824CEYw%2BlPaKBDWbFO4fnh6svGA97NoZNGERui4fOo2tXSnSVN1ygkfn5R5ekztTQ%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=0_1; _m_h5_tk=e501ac7690832934d663aef19ee36be5_1545033419107; _m_h5_tk_enc=5147579a652b4fb508dc886d59c37045; isg=BFVVgDOkpYNz64H7Z31pC9thZFHP-goqhI4h7tf6EUwbLnUgn6IZNGPv_DSYLiEc");
//        con.header("referer", "https://item.taobao.com/item.htm ");
        Connection.Response resp = null;
        try {
            resp = con.method(Connection.Method.GET).ignoreContentType(true).execute();
        } catch (IOException e) {
            e.printStackTrace();
        }
        String body = resp.body();
//        System.out.println(body);
        body = body.substring(12, body.length() - 1);
        JSONObject jb = JSONObject.fromObject(body);
        JSONArray jsonArray = jb.getJSONObject("data").getJSONArray("listItem");


//        while(i<100){
            i++;
            for (int j = 0; j < jsonArray.size(); j++) {
                JSONObject jsonObject = jsonArray.getJSONObject(j);
                String item_id = jsonObject.getString("item_id");
                System.out.println("item_id: "+item_id);
                getAll(item_id);
            }
//        }*/

        // No ids given on the command line: fall back to the sample product id
        // (the id taken from the product link).
        if (args == null || args.length == 0) {
            getAll("577996531297");
            return;
        }

        // One getAll call per id, in the order the ids were passed.
        for (String itemId : args) {
            getAll(itemId);
        }
    }

    public static void getAll(String item_id ) {
        try {
            Thread.sleep(2000);//一个休息5s,太快会被禁
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        System.out.println("开始时间:" + new Date());
        Date dateStart = new Date();
        Document doc = null;
        String id = "";
        try {
//            int i = 0;
// 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值