// Target URL: V女神
package main
import (
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"path"
	"regexp"
	"strings"

	"github.com/tidwall/gjson"
)
// main starts the crawl at the first page: a nil ctime makes DoWork issue
// its request without a ctime query parameter.
func main() {
	var firstPage interface{} // nil: no paging cursor yet
	DoWork(firstPage)
}
// titleSanitizer turns a post title into a single directory name: newlines
// become spaces and path separators become underscores, so a title taken from
// the remote response cannot create nested directories or leave the download
// root.
var titleSanitizer = strings.NewReplacer("\n", " ", "/", "_", "\\", "_")

// ProcessData parses the JSON text of one response page and downloads the
// images of every picture post it contains.
//
// When the "errno" field is non-zero, the "errmsg" field is printed and the
// zero values are returned. Otherwise every entry of "data.list" whose
// "itemData.media_type" is 2 (picture post; 3 is a video post and is skipped)
// gets a directory under ./V女神 named after its sanitised title, and each
// "original.url" found in "itemData.imgSrc" is downloaded into that directory
// by its own goroutine. The function prints the number of images, an empty
// line, the index of every finished download and finally any download errors.
//
// Returns:
//   - hasMore: the "data.hasMore" field; 1 means another page is available.
//   - queryCtime: the "data.query.ctime" field, to be sent with the next request.
func ProcessData(dataText string) (hasMore int64, queryCtime string) {
	// Parse once and query the parsed result instead of re-scanning the text
	// for every field.
	root := gjson.Parse(dataText)

	// errno == 0 means success.
	if root.Get("errno").Int() != 0 {
		fmt.Println(root.Get("errmsg").String())
		return
	}

	hasMore = root.Get("data.hasMore").Int()
	queryCtime = root.Get("data.query.ctime").String()

	root.Get("data.list").ForEach(func(_, item gjson.Result) bool {
		// Only picture posts are of interest.
		if item.Get("itemData.media_type").Int() != 2 {
			return true // keep iterating
		}

		title := titleSanitizer.Replace(item.Get("itemData.title").String())
		if title == ".." {
			// Would otherwise resolve to the parent of the download root.
			title = "_"
		}

		// One directory per post, named after its title.
		savePath := strings.Join([]string{"./V女神", title}, "/")
		if err := os.MkdirAll(savePath, os.ModePerm); err != nil {
			fmt.Println(err)
			return true // skip this post, keep iterating
		}

		// The detail-page URL (built from itemData.tts.id and feed_id) is not
		// needed: the list entry already carries the original image URLs.
		var urls []string
		item.Get("itemData.imgSrc").ForEach(func(_, img gjson.Result) bool {
			urls = append(urls, img.Get("original.url").String())
			return true // keep iterating
		})

		page := make(chan int)
		// Buffered so a worker can always deliver its result and exit, even
		// though results are only read after all completion signals.
		errs := make(chan error, len(urls))
		for idx, imgURL := range urls {
			go func(idx int, imgURL string) {
				errs <- HttpGetDownload(imgURL, savePath, idx, page)
			}(idx, imgURL)
		}

		fmt.Println(len(urls))
		fmt.Println()
		// Every download sends its index on page exactly once when it ends.
		for range urls {
			fmt.Println(<-page)
		}
		// Report failures instead of silently dropping them.
		for range urls {
			if err := <-errs; err != nil {
				fmt.Println(err)
			}
		}
		return true // keep iterating
	})

	return hasMore, queryCtime
}
// HttpGetDownload downloads the resource at durl and stores it in the
// directory savePath, naming the file after the last element of the URL path.
//
// i is a job index chosen by the caller. It is sent on page exactly once when
// the function returns, on success and on failure alike, so the caller can
// count finished downloads. The send blocks until the value is received unless
// page is buffered.
//
// A non-nil error is returned when durl is not a valid request URI, the
// request fails, the server answers with a status other than 200, the URL
// path yields no usable file name, or the file cannot be created, written or
// closed. Nothing is written to disk unless the server answered 200.
func HttpGetDownload(durl, savePath string, i int, page chan<- int) (err error) {
	// Registered first so it runs last: completion is signalled only after
	// the response body and the file have been closed.
	defer func() {
		page <- i
	}()

	// Validate the address before touching the network; uri is dereferenced
	// below, so a parse failure must stop here.
	uri, err := url.ParseRequestURI(durl)
	if err != nil {
		return err
	}

	resp, err := http.Get(durl)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Do not store an error page under an image name.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("download %s: unexpected status %s", durl, resp.Status)
	}

	// path.Base returns "." for an empty path and "/" for a root path.
	filename := path.Base(uri.Path)
	if filename == "." || filename == "/" || filename == ".." {
		return fmt.Errorf("download %s: url path has no file name", durl)
	}

	file, err := os.Create(strings.Join([]string{savePath, filename}, "/"))
	if err != nil {
		return err
	}
	defer func() {
		// A failed close can mean lost data; report it unless an earlier
		// error is already being returned.
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	// Stream the body to disk; read and write errors are both propagated.
	_, err = io.Copy(file, resp.Body)
	return err
}
// jsonpWrapper matches the JSONP envelope around the JSON payload, e.g.
// __jsonp01570273531590({...}). Group 1 is the payload; (?s) lets it span
// several lines. It is compiled once instead of on every request.
var jsonpWrapper = regexp.MustCompile(`(?s)__jsonp\d{14}\((.*)\)`)

// HttpGet sends a GET request to url with fixed Accept, User-Agent and Cookie
// headers and returns the response body as text.
//
// When the body contains a JSONP envelope of the form __jsonp<14 digits>(...),
// only the text between the parentheses is returned, so anything surrounding
// the call (such as a trailing semicolon) is dropped. A body without such an
// envelope is returned unchanged.
//
// An error is returned when the request cannot be built or sent, when the
// status code is not 200 (the error text is the response status), or when the
// body cannot be read. result is empty whenever err is non-nil.
//
// NOTE(review): no timeout is configured, so a stalled server blocks the
// caller indefinitely.
func HttpGet(url string) (result string, err error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return "", err
	}

	req.Header.Set("Accept", "application/json")
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36")
	// NOTE(review): this hard-coded cookie carries a BDUSS value, which looks
	// like a login session token. Confirm, rotate it, and load it from
	// configuration instead of keeping it in source control.
	req.Header.Set("Cookie", "BAIDUID=DBA49610A5448D1B534FD1FE56DC15D5:FG=1; PSTM=1563194999; BIDUPSID=1BCBB67D8BB74CBF87A878E4F6D05BD7; BDUSS=E1sUi1FZWtjb2JSSH5iTmZjRjZJd3diNVFsQkZIaUhDNH5jbHhzOFNWaFQtNUZkSVFBQUFBJCQAAAAAAAAAAAEAAAC~JYxSus68-7OuAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFNual1TbmpdU; H_PS_PSSID=1464_21109_29522_29720_29567_29220_26350; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=6; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hmery-Time=2965756316")

	// The shared default client is enough here; a fresh zero-value client per
	// call added nothing.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", errors.New(resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	text := string(body)
	if m := jsonpWrapper.FindStringSubmatch(text); m != nil {
		return m[1], nil
	}
	return text, nil
}
// DoWork crawls the feed page by page, starting at ctime.
//
// ctime is the paging cursor: pass nil (or any non-string value) to request
// the first page, or the queryCtime string returned for a previous page to
// continue from there. Every page is fetched with HttpGet and handed to
// ProcessData; the crawl goes on while ProcessData reports hasMore == 1, and
// the cursor of each following page is printed before it is requested.
//
// A failed request is printed and ends the crawl.
func DoWork(ctime interface{}) {
	// A nil or non-string ctime yields hasCursor == false: no ctime parameter.
	cursor, hasCursor := ctime.(string)

	// Iterate instead of recursing so the stack does not grow with the number
	// of pages.
	for {
		ctimeQuery := ""
		if hasCursor {
			ctimeQuery = "&ctime=" + url.QueryEscape(cursor)
		}
		pageURL := "https://mbd.baidu.com/webpage?tab=dynamic&num=6&uk=eJShY1irh16U4lWB6HOpiQ" + ctimeQuery + "&type=newhome&action=dynamic&format=jsonp&Tenger-Mhor=2965756316&callback=__jsonp01570273531590"

		content, err := HttpGet(pageURL)
		if err != nil {
			// Nothing to parse; do not feed empty content to ProcessData.
			fmt.Println(err)
			return
		}

		hasMore, queryCtime := ProcessData(content)
		if hasMore != 1 {
			return
		}
		fmt.Println(queryCtime)

		// Defensive: a cursor that does not advance would request the same
		// page forever.
		if hasCursor && queryCtime == cursor {
			return
		}
		cursor, hasCursor = queryCtime, true
	}
}