hand 包下
zonghengxs.go
免责声明:本文仅供学习使用,不得用于其他用途,否则后果自负。
package hand
import (
"regexp"
"strconv"
"strings"
"tools/otheroper"
)
// ZongHengXS groups the scraping steps for book.zongheng.com: categories,
// books of a category, chapters of a book and the text of one chapter.
// Its methods download pages through PP.GetContent and read or overwrite the
// request URL and regexp fields stored on PP.
type ZongHengXS struct {
// PP provides GetContent and holds the per-step URL and regexp settings.
PP ParseProcess
}
// GetClassifs downloads the page at PP.ClassifRequestUrl, applies
// PP.ClassifRegexpstr to it and returns one entry per match.
//
// The configured expression must expose at least two capture groups. Group 1
// is spliced into a store listing URL of the form
// http://book.zongheng.com/store/c<group1>/c0/b0/u0/p1/v9/s9/t0/u0/i1/ALL.html
// and appended to ResultUrls; group 2 is appended to Items. An invalid
// expression makes regexp.MustCompile panic.
func (zh *ZongHengXS) GetClassifs() HandResult {
	const (
		storePrefix = "http://book.zongheng.com/store/c"
		storeSuffix = "/c0/b0/u0/p1/v9/s9/t0/u0/i1/ALL.html"
	)

	page := zh.PP.GetContent(zh.PP.ClassifRequestUrl)
	pattern := regexp.MustCompile(zh.PP.ClassifRegexpstr)

	var out HandResult
	for _, groups := range pattern.FindAllSubmatch(page, -1) {
		link := otheroper.GetBuildStr(storePrefix, string(groups[1]), storeSuffix)
		out.ResultUrls = append(out.ResultUrls, link)
		out.Items = append(out.Items, string(groups[2]))
	}
	return out
}
// zhPageSegmentRe matches the page path segment ("/p<digits>/") of a store
// listing URL such as .../u0/p1/v9/....
var zhPageSegmentRe = regexp.MustCompile(`/p[0-9]+/`)

// zhListingPageUrl returns classifyUrl pointed at page pageIndex.
//
// A "p#" placeholder is replaced first (the historical template form). When
// there is no placeholder, the first concrete "/p<digits>/" segment is
// rewritten instead, which is the form GetClassifs produces ("/p1/"). A URL
// with neither form is returned unchanged.
func zhListingPageUrl(classifyUrl string, pageIndex int) string {
	page := strconv.Itoa(pageIndex)
	if strings.Contains(classifyUrl, "p#") {
		return strings.Replace(classifyUrl, "p#", "p"+page, 1)
	}
	if loc := zhPageSegmentRe.FindStringIndex(classifyUrl); loc != nil {
		return classifyUrl[:loc[0]] + "/p" + page + "/" + classifyUrl[loc[1]:]
	}
	return classifyUrl
}

// GetBooks fetches page pageIndex of a category listing and returns the books
// linked from it.
//
// classifyUrl is the category listing URL, either with a "p#" page
// placeholder or with a concrete "/p<digits>/" segment; see zhListingPageUrl.
// Previously only the placeholder form was handled, so pageIndex was silently
// ignored for the URLs returned by GetClassifs.
//
// For every book link on the page, ResultUrls receives
// http://book.zongheng.com/book/<id>.html and Items receives the link text.
// PP.BookRequestUrl is set to classifyUrl (not the page-specific URL) and
// PP.BookRegexpstr to the fixed expression used here.
//
// The boolean result is currently always true: the page-count check
// (count="N" on the listing page) that would report an exhausted category is
// disabled, so callers have to detect the end of a category themselves, for
// example by an empty Items slice.
func (zh *ZongHengXS) GetBooks(classifyUrl string, pageIndex int) (HandResult, bool) {
	content := zh.PP.GetContent(zhListingPageUrl(classifyUrl, pageIndex))

	zh.PP.BookRequestUrl = classifyUrl
	zh.PP.BookRegexpstr = `<a href="http://book.zongheng.com/book/([0-9]*).html" target="_blank">([^"]+)</a>`
	re := regexp.MustCompile(zh.PP.BookRegexpstr)

	result := HandResult{}
	for _, m := range re.FindAllSubmatch(content, -1) {
		bookUrl := otheroper.GetBuildStr(
			"http://book.zongheng.com/book/", string(m[1]), ".html")
		result.ResultUrls = append(result.ResultUrls, bookUrl)
		result.Items = append(result.Items, string(m[2]))
	}
	return result, true
}
// GetChapters fetches the chapter index page at ChapterRequestUrl and returns
// every chapter link found on it.
//
// The URL is stored in PP.ChapterRequestUrl and the fixed link expression in
// PP.ChapterRegexpstr. For each match the href (group 1) is appended to
// ResultUrls and the link text (group 3) to Items; the title attribute
// (group 2) is matched but not returned.
func (zh *ZongHengXS) GetChapters(ChapterRequestUrl string) HandResult {
	zh.PP.ChapterRequestUrl = ChapterRequestUrl
	page := zh.PP.GetContent(zh.PP.ChapterRequestUrl)

	// Example of a matched link:
	// <a href="http://book.zongheng.com/chapter/917253/61403035.html" ...
	zh.PP.ChapterRegexpstr = `href="([^"]+)" target="_blank" title="([^"]+)">([^"]+)</a>`
	links := regexp.MustCompile(zh.PP.ChapterRegexpstr).FindAllSubmatch(page, -1)

	var out HandResult
	for i := range links {
		out.ResultUrls = append(out.ResultUrls, string(links[i][1]))
		out.Items = append(out.Items, string(links[i][3]))
	}
	return out
}
// zhTagRe matches one HTML tag: "<", the shortest possible run of any
// characters (newlines included), then ">".
var zhTagRe = regexp.MustCompile(`\<[\S\s]+?\>`)

// zhBlankRunRe matches two or more consecutive whitespace characters.
var zhBlankRunRe = regexp.MustCompile(`\s{2,}`)

// GetBookContent fetches one chapter page and returns its text with HTML tags
// removed.
//
// BookContentRequestUrl is the chapter page URL; it is stored in
// PP.BookContentRequestUrl and the fixed container expression in
// PP.BookContentRegexpstr. The result always has exactly one entry:
// ResultUrls[0] is the requested URL and Items[0] is the extracted text.
// When the content container is not found on the page, Items[0] is the empty
// string (this case used to panic with an index-out-of-range error).
func (zh *ZongHengXS) GetBookContent(BookContentRequestUrl string) HandResult {
	zh.PP.BookContentRequestUrl = BookContentRequestUrl
	content := zh.PP.GetContent(zh.PP.BookContentRequestUrl)
	zh.PP.BookContentRegexpstr = `<div class="content" itemprop="acticleBody">([^"]+)`
	re := regexp.MustCompile(zh.PP.BookContentRegexpstr)

	result := HandResult{}
	result.ResultUrls = append(result.ResultUrls, zh.PP.BookContentRequestUrl)

	// Only the first match is used; Find returns nil when there is none.
	raw := re.Find(content)
	if raw == nil {
		result.Items = append(result.Items, "")
		return result
	}

	// The capture stops at the first double quote, so the match can end in
	// the middle of an unfinished tag. Appending ">" closes that fragment so
	// it is stripped together with the complete tags.
	text := zhTagRe.ReplaceAllString(string(raw)+">", "\n")
	// Collapse each run of whitespace into a single newline.
	text = zhBlankRunRe.ReplaceAllString(text, "\n")

	result.Items = append(result.Items, text)
	return result
}
上面的代码中用到了函数 otheroper.GetBuildStr(),
下面是 otheroper 包的具体实现代码:
package otheroper
import (
"math/rand"
"strings"
"time"
)
// GetBuildStr concatenates all of its arguments in order and returns the
// result; called with no arguments it returns the empty string.
func GetBuildStr(str ...string) string {
	return strings.Join(str, "")
}
// init seeds the shared math/rand generator once at start-up. Seeding on every
// call with a one-second timestamp, as was done before, made all calls within
// the same second return the same number.
func init() {
	rand.Seed(time.Now().UnixNano())
}

// GetRandBylevel returns a pseudo-random integer in [0, level*10), so a larger
// level widens the range of possible results.
//
// It returns 0 when level is zero or negative (or when level*10 overflows),
// instead of letting rand.Intn panic on a non-positive bound.
func GetRandBylevel(level int) int {
	bound := level * 10
	if level <= 0 || bound <= 0 {
		return 0
	}
	return rand.Intn(bound)
}
// init seeds the shared math/rand generator once at start-up, so RandInt64 no
// longer has to reseed (and sleep) on every call to avoid repeated values.
func init() {
	rand.Seed(time.Now().UnixNano())
}

// RandInt64 returns a pseudo-random number in the half-open range [min, max).
//
// When min >= max, or when either bound is 0, no number is generated and max
// is returned unchanged; the zero-bound rule is existing behaviour that is
// kept for backward compatibility.
//
// isLoop is kept for backward compatibility and is ignored. It used to insert
// a short sleep so that per-call reseeding inside a tight loop would not
// repeat values; with a single seed at start-up that is no longer needed.
func RandInt64(min, max int64, isLoop bool) int64 {
	if min >= max || min == 0 || max == 0 {
		return max
	}

	const maxInt64 = 1<<63 - 1

	// Compute the width in uint64: max-min can overflow int64 when min is
	// very negative and max very positive, which made rand.Int63n panic.
	span := uint64(max) - uint64(min)
	if span <= maxInt64 {
		return min + rand.Int63n(int64(span))
	}
	// Range wider than int64 can express: draw 64 bits and reduce. The
	// wrap-around addition lands back inside [min, max).
	return int64(uint64(min) + rand.Uint64()%span)
}