Go语言爬虫项目将结果写入MySQL数据库

Go语言网络爬虫实战

最新推荐文章于 2025-09-15 15:09:47 发布

原创最新推荐文章于 2025-09-15 15:09:47 发布 · 1.8k 阅读

8 ·

CC 4.0 BY-SA版权

文章标签：

#Go语言爬虫项目将结果写入MySql数据库

go语言专栏收录该内容

26 篇文章

订阅专栏

本文详细介绍使用Go语言进行网络爬虫的实现过程，包括如何发送HTTP请求获取网页内容，运用正则表达式解析HTML，以及将抓取的数据存储到MySQL数据库中。通过具体示例，读者将了解到Go语言在网络数据抓取方面的强大能力。

来源于Google资深工程师深度讲解Go语言

package main


import (
	"fmt"
	"io/ioutil"
	"net/http"
)


// url is the address that main requests with http.Get.
const url = "http://www.zhenai.com/zhenghun"


// main downloads the page at url and prints its raw body to stdout.
//
// A failed request panics. A non-200 status or a body read failure is
// reported on stdout and ends the program without printing the body.
func main() {
	// Send the GET request.
	resp, err := http.Get(url)
	if err != nil {
		panic(err)
	}

	// Close the response body when main returns.
	defer resp.Body.Close()

	// Check the status. fmt.Errorf only builds an error value and prints
	// nothing, so the message is written with Printf; it reports the status
	// that was actually received.
	if resp.StatusCode != http.StatusOK {
		fmt.Printf("StatusCode:%v \n", resp.StatusCode)
		return
	}

	// Read the whole body and print it.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Printf("ReadAll: %s \n", err)
		return
	}
	fmt.Printf("%s\n", body)
}

可以将整个html页面爬取下来

正则表达式处理

package main


import (
	"fmt"
	"io/ioutil"
	"net/http"
	"regexp"
)


// Alternative target kept for reference (disabled):
//const url = "http://www.baidu.com"
// url is the address that main requests with http.Get.
const url = "http://www.zhenai.com/zhenghun"


// main downloads the page at url and hands its body to printListCity.
//
// A failed request panics. A non-200 status or a body read failure is
// reported on stdout and ends the program before any parsing happens.
func main() {
	// Send the GET request.
	resp, err := http.Get(url)
	if err != nil {
		panic(err)
	}

	// Close the response body when main returns.
	defer resp.Body.Close()

	// Check the status. fmt.Errorf only builds an error value and prints
	// nothing, so the message is written with Printf; it reports the status
	// that was actually received.
	if resp.StatusCode != http.StatusOK {
		fmt.Printf("StatusCode:%v \n", resp.StatusCode)
		return
	}

	// Read the whole body and print the city links found in it.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Printf("ReadAll: %s \n", err)
		return
	}
	printListCity(body)
}

// cityListRe matches one city link on the listing page. Capture group 1 is
// the link's URL and capture group 2 is the link text (the city name).
const cityListRe = `<a href="(http://www.zhenai.com/zhenghun/[0-9a-z]+)"[^>]*>([^<]*)</a>`

// printListCity prints every cityListRe match found in contents to stdout:
// first the captured URL, then the captured link text, each followed by a
// newline and a single space.
func printListCity(contents []byte) {
	pattern := regexp.MustCompile(cityListRe)

	for _, match := range pattern.FindAllSubmatch(contents, -1) {
		link, label := match[1], match[2]
		fmt.Printf("%s\n %s\n ", link, label)
	}
}

结果

 http://www.zhenai.com/zhenghun/zhuhai
 珠海
 http://www.zhenai.com/zhenghun/zhumadian
 驻马店
 http://www.zhenai.com/zhenghun/zhuzhou
 株洲
 http://www.zhenai.com/zhenghun/zibo
 淄博
 http://www.zhenai.com/zhenghun/zigong
 自贡
 http://www.zhenai.com/zhenghun/ziyang1
 资阳
 http://www.zhenai.com/zhenghun/zunyi
 遵义

将结果存入数据库

需要注意的是：id 是自增长的主键，不需要出现在 Go 语言的结构体定义中；尤其是在插入数据时，不应该把它算在插入的列之内。

// cityListRe matches one city link on the listing page. Capture group 1 is
// the link's URL and capture group 2 is the link text.
const cityListRe = `<a href="(http://www.zhenai.com/zhenghun/[0-9a-z]+)"[^>]*>([^<]*)</a>`

// mySql stores every city link found in contents in the city_url_id table.
//
// It first prints the rows already in the table, then runs cityListRe over
// contents and inserts one (city, url) row per match. The auto-increment id
// column is neither read nor written. Nothing is returned: a connection or
// query failure is printed to stdout and stops the function, while a failed
// insert is printed and the remaining matches are still attempted.
func mySql(contents []byte) {
	// DSN format: user:password@tcp(host:3306)/database
	// NOTE(review): the credentials are hard-coded in source; move them to
	// configuration or the environment before using this outside a demo.
	db, err := sql.Open("mysql", "root:Kou123$%^@tcp(39.107.87.114:3306)/zhenai?charset=utf8")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer db.Close()

	// Show what is already stored. The result set is fully consumed and
	// closed inside the helper, before any insert is issued.
	if err := printStoredCities(db); err != nil {
		fmt.Println(err)
		return
	}

	rg := regexp.MustCompile(cityListRe)
	for _, m := range rg.FindAllSubmatch(contents, -1) {
		// m[1] is the URL and m[2] is the city name, so they are bound in
		// (city, url) column order.
		_, err := db.Exec("INSERT INTO city_url_id(city,url)VALUES (?,?)", string(m[2]), string(m[1]))
		if err != nil {
			fmt.Println(err)
		}
	}
}

// printStoredCities prints the city and url columns of every row in
// city_url_id, one row per line, and returns the first error encountered.
func printStoredCities(db *sql.DB) error {
	// Table layout as seen from Go; the id column is deliberately left out.
	type info struct {
		city string `db:"city"`
		url  string `db:"url"`
	}

	// The columns are named explicitly: SELECT * would also return id, and
	// Scan fails when the column count differs from the destination count.
	rows, err := db.Query("SELECT city, url FROM city_url_id")
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var s info
		if err := rows.Scan(&s.city, &s.url); err != nil {
			return err
		}
		fmt.Printf("%s %s\n", s.city, s.url)
	}
	// Next returns false both at the end of the data and on failure; Err
	// tells the two apart.
	return rows.Err()
}

在这里插入图片描述