Jenkins 内网私有插件库搭建加爬虫爬取官网插件,源码直接共享

本文介绍了使用Go脚本爬取Jenkins插件网站的详细过程,包括爬取策略、限制下载版本、处理反爬机制以及源码分享。作者还提供了如何配置jenkins插件仓库地址的方法。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

一、编写了Go脚本爬取jenkins插件网上的插件

脚本备注:

1、脚本默认是爬取插件的近20个版本(后期使用发现还是有很多插件版本不全)

修改方式:subdirectory_analysis方法里的第一个入参就是限定下载的版本数,-1则表示不限制

2、插件下载目录可自定义修改

3、Jenkins的插件网有反爬虫机制,会拒绝访问;脚本因此中断后可重新执行,已下载过的插件不会重复下载

二、源码分享

1、源码展示

package main

import (
	"compress/gzip"
	"fmt"
	_ "fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"path"
	"regexp"
	"strings"
	"sync"
	"time"
)

// first_url holds the crawl configuration and the URL lists collected at each
// crawl depth. main builds a single instance and passes pointers to its fields
// to analysis_title and subdirectory_analysis.
type first_url struct {
	root_url  string   // listing page where the crawl starts; fetched by main via http_get
	root_dir  string   // local directory that main creates with mkdir_dir before crawling
	url_path1 []string // URLs found on the root page; fed to the first worker stage
	url_path2 []string // URLs found by the first worker stage; fed to the second stage
	url_path3 []string // URLs found by the second worker stage; not read again in this file
}

// Package-level state shared between main and the crawler workers.
var (
	// Work queues: main creates both, fills them from a feeder goroutine and
	// closes them once every queued URL has been sent.
	down_paral  chan string
	Ndown_paral chan string

	// One WaitGroup per worker stage; main blocks on wg before it starts
	// feeding the second stage, then on nwg before exiting.
	wg  sync.WaitGroup
	nwg sync.WaitGroup
)

// page_client is the HTTP client used by http_get. The timeout keeps a stalled
// connection from blocking a crawler worker forever.
var page_client = &http.Client{Timeout: 60 * time.Second}

// http_get fetches request_url and returns the response body as a string.
//
// On any failure (request error, non-2xx status, gzip header error, read error)
// it prints a diagnostic and returns the sentinel string "Nil", which callers
// compare against to detect a failed fetch.
func http_get(request_url string) string {
	resp, err := page_client.Get(request_url)
	if err != nil {
		fmt.Printf("%v 网页访问失败;失败信息:%v", request_url, err)
		return "Nil"
	}
	// Registered right after the error check so every return path below
	// releases the connection.
	defer resp.Body.Close()

	// An error page (for example the mirror refusing a crawler) is not a
	// directory listing, so report it as a failed fetch instead of parsing it.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		fmt.Printf("%v 网页访问失败;失败信息:%v\n", request_url, resp.Status)
		return "Nil"
	}

	// Read through a gzip reader when the response still advertises gzip.
	// net/http decodes gzip transparently (and strips the header) only when it
	// requested the compression itself, so this covers the remaining cases.
	var body_reader io.Reader = resp.Body
	content_encoding := strings.ToLower(resp.Header.Get("Content-Encoding"))
	if strings.Contains(content_encoding, "gzip") {
		gzip_reader, gzip_err := gzip.NewReader(resp.Body)
		if gzip_err != nil {
			fmt.Printf("网页%v解压失败,失败信息:%v", request_url, gzip_err)
			return "Nil"
		}
		defer gzip_reader.Close()
		body_reader = gzip_reader
	}

	body, body_err := ioutil.ReadAll(body_reader)
	if body_err != nil {
		fmt.Printf("读取网页主体失败,失败信息:%v", body_err)
		return "Nil"
	}
	return string(body)
}

// title_attr_re captures the value of the first title="..." attribute on a line
// of listing HTML. It is compiled once because analysis_title runs for every
// crawled page.
var title_attr_re = regexp.MustCompile("title=\"([^\"]*)\"")

// url_path_mu serializes appends to the URL slices handed to analysis_title:
// main starts several subdirectory_analysis workers that all append to the same
// slice.
var url_path_mu sync.Mutex

// analysis_title scans one listing page and handles every line that contains
// "title" (lines containing "镜像" are skipped).
//
//   - limit_point: when positive, only the last limit_point matching lines are
//     processed; zero or negative means no limit.
//   - root_dir: accepted for interface compatibility; this function does not
//     read it (local paths are built under the hard-coded "E:/" prefix).
//   - url_path: receives every URL discovered on the page, each terminated by
//     the "\n" that fmt.Sprintln adds; callers trim it before use.
//   - url_addr: URL of the page being analysed; entry names are appended to it.
//   - body_result: the page HTML.
//
// Lines mentioning "hpi" are downloaded immediately through down_hpi, followed
// by a 30 second pause. Every other line is treated as a sub-directory and is
// queued unless its local counterpart already exists.
func analysis_title(limit_point int, root_dir *string, url_path *[]string, url_addr *string, body_result string) {
	var line_row []string
	for _, candidate := range strings.Split(body_result, "\n") {
		if strings.Contains(candidate, "title") && !strings.Contains(candidate, "镜像") {
			line_row = append(line_row, candidate)
		}
	}

	// Keep only the newest limit_point entries. The cut uses limit_point itself
	// rather than a fixed count, so any positive limit is honoured and the
	// slice bounds can never go negative.
	if limit_point > 0 && len(line_row) > limit_point {
		line_row = line_row[len(line_row)-limit_point:]
	}

	// record appends under the lock because url_path is shared between workers.
	record := func(link string) {
		url_path_mu.Lock()
		*url_path = append(*url_path, link)
		url_path_mu.Unlock()
	}

	for _, line := range line_row {
		// name stays empty when the line has no title="..." attribute (for
		// example an HTML <title> element); the URL built below is then the
		// page address itself. NOTE(review): the crawl appears to rely on this
		// to pick up the slash-terminated form of the root URL - confirm before
		// changing it.
		name := ""
		if match := title_attr_re.FindStringSubmatch(line); match != nil {
			name = match[1]
		}

		if strings.Contains(line, "hpi") {
			url_path_temp := fmt.Sprintln(*url_addr + name)
			record(url_path_temp)

			// Elements 0-2 are the scheme, the empty string between the two
			// slashes, and the host; the rest mirrors the remote path locally.
			sys_subdir := strings.Split(url_path_temp, "/")
			mkdir_dir("E:/" + strings.Join(sys_subdir[3:len(sys_subdir)-1], "/"))

			down_hpi(strings.Trim(url_path_temp, "\n"), "E:/"+strings.Join(sys_subdir[3:], "/"))
			// Pause between downloads to stay under the site's anti-crawler limits.
			time.Sleep(30 * time.Second)
			continue
		}

		url_path_temp := fmt.Sprintln(*url_addr + name + "/")
		check_file_exists := path.Dir(strings.Replace(url_path_temp, "https://mirrors.tuna.tsinghua.edu.cn", "E:", 1))

		// A directory that already exists locally was handled by an earlier
		// run, so it is not queued again.
		if _, check_err := os.Stat(check_file_exists); !os.IsNotExist(check_err) {
			fmt.Printf("%v 目录已存在,跳过此目录\n", check_file_exists)
			continue
		}
		record(url_path_temp)
	}
}


// subdirectory_analysis is the body of one crawler worker. It takes URLs from
// down_paral until the channel is closed, fetches each page with http_get and
// hands the HTML to analysis_title, which stores the URLs it discovers in
// url_path2. waitGo is marked done when the worker returns.
//
// limit_point and root_dir are passed through to analysis_title unchanged.
func subdirectory_analysis(limit_point int, waitGo *sync.WaitGroup, down_paral chan string, root_dir *string, url_path2 *[]string) {
	defer waitGo.Done()

	for queued_url := range down_paral {
		// Queued entries carry the newline added when they were recorded.
		page_url := strings.Trim(queued_url, "\n")

		page := http_get(page_url)
		if page == "Nil" {
			fmt.Printf("网页%v 访问失败", queued_url)
			continue
		}
		analysis_title(limit_point, root_dir, url_path2, &page_url, page)
	}
}

// mkdir_dir makes sure dir exists, creating it together with any missing
// parent directories. A failure is printed rather than returned because the
// function has no result; the caller's next file operation will then fail and
// report its own error.
func mkdir_dir(dir string) {
	// os.MkdirAll does nothing and returns nil when dir is already a
	// directory, so no separate existence check is needed.
	if err := os.MkdirAll(dir, 0777); err != nil {
		fmt.Printf("%v 目录创建失败,失败信息:%v\n", dir, err)
	}
}

// down_hpi downloads url and stores it at file_path (surrounding whitespace,
// including a trailing newline, is trimmed from file_path first).
//
// If the target file already exists the download is skipped, which is what
// lets an interrupted crawl be restarted. To keep that check trustworthy the
// body is written to "<target>.part" and renamed only after a complete,
// successful transfer; a refused request or a broken transfer leaves no target
// file behind, so the next run retries it.
//
// All failures are printed; nothing is returned.
func down_hpi(url string, file_path string) {
	fileName := path.Join(path.Dir(strings.TrimSpace(file_path)), path.Base(strings.TrimSpace(file_path)))
	if _, stat_err := os.Stat(fileName); stat_err == nil {
		fmt.Printf("%v 文件已存在\n", fileName)
		return
	}

	response, resp_err := http.Get(url)
	if resp_err != nil {
		// response is nil here, so stop before anything touches it.
		fmt.Printf("%v 网页访问失败,报错信息:%v", url, resp_err)
		return
	}
	defer response.Body.Close()

	// Anything but 200 is an error page (for example the mirror rejecting a
	// crawler) and must not be saved as a plugin file.
	if response.StatusCode != http.StatusOK {
		fmt.Printf("%v 网页访问失败,报错信息:%v\n", url, response.Status)
		return
	}

	part_name := fileName + ".part"
	file, file_err := os.Create(part_name)
	if file_err != nil {
		fmt.Printf("%v 文件创建失败,失败信息%v\n", part_name, file_err)
		return
	}

	_, copy_err := io.Copy(file, response.Body)
	// Close before renaming; a failed close also means the data may be incomplete.
	if close_err := file.Close(); copy_err == nil {
		copy_err = close_err
	}
	if copy_err != nil {
		// Best-effort cleanup: a leftover .part file is harmless and is
		// overwritten by the next attempt.
		os.Remove(part_name)
		fmt.Println("Failed to download file:", copy_err)
		return
	}

	if rename_err := os.Rename(part_name, fileName); rename_err != nil {
		os.Remove(part_name)
		fmt.Println("Failed to download file:", rename_err)
		return
	}

	fmt.Printf("%v downloaded successfully.\n", fileName)
}

// main crawls the mirror in three steps: it parses the root listing itself,
// then runs a first pool of workers over the URLs found there, and finally a
// second pool over the URLs the first pool found. Each pool reads from its own
// queue, which a feeder goroutine fills and then closes.
func main() {
	const (
		first_stage_workers  = 10
		second_stage_workers = 20
	)

	down_paral = make(chan string, first_stage_workers)
	Ndown_paral = make(chan string, second_stage_workers)

	wg.Add(first_stage_workers)
	nwg.Add(second_stage_workers)

	site := &first_url{
		root_url:  "https://mirrors.tuna.tsinghua.edu.cn/jenkins/updates",
		url_path1: []string{},
		root_dir:  "E:/jenkins/plugins",
	}

	mkdir_dir(site.root_dir)
	root_page := http_get(site.root_url)
	analysis_title(-1, &site.root_dir, &site.url_path1, &site.root_url, root_page)

	// feed sends every pending URL to queue and closes it afterwards so the
	// workers' receive loops terminate.
	feed := func(queue chan string, pending *[]string) {
		defer close(queue)
		for _, link := range *pending {
			queue <- link
		}
	}

	// Stage one: pages linked from the root listing.
	go feed(down_paral, &site.url_path1)
	for n := 0; n < first_stage_workers; n++ {
		go subdirectory_analysis(20, &wg, down_paral, &site.root_dir, &site.url_path2)
	}
	wg.Wait()

	// Stage two: pages discovered during stage one.
	go feed(Ndown_paral, &site.url_path2)
	for n := 0; n < second_stage_workers; n++ {
		go subdirectory_analysis(20, &nwg, Ndown_paral, &site.root_dir, &site.url_path3)
	}
	nwg.Wait()
	//down_hpi("https://mirrors.tuna.tsinghua.edu.cn/jenkins/updates/update-center.json", "E:/jenkins/updates/update-center.json")
}

三、Jenkins插件网搭建

1、使用httpd服务搭建网站

2、将插件上传到本地目录

3、在httpd服务目录下创建软连接,且重启服务

四、配置jenkins插件仓库地址

1、进到工作目录

我是官网下载war包直接启动jenkins方式进行搭建的,该方式jenkins默认工作目录是启动用户家目录的.jenkins目录下

2、修改jenkins默认插件下载地址

进到/root/.jenkins/updates目录下修改default.json文件,将默认插件下载地址改成自己的私有插件库地址;sed 后面马赛克部分需替换成你搭建的私有插件库的地址,示例如下:

3、重启jenkins服务就完事啦

Ant Apache HttpComponents Client 4.x API Plugin Bootstrap 4 API Plugin Bootstrap 5 API bouncycastle API Branch API Build Timeout Caffeine API Plugin Checks API plugin Command Agent Launcher Plugin Conditional BuildStep Credentials Credentials Binding Plugin Display URL API Durable Task Plugin ECharts API Email Extension Plugin Folders Plugin Font Awesome API Plugin Git Git client GIT server Plugin Gitee Plugin GitHub API GitHub Branch Source GitHub plugin GitLab Plugin Gradle Plugin Infrastructure plugin for Publish Over X Jackson 2 API Java JSON Web Token (JJWT) Plugin Javadoc Plugin JavaScript GUI Lib: ACE Editor bundle plugin JavaScript GUI Lib: Handlebars bundle plugin JavaScript GUI Lib: Moment.js bundle plugin JQuery3 API Plugin JSch dependency plugin JUnit LDAP Plugin Localization Support Plugin Localization: Chinese (Simplified) Lockable Resources plugin Mailer Plugin Matrix Authorization Strategy Plugin Matrix Project Plugin Maven Integration OkHttp Plugin Oracle Java SE Development Kit Installer Plugin OWASP Markup Formatter Plugin PAM Authentication plugin Parameterized Trigger plugin Pipeline Pipeline Graph Analysis Plugin Pipeline: API Pipeline: Basic Steps Pipeline: Build Step Pipeline: Declarative Pipeline: Declarative Extension Points API Pipeline: GitHub Groovy Libraries Pipeline: Groovy Pipeline: Input Step Pipeline: Job Pipeline: Milestone Step Pipeline: Model API Pipeline: Multibranch Pipeline: Nodes and Processes Pipeline: REST API Plugin Pipeline: SCM Step Pipeline: Shared Groovy Libraries Pipeline: Stage Step Pipeline: Stage Tags Metadata Pipeline: Stage View Plugin Pipeline: Step API Pipeline: Supporting APIs Plain Credentials Plugin Plugin Utilities API Popper.js 2 API Popper.js API Plugin Publish Over SSH Resource Disposer Plugin Run Condition Plugin SCM API Plugin Script Security Plugin Snakeyaml API Plugin SSH Build Agents plugin SSH Credentials Plugin SSH plugin SSH server Structs Plugin Timestamper Token Macro Plugin Trilead API Plugin
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值