Koa2 + Puppeteer打造『爬虫系统』7

本文介绍如何创建课程和关于我们页面的数据模型,并实现数据的抓取及存储过程。包括使用Node.js进行数据模型定义、爬虫开发、以及数据同步到数据库的具体步骤。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

十九.创建课程数据模型以及数据入表操作

1.创建course.js的模型

const seq = require('../connection/mysql_connect');
const { STRING, INT } = require('../../config/db_type_config');

/**
 * Builds one NOT NULL column definition for the `course` model.
 *
 * @param {*} type - Column type taken from db_type_config.
 * @param {string} comment - Comment attached to the column.
 * @param {Object} [extra] - Additional column options (e.g. `unique`, `defaultValue`).
 * @returns {Object} Column definition with `allowNull` fixed to false.
 */
function requiredColumn(type, comment, extra) {
    return Object.assign({ comment, type, allowNull: false }, extra);
}

// `course` model: one entry per crawled course.
// NOTE(review): `seq` is presumably the shared Sequelize connection — confirm in mysql_connect.
const Course = seq.define('course', {
    cid: requiredColumn(INT, 'Course ID', { unique: true }),
    href: requiredColumn(STRING, 'Course detail page link'),
    posterUrl: requiredColumn(STRING, 'Course img url'),
    courseName: requiredColumn(STRING, 'Course name'),
    price: requiredColumn(STRING, 'Course price'),
    description: requiredColumn(STRING, 'Course description'),
    studentCount: requiredColumn(INT, 'Course count'),
    field: requiredColumn(INT, 'the course the tab'),
    posterKey: requiredColumn(STRING, 'qiniu course image name'),
    // status is the only column that carries a default value (1).
    status: requiredColumn(INT, 'course status', { defaultValue: 1 })
});

module.exports = Course;

2.入口导入

const Slider = require('./slider'),
      RecomCourse = require('./recomCourse'),
      AgencyInfo = require('./agencyinfo'),
      Collection = require('./collection'),
      Teacher =  require('./teacher'),
      Student = require('./student'),
      CourseTab = require('./courseTab')
      Course = require('./course')

module.exports={
    Slider,AgencyInfo,RecomCourse,Collection,Teacher,Student,CourseTab,Course
}

执行 `node do/sync.js` 同步表

3.在service中写入course.js

const CourseModel = require('../do/models/course');
/**
 * Persistence helper for crawled course records.
 */
class CourseService{
    /**
     * Stores a course: creates a new row when no row with the same `cid`
     * exists, otherwise updates the existing row.
     *
     * @param {Object} data - Course record; `data.cid` is used as the lookup key.
     * @returns {Promise<*>} Whatever `CourseModel.create` or `CourseModel.update` resolves to.
     */
    async addCourse(data){
        const { cid } = data;
        const existing = await CourseModel.findOne({ where: { cid } });

        // Nothing stored for this cid yet: insert and finish.
        if(!existing){
            return await CourseModel.create(data);
        }

        return await CourseModel.update(data, { where: { cid } });
    }
}
module.exports = new CourseService();

4.完善控制器

 // Excerpt: pulls `addCourse` out of the course service export.
 // NOTE(review): this line looks like the tail of a larger declaration list; the leading `const`/comma is outside this excerpt.
 {addCourse} = require('../service/course')
// Excerpt: store one crawled item through the service and log whether the result was truthy.
// NOTE(review): `item` and the enclosing async function are not shown here.
const result = await addCourse(item);
                       if(result){
                           console.log('Data create Ok')
                       }else{
                           console.log('Data create failed')
                       }

二十.爬取关于我们数据、建立表模型、数据入表

1.配置config.js

module.exports={
    qiniu:{
        keys:{
            ak:'qMK7okFyL1xX2o8gjFzy1PrI_jXR_yf58naIcIs8',
            sk:'j6Ngxxy6xSzXi9czKXun0Uju_5hDahAla9THml9C'
        },
        bucket:{
            tximg:{
                bucket_name:'crawler-txclass',
                domain:'http://qexgb3yqx.hn-bkt.clouddn.com'
            }
        }
    },
    crawler:{
        url:{
            main:'https://msiwei.ke.qq.com/#tab=0&category=-1',
            course:'https://msiwei.ke.qq.com/#tab=1&category=-1',
            teacher:'https://msiwei.ke.qq.com/#tab=2&category=-1',
            aboutus:'https://msiwei.ke.qq.com/#category=-1&tab=3'
        }
    }
}

2.建立aboutus.js爬虫文件

const Crawler = require('../lib/crawler'),
      {crawler} = require('../config/config');

// Crawl the "about us" tab and return one record describing the agency.
Crawler({
    url:crawler.url.aboutus,
    // NOTE(review): the callback reads `window.$`, so it presumably runs inside
    // the crawled page — keep it self-contained (no references to outer scope).
    callback(){
        const $ = window.$,
              $wrapper = $('.agency-about'),
              // `.css()` yields undefined when the banner element is missing,
              // so fall back to an empty string before matching.
              bgImage = $wrapper.find('.about-banner-pic0').css('background-image') || '',
              // Accept url("..."), url('...') and unquoted url(...); a value
              // such as `none` produces no match instead of a TypeError.
              urlMatch = bgImage.match(/url\(\s*(["']?)(.+?)\1\s*\)/);
        return {
            aid:1,
            posterUrl:urlMatch ? urlMatch[2] : '',
            title:$wrapper.find('.about-agency-propagate').text(),
            name:$wrapper.find('.about-agency-name').text(),
            intro:$wrapper.find('.about-agency-intr').text(),
            // Left empty here; the controller fills it in after uploading the poster.
            posterKey:''
        }
    }
})

 

3.编辑路由

// Expose GET /crawl_aboutus; the request is handled by crawlerController.crawlAboutus.
router.get('/crawl_aboutus',crawlerController.crawlAboutus )

4.编写控制器

crawlAboutus(){
        startProcess({
            path:'../crawler/aboutus',
            async message(data){
                if(data.posterUrl && !data.posterKey){
                    try {
                        const posterData = await qiniuUpload({
                            url:data.posterUrl,
                            bucket:qiniu.bucket.tximg.bucket_name,
                            ext:'.jpg'
                        })
                        if(posterData.key){
                            data.posterKey = posterData.key
                        }
                    } catch (error) {
                        console.log(error)
                    }
                }
            },
            async exit(data){
                console.log(data);
            },
            async error(data){
                console.log(data);
            }
        })
    }

5.建立表模型

const seq = require('../connection/mysql_connect');
const { STRING, INT, TEXT } = require('../../config/db_type_config');

// Column definitions for the `aboutus` model. Every column is NOT NULL;
// `aid` is additionally unique and `intro` is the only TEXT column.
const aboutusColumns = {
    aid:       { comment:'aboutus Id', type:INT, allowNull:false, unique:true },
    posterUrl: { comment:'poster', type:STRING, allowNull:false },
    title:     { comment:'title', type:STRING, allowNull:false },
    name:      { comment:'NAME', type:STRING, allowNull:false },
    intro:     { comment:'introduction', type:TEXT, allowNull:false },
    posterKey: { comment:'qiniu poster image name', type:STRING, allowNull:false }
};

// NOTE(review): `seq` is presumably the shared Sequelize connection — confirm in mysql_connect.
const Aboutus = seq.define('aboutus', aboutusColumns);

module.exports = Aboutus;

6.入口文件导入

const Slider = require('./slider'),
      RecomCourse = require('./recomCourse'),
      AgencyInfo = require('./agencyinfo'),
      Collection = require('./collection'),
      Teacher =  require('./teacher'),
      Student = require('./student'),
      CourseTab = require('./courseTab')
      Course = require('./course'),
      Aboutus =  require('./aboutus'),

module.exports={
    Slider,AgencyInfo,RecomCourse,Collection,Teacher,Student,CourseTab,Course,Aboutus
}

7.在service中建立Aboutus.js

const AboutusModel = require('../do/models/aboutus');
/**
 * Persistence helper for the "about us" record.
 */
class AboutusService {
    /**
     * Stores the record: creates a new row when no row with the same `aid`
     * exists, otherwise updates the existing row.
     *
     * @param {Object} data - Crawled record; `data.aid` is used as the lookup key.
     * @returns {Promise<*>} Whatever `AboutusModel.create` or `AboutusModel.update` resolves to.
     */
    async addAboutus(data){
        // Match on the business key `aid` (declared unique on the model), the
        // same way CourseService matches on `cid`. Filtering on `id` would
        // compare against a different column (presumably the auto-generated
        // primary key), which only coincides with `aid` by accident.
        const aid = data.aid;
        const result = await AboutusModel.findOne({
            where:{aid}
        });
        if(result){
            return await AboutusModel.update(data,{
                where:{aid}
            })
        }else{
            return await AboutusModel.create(data);
        }
    }
}
module.exports=new AboutusService();

8.控制器完善

 // Excerpt: store the crawled record through the aboutus service and log whether the result was truthy.
 // NOTE(review): `item` is not defined in this excerpt; the crawlAboutus message handler shown
 // earlier names its payload `data` — confirm the variable name where this is pasted.
 const result = await addAboutus(item);
                        if(result){
                            console.log('Data create Ok')
                        }else{
                            console.log('Data create failed')
                        }

访问路由 /crawl_aboutus,将数据写入数据库

二十一.统一同步表模型以及数据入表操作

node do/sync.js

依次访问路由

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值