Koa2 + Puppeteer打造『爬虫系统』6

本文介绍如何使用Sequelize创建优秀学生表模型,并实现学生信息及课程列表数据的爬取与入库,包括表模型的设计、数据爬取、图片上传至七牛云存储等步骤。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

十六.创建优秀学生表模型以及入表操作

1.在models中建立student.js表模型

// Sequelize model definition for the `student` table.
const seq = require('../connection/mysql_connect');
// TEXT is imported alongside STRING/INT but is not used by any column below.
const { STRING, INT, TEXT } = require('../../config/db_type_config');

/**
 * Builds the definition of a NOT NULL STRING column.
 *
 * @param {string} comment - Column comment attached to the definition.
 * @returns {{comment: string, type: *, allowNull: boolean}} Column definition object.
 */
const requiredString = (comment) => ({
    comment,
    type: STRING,
    allowNull: false
});

const Student = seq.define('student', {
    // Student identifier: INT, NOT NULL, declared unique.
    sid: {
        comment: 'student Id',
        type: INT,
        allowNull: false,
        unique: true
    },
    studentImg: requiredString('student photo'),
    studentName: requiredString('student name'),
    intro: requiredString('student introduction'),
    courseName: requiredString('student CourseName'),
    courseLink: requiredString('student courseLink'),
    courseImgKey: requiredString('student courseImagekey')
});

module.exports = Student;

2.导入入口

const Slider = require('./slider'),
      RecomCourse = require('./recomCourse'),
      AgencyInfo = require('./agencyinfo'),
      Collection = require('./collection'),
      Teacher =  require('./teacher'),
      Student = require('./student');

module.exports={
    Slider,AgencyInfo,RecomCourse,Collection,Teacher,Student
}

同步表:node do/sync.js

3.在service中建立student.js

const StudentModel = require('../do/models/student');

/**
 * Persistence helpers for student records.
 */
class StudentService {
    /**
     * Creates the student, or updates the existing row that has the same `sid`.
     *
     * @param {object} data - Student fields; `data.sid` selects the row to look up.
     * @returns {Promise<*>} The result of `StudentModel.update` when a row with that
     *     `sid` was found, otherwise the result of `StudentModel.create`.
     */
    async addStudent(data) {
        const { sid } = data;
        const existing = await StudentModel.findOne({ where: { sid } });

        // No row with this sid yet: insert a new one.
        if (!existing) {
            return await StudentModel.create(data);
        }

        return await StudentModel.update(data, { where: { sid } });
    }
}

module.exports = new StudentService();

4.控制器中

crawlStudent(){
        startProcess({
            path:'../crawler/student',
            async message(data){
                data.map(async item=>{
                   try {
                       const qiniu = config.qiniu;
                       if(item.studentImg && !item.courseImgKey){
                           const imgData = await qiniuUpload({
                               url:item.studentImg,
                               bucket:qiniu.bucket.tximg.bucket_name,
                               ext:'.jpg'
                           })
                           if(imgData.key){
                               item.courseImgKey = imgData.key
                           }
                       }
                       const result = await addStudent(item);
                       if(result){
                           console.log('Data create Ok')
                       }else{
                           console.log('Data create failed')
                       }
                   } catch (error) {
                       console.log(error)
                   }
                    
                })
            },
            async exit(data){
                console.log(data);
            },
            async error(data){
                console.log(data);
            }
        })
    }

5.给每个model都增加status字段

 status:{
        comment:'course status',
        type:INT,
        defaultValue:1,
        allowNull:false
    }1

6.老师的模型里增加

isStar:{
        comment:'is the teacher a star teacher',
        type:INT,
        defaultValue:0,
        allowNull:false
    }

十七.课程选项卡数据爬取、创建表模型、数据入表

1.在crawler文件夹中建立courseTab.js

const Crawler = require('../lib/crawler');
const { crawler } = require('../config/config');

// Crawl the course page and collect the entries of the course tab filter.
Crawler({
    url: crawler.url.course,
    /**
     * Builds one `{ cid, title }` record per `.course-tab-filter li` element,
     * using `window.$` (presumably jQuery provided by the crawled page).
     *
     * @returns {Array<{cid: number, title: string}>} Collected tab records; `cid`
     *     is the index passed to the `each` callback.
     */
    callback() {
        const jq = window.$;
        const tabs = [];

        jq('.course-tab-filter li').each((position, node) => {
            const label = jq(node).find('.course-tab-filter-item').text();
            tabs.push({
                cid: position,
                // Removes the first "促" character from the tab label.
                title: label.replace(/促/, '')
            });
        });

        return tabs;
    }
});

2.添加路由

// Register GET /crawl_course_tab; requests are handled by crawlerController.crawlCourseTab.
router.get('/crawl_course_tab',crawlerController.crawlCourseTab )

3.控制器书写方法

 crawlCourseTab(){
        startProcess({
            path:'../crawler/courseTab',
            async message(data){
                data.map(async item=>{
                 
                    
                })
            },
            async exit(data){
                console.log(data);
            },
            async error(data){
                console.log(data);
            }
        })
    }

4.增加表模型

// Sequelize model definition for the `course_tab` table.
const seq = require('../connection/mysql_connect');
const { STRING, INT } = require('../../config/db_type_config');

// Column definitions, kept separate from the define() call for readability.
const courseTabColumns = {
    // Tab identifier: INT, NOT NULL, declared unique.
    cid: {
        comment: 'course id',
        type: INT,
        allowNull: false,
        unique: true
    },
    // Tab title text: STRING, NOT NULL.
    title: {
        comment: 'course tab',
        type: STRING,
        allowNull: false
    }
};

const CourseTab = seq.define('course_tab', courseTabColumns);

module.exports = CourseTab;

5.入口文件导入

const Slider = require('./slider'),
      RecomCourse = require('./recomCourse'),
      AgencyInfo = require('./agencyinfo'),
      Collection = require('./collection'),
      Teacher =  require('./teacher'),
      Student = require('./student'),
      CourseTab = require('./courseTab')

module.exports={
    Slider,AgencyInfo,RecomCourse,Collection,Teacher,Student,CourseTab
}

node do/sync.js同步表模型

6.service中创建courseTab.js

const CourseTabModel = require('../do/models/courseTab');

/**
 * Persistence helpers for course tab records.
 */
class CourseTabService {
    /**
     * Creates the course tab, or updates the existing row that has the same `cid`.
     *
     * @param {object} data - Course tab fields; `data.cid` selects the row to look up.
     * @returns {Promise<*>} The result of `CourseTabModel.update` when a row with that
     *     `cid` was found, otherwise the result of `CourseTabModel.create`.
     */
    async addCourseTab(data) {
        const { cid } = data;
        const existing = await CourseTabModel.findOne({ where: { cid } });

        // No row with this cid yet: insert a new one.
        if (!existing) {
            return await CourseTabModel.create(data);
        }

        return await CourseTabModel.update(data, { where: { cid } });
    }
}

module.exports = new CourseTabService();

7.完善控制器

 {addCourseTab} = require('../service/courseTab')
       crawlCourseTab(){
        startProcess({
            path:'../crawler/courseTab',
            async message(data){
                data.map(async item=>{
                 const result = await addCourseTab(item);
                 if(result){
                     console.log('Data create ok')
                 }else{
                     console.log('Data create failed')
                 }
                })
            },
            async exit(data){
                console.log(data);
            },
            async error(data){
                console.log(data);
            }
        })
    }

十八.爬取课程列表数据以及上传七牛图床

1.在crawler文件夹中建立course.js文件

const Crawler = require('../lib/crawler');
const { crawler } = require('../config/config');

// Crawl the course page and extract one record per course card.
Crawler({
    url: crawler.url.course,
    /**
     * Builds one record per `.course-card-item` element inside
     * `.course-card-list-multi-wrap`, using `window.$` (presumably jQuery
     * provided by the crawled page).
     *
     * @returns {Array<object>} Records with `cid`, `href`, `posterUrl`, `courseName`,
     *     `price`, `description`, `studentCount`, `field` and `posterKey`.
     */
    callback() {
        const $ = window.$;
        const $item = $('.course-card-list-multi-wrap .course-card-item');
        const data = [];

        $item.each((index, item) => {
            const $el = $(item);
            const $itemLk = $el.find('.item-img-link');
            const $img = $itemLk.find('.item-img');
            // Query the price label once instead of twice.
            const priceText = $el.find('.item-price').text();

            data.push({
                cid: $itemLk.attr('data-id'),
                href: $itemLk.prop('href'),
                posterUrl: $img.prop('src'),
                courseName: $img.prop('title'),
                // The label "免费" is stored as the string '0'.
                price: priceText === '免费' ? '0' : priceText,
                description: $el.find('.item-status-step').text(),
                // Explicit radix so the count is always parsed as a decimal number.
                studentCount: parseInt($el.find('.item-user').text(), 10),
                field: -1,
                // Left empty at crawl time.
                posterKey: ''
            });
        });

        return data;
    }
});

 

2.添加路由

// Register GET /crawl_course_data; requests are handled by crawlerController.crawlCourseData.
router.get('/crawl_course_data',crawlerController.crawlCourseData )

3.在控制器中

crawlCourseData(){
        startProcess({
            path:'../crawler/course',
            async message(data){
                data.map(async item=>{
                   try {
                       const qiniu = config.qiniu;
                       if(item.posterUrl && !item.posterKey){
                           const imgData = await qiniuUpload({
                               url:item.posterUrl,
                               bucket:qiniu.bucket.tximg.bucket_name,
                               ext:'.jpg'
                           })
                           if(imgData.key){
                               item.posterKey = imgData.key
                           }
                       }
                       const result = await addCourseData(item);
                       if(result){
                           console.log('Data create Ok')
                       }else{
                           console.log('Data create failed')
                       }
                   } catch (error) {
                       console.log(error)
                   }
                    
                })
            },
            async exit(data){
                console.log(data);
            },
            async error(data){
                console.log(data);
            }
        })
    }

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值