抓取拉勾上的招聘信息

Program.cs

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Threading.Tasks;
using System.Threading;
using System.Text.RegularExpressions;

namespace POST
{
    class Program
    {
        static void Main(string[] args)
        {
            List<Job> jobs = new List<Job>();

            //参数 
            var param = new Dictionary<string, string>() { { "first", "false" }, { "pn", "18" }, { "kd", Uri.EscapeUriString("前端") } };

            //并行处理
            Parallel.For(1, 31, i =>
            {
                if (i == 0)
                {
                    param["first"] = "true";
                }
                param["pn"] = i.ToString();

                //POST请求
                var request = CreatePostHttpResponse("https://www.lagou.com/jobs/positionAjax.json?city=" + Uri.EscapeUriString("深圳") + "&needAddtionalResult=false", param, 0, null, null);
                var result = GetResponseString(request);

                //解析结果
                var json = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(result);

                //取出记录
                foreach (dynamic item in json.content.positionResult.result as Newtonsoft.Json.Linq.JArray)
                {
                    int begin, end;
                    string salary = item.salary.ToString();
                    MatchCollection matches = Regex.Matches(salary, @"\d+");
                    if (matches.Count > 1)
                    {
                        begin = Convert.ToInt32(matches[0].Value);
                        end = Convert.ToInt32(matches[1].Value);
                    }
                    else
                    {
                        begin = end = Convert.ToInt32(matches[0].Value);
                    }

                    jobs.Add(new Job() { Begin = begin, End = end, Title = item.positionName.ToString(), Url = "https://www.lagou.com/jobs/" + item.positionId.ToString() + ".html" });

                    Console.WriteLine(string.Format("{0}\t{1}\t{2}\thttps://www.lagou.com/jobs/{3}.html", begin, end, item.positionName, item.positionId));
                }
            });

            using (Context context = new Context())
            {
                context.Database.ExecuteSqlCommand("truncate table Jobs;");
                context.Job.AddRange(jobs);
                context.SaveChanges();
            }

            Console.ReadLine();
        }


        /// <summary>  
        /// 创建POST方式的HTTP请求  
        /// </summary>  
        public static HttpWebResponse CreatePostHttpResponse(string url, IDictionary<string, string> parameters, int timeout, string userAgent, CookieCollection cookies)
        {
            HttpWebRequest request = null;

            //如果是发送HTTPS请求  
            if (url.StartsWith("https", StringComparison.OrdinalIgnoreCase))
            {
                ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback((a, b, c, d) => true);
                request = WebRequest.Create(url) as HttpWebRequest;
                request.ProtocolVersion = HttpVersion.Version10;
            }
            else
            {
                request = WebRequest.Create(url) as HttpWebRequest;
            }
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";

            //设置代理UserAgent和超时
            //request.UserAgent = userAgent;
            //request.Timeout = timeout; 

            if (cookies != null)
            {
                request.CookieContainer = new CookieContainer();
                request.CookieContainer.Add(cookies);
            }

            //通过Chrome、Fiddler得到的Cookie
            request.Headers.Add("Cookie", "user_trace_token=20160807143405-f00b763c-5c68-11e6-83c2-525400f775ce; LGUID=20160807143405-f00b7bc5-5c68-11e6-83c2-525400f775ce; OUTFOX_SEARCH_USER_ID_NCOO=629042951.3769003; index_location_city=%E6%B7%B1%E5%9C%B3; JSESSIONID=46E904B117BCA78AF0A35443A5DD4E5D; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; TG-TRACK-CODE=index_navigation; _ga=GA1.2.584559713.1470551645; LGSID=20170408222808-961f8b03-1c67-11e7-9d7b-5254005c3644; LGRID=20170408224012-45965707-1c69-11e7-9d7b-5254005c3644; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1490887972,1491521596,1491566728,1491619940; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1491662412; SEARCH_ID=92db2fc2ed1748a19d11f9416eafdbaf");

            //发送POST数据  
            if (!(parameters == null || parameters.Count == 0))
            {
                StringBuilder buffer = new StringBuilder();
                int i = 0;
                foreach (string key in parameters.Keys)
                {
                    if (i > 0)
                    {
                        buffer.AppendFormat("&{0}={1}", key, parameters[key]);
                    }
                    else
                    {
                        buffer.AppendFormat("{0}={1}", key, parameters[key]);
                        i++;
                    }
                }
                byte[] data = Encoding.ASCII.GetBytes(buffer.ToString());
                using (Stream stream = request.GetRequestStream())
                {
                    stream.Write(data, 0, data.Length);
                }
            }
            string[] values = request.Headers.GetValues("Content-Type");
            return request.GetResponse() as HttpWebResponse;
        }

        /// <summary>
        /// 获取请求的数据
        /// </summary>
        public static string GetResponseString(HttpWebResponse webresponse)
        {
            using (Stream s = webresponse.GetResponseStream())
            {
                StreamReader reader = new StreamReader(s, Encoding.UTF8);
                return reader.ReadToEnd();
            }
        }
    }
}

Context.cs

using System;
using System.Collections.Generic;
using System.Data.Entity;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace POST
{
    public class Context : DbContext
    {
        public DbSet<Job> Job { get; set; }
    }

    public class Job
    {
        public int Id { get; set; }
        public int Begin { get; set; }
        public int End { get; set; }
        public string Title { get; set; }
        public string Url { get; set; }
    }
}

App.config

<?xml version="1.0" encoding="utf-8"?>
<configuration>
  <configSections>
    <section name="entityFramework" type="System.Data.Entity.Internal.ConfigFile.EntityFrameworkSection, EntityFramework, Version=6.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" requirePermission="false"/>
  </configSections>
  <startup>
    <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5"/>
  </startup>
  <connectionStrings>
    <add name="Context" connectionString="Server=.;Database=Lagou;Integrated Security=SSPI" providerName="System.Data.SqlClient"/>
  </connectionStrings>
  <entityFramework>
    <defaultConnectionFactory type="System.Data.Entity.Infrastructure.LocalDbConnectionFactory, EntityFramework">
      <parameters>
        <parameter value="mssqllocaldb"/>
      </parameters>
    </defaultConnectionFactory>
    <providers>
      <provider invariantName="System.Data.SqlClient" type="System.Data.Entity.SqlServer.SqlProviderServices, EntityFramework.SqlServer"/>
    </providers>
  </entityFramework>
</configuration>

结果如图:

这里写图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值