1. 前言
当初不知怎么想的,想把一个网页的数据抓取下来,再自己组装到app的tableview里面。后来这个想法废弃了,但相关的东西已经学了一部分,所以写这篇文章做个记录。后续还会补充WKWebView的学习内容。
截至发文,目标网址的结构没有变化。我会把当前爬取的网页结构一起放在demo里面,供分析用;否则一旦结构变了,demo就没用了。
2. 全部代码
demo:点击下载
3. WKWebView
3.1 引库
#import <WebKit/WebKit.h>
// Web view used to load the target page; its navigation delegate callbacks drive the scraping.
@property (nonatomic, strong) WKWebView *webView;
3.2 网页简单加载
就拿微博说事吧
// Create the web view so that it fills its container. A subview's frame is
// expressed in the parent's own coordinate space, so the parent's bounds (not
// its frame, which is relative to the grandparent) is the correct rectangle.
self.webView = [[WKWebView alloc] initWithFrame:self.view.bounds];
// Receive navigation callbacks (e.g. webView:didFinishNavigation:) on this object.
self.webView.navigationDelegate = self;
[self.view addSubview:self.webView];
// Start loading the Weibo home page.
NSURL *weiboURL = [NSURL URLWithString:@"https://weibo.com/"];
[self.webView loadRequest:[NSURLRequest requestWithURL:weiboURL]];
4. WKNavigationDelegate
4.1 方法
// Listing of the WKNavigationDelegate callbacks, in roughly the order they occur during a navigation.
// NOTE(review): in the WebKit SDK header these methods are declared @optional — confirm before
// treating this listing as the authoritative declaration.
@protocol WKNavigationDelegate <NSObject>
// Called before the request is sent to decide whether to navigate: when the user taps a link on the page that needs to open a new page, this method is called first.
- (void)webView:(WKWebView *)webView decidePolicyForNavigationAction:(WKNavigationAction *)navigationAction decisionHandler:(void (^)(WKNavigationActionPolicy))decisionHandler;
// Called after the response has been received to decide whether to navigate.
- (void)webView:(WKWebView *)webView decidePolicyForNavigationResponse:(WKNavigationResponse *)navigationResponse decisionHandler:(void (^)(WKNavigationResponsePolicy))decisionHandler;
// Called when the page starts loading.
- (void)webView:(WKWebView *)webView didStartProvisionalNavigation:(null_unspecified WKNavigation *)navigation;
// Called when the host address is redirected by the server.
- (void)webView:(WKWebView *)webView didReceiveServerRedirectForProvisionalNavigation:(null_unspecified WKNavigation *)navigation;
// Called when the page fails to load.
- (void)webView:(WKWebView *)webView didFailProvisionalNavigation:(null_unspecified WKNavigation *)navigation withError:(NSError *)error;
// Called when content starts to arrive.
- (void)webView:(WKWebView *)webView didCommitNavigation:(null_unspecified WKNavigation *)navigation;
// Called when the page has finished loading.
- (void)webView:(WKWebView *)webView didFinishNavigation:(null_unspecified WKNavigation *)navigation;
// Called when the navigation fails.
- (void)webView:(WKWebView *)webView didFailNavigation:(null_unspecified WKNavigation *)navigation withError:(NSError *)error;
// Called when certificate validation is required; handled the same way as HTTPS certificate validation with AFN (AFNetworking).
- (void)webView:(WKWebView *)webView didReceiveAuthenticationChallenge:(NSURLAuthenticationChallenge *)challenge completionHandler:(void (^)(NSURLSessionAuthChallengeDisposition disposition, NSURLCredential * _Nullable credential))completionHandler;
// Available from 9.0 only; triggered when the web content process is interrupted.
- (void)webViewWebContentProcessDidTerminate:(WKWebView *)webView API_AVAILABLE(macosx(10.11), ios(9.0));
@end
4.2 本文主要用的
/// Scrapes the page once its navigation has finished.
///
/// Dumps the body markup to a file via -writeToFileWithTxt:, then evaluates a
/// handful of DOM queries and logs each result. Every query is asynchronous;
/// results arrive in the completion handlers, not before this method returns.
///
/// @param webView    The web view whose navigation finished; all scripts run in it.
/// @param navigation The navigation that finished (unused).
- (void)webView:(WKWebView *)webView didFinishNavigation:(null_unspecified WKNavigation *)navigation {
    // Whole-page dump. Only persist when the evaluation succeeded and actually
    // produced a string; otherwise a nil/non-string value would be written.
    [webView evaluateJavaScript:@"document.body.innerHTML"
              completionHandler:^(id _Nullable result, NSError * _Nullable error) {
        if (error != nil || ![result isKindOfClass:[NSString class]]) {
            NSLog(@"网页抓取失败:%@", error);
            return;
        }
        NSLog(@"网页抓取结果:%@", result);
        [self writeToFileWithTxt:result];
    }];

    // Ordered (log label, script) pairs for the individual fields. The scripts
    // index [0] on class lookups, so they throw inside the page when the
    // element is missing; that surfaces as `error` in the completion handler.
    NSArray<NSArray<NSString *> *> *fieldScripts = @[
        // Super-topic link inside the first post text.
        @[@"标题链接", @"document.getElementsByClassName('weibo-text')[0].getElementsByTagName('a')[0].href"],
        // Title: text of the first post.
        @[@"标题", @"document.getElementsByClassName('weibo-text')[0].textContent"],
        // URL of the first avatar image.
        @[@"头像", @"document.getElementsByClassName('m-img-box')[0].getElementsByTagName('img')[0].src"],
        // Name of the publishing account.
        @[@"自媒体名称", @"document.getElementsByClassName('m-text-cut')[0].textContent"],
    ];

    for (NSArray<NSString *> *field in fieldScripts) {
        NSString *label = field.firstObject;
        NSString *script = field.lastObject;
        [webView evaluateJavaScript:script
                  completionHandler:^(id _Nullable result, NSError * _Nullable error) {
            if (error != nil) {
                // Report the failure explicitly instead of logging a bare (null) result.
                NSLog(@"%@抓取失败:%@", label, error);
                return;
            }
            NSLog(@"%@抓取结果:%@", label, result);
        }];
    }
}
4.2.1 分析
脚本中要根据当前字段的class与type等相关内容去获取,可以结合demo里面的网页结构来分析。