Paginating Article Detail Pages in the OneThink Framework

This post describes a way to paginate article content in ThinkPHP (as used by OneThink): the content field is split into an array on a marker, and a page parameter selects which part to display. It also covers how the detail template is chosen per article or per category, and how the pagination links are rendered in the view.

The modified detail() method in Application/Home/Controller/ArticleController.class.php looks like this:

/* Document model detail page */
public function detail($id = 0, $p = 1){
    /* Validate the document ID */
    if(!($id && is_numeric($id))){
        $this->error('Invalid document ID!');
    }

    /* Normalize the page number */
    $p = intval($p);
    $p = empty($p) ? 1 : $p;

    /* Fetch the document */
    $Document = D('Document');
    $info = $Document->detail($id);
    if(!$info){
        $this->error($Document->getError());
    }

    /* Split the content on the ##分页## marker and keep only the requested page */
    $content_array = explode('##分页##', $info['content']);
    $page_count = count($content_array);
    if($page_count > 1){
        /* Clamp $p so an out-of-range page number cannot index past the array */
        $p = max(1, min($p, $page_count));
        $info['content'] = $content_array[$p - 1];
        $parse = '';
        for($i = 1; $i <= $page_count; $i++){
            /* The URL parameter must be named 'p' (not 'page') so that
               ThinkPHP binds it to this action's $p argument */
            $parse .= '<li><a href="' . U('Article/detail', array('id' => $info['id'], 'p' => $i)) . '">' . $i . '</a></li>';
        }
        $this->assign('page_string', $parse);
    }

    /* Category info */
    $category = $this->category($info['category_id']);

    /* Resolve the template */
    if(!empty($info['template'])){ // template customized for this article
        $tmpl = $info['template'];
    } elseif (!empty($category['template_detail'])){ // template customized for the category
        $tmpl = $category['template_detail'];
    } else { // default template
        $tmpl = 'Article/' . get_document_model($info['model_id'], 'name') . '/detail';
    }

    /* Increment the view counter */
    $map = array('id' => $id);
    $Document->where($map)->setInc('view');

    /* Assign template variables and render */
    $this->assign('category', $category);
    $this->assign('info', $info);
    $this->assign('page', $p); // current page number
    $this->display($tmpl);
}
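If previous/next navigation is wanted as well, the controller can assign two extra link strings built the same way as the numbered list. A minimal sketch, to be placed inside the if($page_count > 1) branch after $parse is built; the prev_link, next_link, and page_count names are illustrative, not OneThink conventions:

    $this->assign('page_count', $page_count); // total number of pages
    /* Assign empty strings when there is no neighbouring page,
       so the template can print the variables unconditionally */
    $prev = ($p > 1)
        ? '<li><a href="' . U('Article/detail', array('id' => $info['id'], 'p' => $p - 1)) . '">Prev</a></li>'
        : '';
    $next = ($p < $page_count)
        ? '<li><a href="' . U('Article/detail', array('id' => $info['id'], 'p' => $p + 1)) . '">Next</a></li>'
        : '';
    $this->assign('prev_link', $prev);
    $this->assign('next_link', $next);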

 

When adding an article in the back end, insert ##分页## at each point where a page break should occur.
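For example, content saved with two markers yields three pages; a minimal illustration of what the explode() call in the controller produces (sample text only):

    $content = 'Part one of the article##分页##Part two##分页##Part three';
    $pages   = explode('##分页##', $content);
    // $pages[0] -> 'Part one of the article'  (shown when p = 1)
    // $pages[1] -> 'Part two'                 (shown when p = 2)
    // $pages[2] -> 'Part three'               (shown when p = 3)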

The pagination markup in the view is:

<div class="pagination">
<ul>
{$page_string}
</ul>
</div>
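For an article without any ##分页## marker, the controller never assigns page_string, so the template would print an empty variable (and raise a notice when notices are enabled). Wrapping the block in ThinkPHP's notempty tag hides the pagination entirely in that case; a sketch assuming the stock ThinkPHP 3.x template engine, with the optional {$prev_link}/{$next_link} variables from the controller sketch above:

    <notempty name="page_string">
    <div class="pagination">
    <ul>
    {$prev_link}
    {$page_string}
    {$next_link}
    </ul>
    </div>
    </notempty>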

 

The basic idea: the article's content field is split into array elements, and the value of the p parameter determines which element is displayed. Note that explode() consumes the delimiter, so ##分页## never appears in the output; the marker should not be placed in the middle of an HTML tag or element, or each page fragment will contain broken markup.

Reposted from: https://www.cnblogs.com/shanmao/p/4940722.html
