news_detail.xml


            <TextView android:textSize="16.0dip"
                android:textStyle="bold"
                android:textColor="#ff000000"
                android:id="@+id/news_detail_title"
                android:layout_width="fill_parent"
                android:layout_height="wrap_content"
                android:layout_marginBottom="10.0dip" />
           
            <TextView android:textColor="#ff999999"
                android:id="@+id/news_detail_time"
                android:layout_width="fill_parent"
                android:layout_height="wrap_content"
                android:layout_marginBottom="10.0dip"
                android:layout_below="@id/news_detail_title" />
           
            <View android:id="@+id/divide_line"
                android:layout_below="@id/news_detail_time"
                style="@style/line1" />
         
Building prefix dict from the default dictionary ... Loading model from cache C:\Users\35658\AppData\Local\Temp\jieba.cache Loading model cost 0.591 seconds. Prefix dict has been built successfully. 开始爬取北京银行与科技公司合作新闻... 正在处理第 1 页... 警告: 未能找到任何新闻项 第 1 页无新闻,停止爬取 未找到符合条件的合作信息 共获取 0 条合作记录 总耗时: 0.52 秒 >>> 还是代码爬不出结果,为什么,并修改import requests from bs4 import BeautifulSoup import pandas as pd import jieba import jieba.posseg as pseg import time import random import re import json import os from datetime import datetime from urllib.parse import quote # 配置jieba分词器 jieba.initialize() # 添加金融科技领域专有名词和公司名称 tech_keywords = ['科技', '技术', '数字', '智能', '数据', '信息', '云', 'AI', '区块链', '金融科技', '创新', '研发'] company_names = ['腾讯', '阿里', '百度', '京东', '字节跳动', '华为', '小米', '蚂蚁集团', '商汤科技', '旷视科技', '科大讯飞'] # 添加自定义词典 for name in company_names: jieba.add_word(name, freq=1000, tag='nt') jieba.add_word('北京银行', freq=1000, tag='nt') jieba.add_word('北银', freq=1000, tag='nt') jieba.add_word('BNK', freq=1000, tag='nt') # 搜索关键词 search_query = "北京银行 科技公司 合作" encoded_query = quote(search_query, safe='') # 安全编码 # 自定义User-Agent列表(替代fake_useragent) USER_AGENTS = [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36", "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1" ] def get_dynamic_headers(): """生成动态请求头部""" return { "User-Agent": random.choice(USER_AGENTS), "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", "Connection": "keep-alive", "Referer": "https://www.ringdata.com/", "Upgrade-Insecure-Requests": "1", "Sec-Fetch-Dest": "document", "Sec-Fetch-Mode": "navigate", "Sec-Fetch-Site": "same-origin", "Sec-Fetch-User": "?1" } def fetch_news_list(page_num): """获取指定页面的新闻列表""" base_url = "https://www.ringdata.com/news" params = { "keywords": search_query, "page": page_num # 修正分页参数 } try: # 使用动态头部 headers = get_dynamic_headers() response = requests.get( base_url, params=params, headers=headers, timeout=30 ) response.raise_for_status() # 检查是否被重定向或反爬 if response.status_code != 200: print(f"异常状态码: {response.status_code}") return None return response.text except requests.exceptions.RequestException as e: print(f"获取第 {page_num} 页失败: {str(e)}") return None def parse_news_list(html_content): """解析新闻列表页""" soup = BeautifulSoup(html_content, 'html.parser') news_list = [] # 更通用的新闻项选择器 items = soup.select('.news-list .item, .info-list li, .article-list li, .result-list li') if not items: # 尝试备用选择器 items = soup.select('div.item, li.item, div.news-item, li.news-item') if not items: print("警告: 未能找到任何新闻项") return news_list for item in items: try: # 提取标题和链接 title_elem = item.select_one('a[href]') if not title_elem: continue title = title_elem.get_text(strip=True) relative_url = title_elem.get('href', '') # 构造完整URL if relative_url.startswith('/'): full_url = f"https://www.ringdata.com{relative_url}" elif relative_url.startswith('http'): full_url = relative_url else: full_url = f"https://www.ringdata.com/{relative_url}" # 提取源和日期 source = "未知来源" date = "未知日期" # 改进的元信息提取 meta_container = item.select_one('.source, .date, .info, .meta, 
.time, .pub-time') if meta_container: meta_text = meta_container.get_text(strip=True) # 提取日期(优先匹配YYYY-MM-DD格式) date_match = re.search(r'(\d{4}-\d{1,2}-\d{1,2})', meta_text) if date_match: date = date_match.group(0) else: # 尝试其他日期格式 date_match = re.search(r'(\d{4}[-/年]\d{1,2}[-/月]\d{1,2}[日]?|\d{4}[-/年]\d{1,2}月|\d{4}年)', meta_text) if date_match: date = date_match.group() # 提取来源 source_match = re.search(r'来源[::]?\s*([^|\s]+)', meta_text) if source_match: source = source_match.group(1) elif len(meta_text) < 20 and not re.search(r'\d', meta_text): source = meta_text news_list.append({ "title": title, "url": full_url, "source": source, "publish_date": date }) except Exception as e: print(f"解析新闻项时出错: {str(e)}") return news_list def extract_news_content(url): """提取新闻内容和发布时间""" try: headers = get_dynamic_headers() response = requests.get(url, headers=headers, timeout=30) html = response.text soup = BeautifulSoup(html, 'html.parser') # 正文内容提取 content_elem = soup.select_one('.article-content, .content, .main-content, .news-content, .article-body') content = content_elem.get_text(strip=True) if content_elem else "" # 发布时间提取 time_elem = soup.select_one('.pub-time, .publish-date, .time, .date, .pubdate') publish_time = time_elem.get_text(strip=True) if time_elem else "" # 标准化时间格式 if publish_time: try: # 尝试转换为标准格式 dt = datetime.strptime(publish_time, '%Y-%m-%d %H:%M:%S') return content, dt.strftime('%Y-%m-%d') except: # 尝试其他格式 date_match = re.search(r'(\d{4}-\d{1,2}-\d{1,2})', publish_time) if date_match: return content, date_match.group(0) return content, publish_time return content, "" except Exception as e: print(f"提取内容失败: {str(e)}") return "", "" def extract_tech_companies(text): """从文本中提取科技公司实体""" words = pseg.cut(text) tech_companies = set() current_entity = [] bank_keywords = {'北京银行', '北银', 'BNK'} for word, flag in words: # 机构名或专有名词 if flag in ['nt', 'nz', 'j', 'x'] and word not in bank_keywords: # 检查是否是科技相关 if any(kw in word for kw in tech_keywords) or word in company_names: current_entity.append(word) elif current_entity: # 如果当前实体不为空,添加进去 entity = ''.join(current_entity) tech_companies.add(entity) current_entity = [word] # 开始新实体 else: current_entity.append(word) elif current_entity: # 遇到非机构名词,完成当前实体 entity = ''.join(current_entity) if any(kw in entity for kw in tech_keywords) or entity in company_names: tech_companies.add(entity) current_entity = [] # 处理最后一个实体 if current_entity: entity = ''.join(current_entity) if any(kw in entity for kw in tech_keywords) or entity in company_names: tech_companies.add(entity) # 过滤掉过短的词 return {c for c in tech_companies if len(c) >= 2} def analyze_cooperation(content, tech_companies): """分析内容提取合作关系""" # 合作关键词 coop_keywords = {'合作', '签约', '战略', '联手', '协作', '共同', '携手', '联盟', '协议', '合作项目', '签署', '签约仪式', '战略合作'} coop_companies = set() # 查找包含合作关键词的句子 sentences = re.split(r'[。!?;\n]', content) coop_sentences = [s for s in sentences if any(kw in s for kw in coop_keywords)] # 找出在合作句子中出现的公司 for company in tech_companies: if any(company in s for s in coop_sentences): coop_companies.add(company) return coop_companies def extract_cooperation_date(content, publish_date): """从内容中提取合作时间""" # 尝试在内容中查找具体日期 date_patterns = [ r'(\d{4})[-年](\d{1,2})[-月](\d{1,2})日?', r'(\d{4})年(\d{1,2})月', r'(\d{4})年' ] for pattern in date_patterns: match = re.search(pattern, content) if match: groups = match.groups() if len(groups) == 3: return f"{groups[0]}-{groups[1].zfill(2)}-{groups[2].zfill(2)}" elif len(groups) == 2: return f"{groups[0]}-{groups[1].zfill(2)}-01" else: return 
f"{groups[0]}-01-01" # 使用新闻发布日期 return publish_date def scrape_all_news(max_pages=50): """爬取所有新闻数据""" all_results = [] page_num = 1 print(f"开始爬取北京银行与科技公司合作新闻...") while page_num <= max_pages: print(f"正在处理第 {page_num} 页...") # 获取新闻列表页 list_html = fetch_news_list(page_num) if not list_html: print(f"第 {page_num} 页获取失败,停止爬取") break # 解析新闻列表 news_list = parse_news_list(list_html) if not news_list: print(f"第 {page_num} 页无新闻,停止爬取") break print(f"找到 {len(news_list)} 条新闻") # 处理每条新闻 for news in news_list: print(f" 分析新闻: {news['title'][:40]}...") # 获取新闻详情 content, detailed_date = extract_news_content(news['url']) publish_date = detailed_date or news['publish_date'] # 提取科技公司 full_text = f"{news['title']}。{content}" tech_companies = extract_tech_companies(full_text) if not tech_companies: print(" 未识别到科技公司") continue # 分析合作关系 coop_companies = analyze_cooperation(content, tech_companies) if not coop_companies: print(" 未识别到合作关系") continue # 提取合作时间 coop_date = extract_cooperation_date(content, publish_date) # 添加到结果 all_results.append({ "银行": "北京银行", "合作公司": ", ".join(coop_companies), "合作时间": coop_date, "新闻标题": news['title'], "新闻发布时间": publish_date, "新闻来源": news['source'], "新闻链接": news['url'] }) print(f" 发现合作: {', '.join(coop_companies)}") # 每个新闻间隔1-3秒 time.sleep(random.uniform(1, 3)) # 翻页间隔3-6秒 time.sleep(random.uniform(3, 6)) page_num += 1 return all_results def save_results(results): """保存结果到文件""" if not results: print("未找到符合条件的合作信息") return None # 创建数据目录 data_dir = "北京银行合作数据" os.makedirs(data_dir, exist_ok=True) # 转换为DataFrame df = pd.DataFrame(results) # 保存CSV csv_path = os.path.join(data_dir, "北京银行_科技公司合作.csv") df.to_csv(csv_path, index=False, encoding='utf-8-sig') # 保存Excel excel_path = os.path.join(data_dir, "北京银行_科技公司合作.xlsx") df.to_excel(excel_path, index=False) # 保存原始数据JSON json_path = os.path.join(data_dir, "原始数据.json") with open(json_path, 'w', encoding='utf-8') as f: json.dump(results, f, ensure_ascii=False, indent=2) print(f"保存成功: {csv_path}, {excel_path}, {json_path}") return df def generate_report(df): """生成分析报告""" if df is None or df.empty: return report_dir = "分析报告" os.makedirs(report_dir, exist_ok=True) # 按公司统计 df['合作年份'] = df['合作时间'].str.extract(r'(\d{4})')[0] company_stats = df.assign(合作公司=df['合作公司'].str.split(', ')).explode('合作公司') # 公司合作次数排名 company_count = company_stats['合作公司'].value_counts().reset_index() company_count.columns = ['公司名称', '合作次数'] company_count.to_csv(os.path.join(report_dir, '公司合作次数排名.csv'), index=False, encoding='utf-8-sig') # 年度合作趋势 year_count = company_stats['合作年份'].value_counts().sort_index().reset_index() year_count.columns = ['年份', '合作次数'] year_count.to_csv(os.path.join(report_dir, '年度合作趋势.csv'), index=False, encoding='utf-8-sig') # 热门公司TOP10 top_companies = company_stats['合作公司'].value_counts().head(10) print("\n热门合作科技公司TOP10:") print(top_companies) print("\n分析报告已生成在 '分析报告' 目录中") if __name__ == "__main__": # 开始爬取 start_time = time.time() results = scrape_all_news(max_pages=10) # 减少测试页数 # 保存结果 df = save_results(results) # 生成报告 if df is not None: generate_report(df) # 统计信息 print(f"\n共获取 {len(results) if results else 0} 条合作记录") print(f"总耗时: {time.time() - start_time:.2f} 秒")
# -*- coding: utf-8 -*- """ 人民网科技频道新闻分类系统(最终稳定版) 核心修复:自动检测单类别/小众类别数据,确保模型可训练 功能:爬虫采集 → 数据清洗(类别过滤)→ 数据探索 → 文本预处理 → SVM分类 → 模型评价 """ import requests from bs4 import BeautifulSoup import pandas as pd import numpy as np import jieba import re import matplotlib.pyplot as plt import seaborn as sns from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.svm import SVC from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score from sklearn.metrics import ( accuracy_score, classification_report, confusion_matrix, roc_curve, auc, f1_score ) from sklearn.preprocessing import LabelEncoder from sklearn.feature_selection import SelectKBest, f_classif from wordcloud import WordCloud import time from typing import List, Dict, Optional import warnings warnings.filterwarnings('ignore') # ====================== 全局配置 ====================== # 爬虫配置 HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', 'Referer': 'http://scitech.people.com.cn/', 'Connection': 'keep-alive' } BASE_URL = "http://scitech.people.com.cn" RETRY_TIMES = 3 # 爬取重试次数 DELAY_RANGE = (1.5, 3.0) # 反爬延迟范围 # 文本处理配置 STOPWORDS = set([ '的', '了', '是', '在', '有', '就', '不', '和', '也', '都', '这', '那', '我', '你', '他', '我们', '你们', '他们', '来', '去', '上', '下', '里', '外', '一个', '一些', '关于', '对于', '随着', '通过', '根据', '显示', '表明', '指出' ]) MAX_FEATURES = 5000 # TF-IDF最大特征数 TOP_N_FEATURES = 20 # 可视化TOP特征数 # 模型配置 TEST_SIZE = 0.25 RANDOM_STATE = 42 CV_FOLDS = 5 # 交叉验证折数 MIN_CATEGORY_COUNT = 5 # 最小类别样本数(少于该数的类别会被过滤) # 字体配置(Windows系统默认黑体) FONT_PATH = 'C:/Windows/Fonts/simhei.ttf' # ====================== 工具函数 ====================== def retry_request(url: str, headers: dict, timeout: int = 15) -> Optional[requests.Response]: """带重试机制的HTTP请求""" for i in range(RETRY_TIMES): try: response = requests.get(url, headers=headers, timeout=timeout) response.raise_for_status() # 抛出HTTP错误 return response except Exception as e: print(f"请求失败(第{i+1}次重试): {e}") time.sleep(np.random.uniform(*DELAY_RANGE)) return None def complete_url(relative_url: str) -> str: """补全相对URL为绝对URL""" if relative_url.startswith('http'): return relative_url return f"{BASE_URL}{relative_url.lstrip('/')}" def filter_single_category(df: pd.DataFrame) -> pd.DataFrame: """ 过滤单类别/小众类别数据(核心修复函数) :param df: 原始数据DataFrame :return: 至少包含2个有效类别的DataFrame """ print("\n" + "="*50) print("开始类别数据过滤...") # 统计每个类别的样本数 category_counts = df['category'].value_counts() print(f"原始类别分布:\n{category_counts}") # 筛选样本数≥MIN_CATEGORY_COUNT的类别 valid_categories = category_counts[category_counts >= MIN_CATEGORY_COUNT].index df_filtered = df[df['category'].isin(valid_categories)].reset_index(drop=True) # 检查过滤后是否至少有2个类别 remaining_categories = df_filtered['category'].nunique() if remaining_categories < 2: # 若不足2个类别,降低阈值到3,再次尝试 print(f"有效类别数不足2个(当前{remaining_categories}个),降低筛选阈值...") valid_categories = category_counts[category_counts >= 3].index df_filtered = df[df['category'].isin(valid_categories)].reset_index(drop=True) remaining_categories = df_filtered['category'].nunique() # 若仍不足2个类别,手动拆分类别(应急方案) if remaining_categories < 2: print("启动应急方案:根据标题关键词手动拆分类别...") df_filtered = manual_split_category(df_filtered) remaining_categories = df_filtered['category'].nunique() print(f"过滤后类别分布:\n{df_filtered['category'].value_counts()}") print(f"过滤后数据量:{len(df_filtered)}条,有效类别数:{remaining_categories}个") # 
最终校验:若仍不足2个类别,抛出明确错误 if remaining_categories < 2: raise ValueError(f"数据类别不足!当前仅{remaining_categories}个类别,无法进行分类训练。请增加爬取页数或检查栏目提取逻辑。") return df_filtered def manual_split_category(df: pd.DataFrame) -> pd.DataFrame: """ 应急方案:根据标题关键词手动拆分类别(避免单类别) 可根据实际新闻内容调整关键词 """ def get_category_from_title(title: str) -> str: # 关键词-类别映射(可扩展) category_map = { '人工智能': ['AI', '人工智能', '大模型', '机器学习', '深度学习', '机器人'], '航天科技': ['航天', '太空', '卫星', '火箭', '空间站', '探月', '火星'], '电子科技': ['芯片', '半导体', '5G', '通信', '手机', '电脑', '处理器'], '生物医疗': ['生物', '医疗', '疫苗', '基因', '药物', '医院', '健康'], '新能源': ['新能源', '电池', '光伏', '风电', '电动车', '充电'], '互联网': ['互联网', 'APP', '软件', '平台', '直播', '电商'] } for cat, keywords in category_map.items(): if any(keyword in title for keyword in keywords): return cat return '综合科技' # 应用标题分类逻辑 df['category'] = df['title'].apply(get_category_from_title) return df # ====================== 1. 数据爬取(增强版) ====================== def crawl_news_detail(link: str) -> str: """爬取新闻正文内容""" response = retry_request(link, HEADERS) if not response: return "" soup = BeautifulSoup(response.content, 'html.parser') # 适配人民网多种正文结构 content_tags = soup.select('.rm_txt_con, .article-content, #rwb_article, .content') if content_tags: content = ' '.join([tag.text.strip() for tag in content_tags]) return re.sub(r'\s+', ' ', content)[:500] # 截取前500字避免过长 return "" def crawl_scitech_news(pages: int = 15) -> pd.DataFrame: """ 爬取人民网科技频道新闻数据(增加爬取页数,确保多类别) :param pages: 爬取页数(默认15页,确保覆盖足够栏目) :return: 包含标题、正文、时间、分类的DataFrame """ news_list = [] print(f"开始爬取人民网科技频道({pages}页)...") for page in range(1, pages + 1): # 构建分页URL if page == 1: url = f"{BASE_URL}/index.html" else: url = f"{BASE_URL}/index{page}.html" response = retry_request(url, HEADERS) if not response: continue soup = BeautifulSoup(response.content, 'html.parser') articles = soup.select('.ej_list_box li') if not articles: print(f"第{page}页未找到新闻条目") continue for idx, article in enumerate(articles, 1): try: # 提取核心信息 title_tag = article.select_one('a') if not title_tag: continue title = title_tag.text.strip() link = complete_url(title_tag['href']) content_summary = article.select_one('.ej_content').text.strip() if article.select_one('.ej_content') else "" publish_time = article.select_one('.ej_time').text.strip() if article.select_one('.ej_time') else "" # 优化栏目提取逻辑(增加备选选择器) category_tag = article.select_one('.ej_key a, .category, .column, .tags a') if category_tag: category = category_tag.text.strip() else: category = "综合科技" # 默认类别 # 爬取正文 full_content = crawl_news_detail(link) # 合并标题+摘要+正文作为特征 combined_content = f"{title} {content_summary} {full_content}" news_list.append({ 'title': title, 'content_summary': content_summary, 'full_content': full_content, 'combined_content': combined_content, 'publish_time': publish_time, 'category': category, 'link': link }) # 每爬5条休息一次(反爬优化) if idx % 5 == 0: time.sleep(np.random.uniform(*DELAY_RANGE) / 2) except Exception as e: print(f"第{page}页第{idx}条解析失败: {str(e)[:50]}") print(f"第{page}页爬取完成,累计{len(news_list)}条") time.sleep(np.random.uniform(*DELAY_RANGE)) df = pd.DataFrame(news_list) # 去重(基于标题) df = df.drop_duplicates(subset=['title'], keep='first').reset_index(drop=True) print(f"爬取完成,去重后共{len(df)}条新闻") return df # ====================== 2. 
数据探索分析(优化版) ====================== def analyze_data_distribution(df: pd.DataFrame): """数据分布分析(基于过滤后的有效数据)""" plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans'] plt.rcParams['axes.unicode_minus'] = False plt.figure(figsize=(12, 6)) ax = sns.countplot(x='category', data=df, palette='viridis') plt.title('科技频道新闻栏目分布(有效类别)', fontsize=14, pad=20) plt.xlabel('栏目类别', fontsize=12) plt.ylabel('新闻数量', fontsize=12) plt.xticks(rotation=45, ha='right') # 添加数值标签 for p in ax.patches: ax.annotate(f'{p.get_height()}', (p.get_x() + p.get_width()/2, p.get_height() + 0.5), ha='center', va='bottom', fontsize=10) plt.tight_layout() plt.savefig('scitech_category_distribution.png', dpi=300, bbox_inches='tight') plt.close() def analyze_text_similarity(df: pd.DataFrame, sample_size: int = 50): """文本相似度分析""" if len(df) < sample_size: sample_size = len(df) texts = df['combined_content'].fillna("").tolist()[:sample_size] vectorizer = TfidfVectorizer(max_features=500, stop_words=list(STOPWORDS)) vectors = vectorizer.fit_transform(texts) from sklearn.metrics.pairwise import cosine_similarity sim_matrix = cosine_similarity(vectors) plt.figure(figsize=(12, 10)) sns.heatmap(sim_matrix, cmap='coolwarm', vmin=0, vmax=1, annot=False) plt.title(f'新闻内容相似度矩阵(前{sample_size}条)', fontsize=14, pad=20) plt.xlabel('新闻序号', fontsize=12) plt.ylabel('新闻序号', fontsize=12) plt.tight_layout() plt.savefig('scitech_content_similarity.png', dpi=300, bbox_inches='tight') plt.close() def generate_category_wordcloud(df: pd.DataFrame): """生成各栏目词云(修复字体问题)""" for category in df['category'].unique(): # 合并该栏目所有文本 texts = df[df['category'] == category]['combined_content'].fillna("").tolist() full_text = ' '.join(texts) # 分词并过滤停用词 words = jieba.lcut(full_text) filtered_words = [word for word in words if word not in STOPWORDS and len(word) > 1] word_text = ' '.join(filtered_words) if not word_text: print(f"{category}栏目无有效文本,跳过词云生成") continue # 生成词云 wordcloud = WordCloud( font_path=FONT_PATH, width=800, height=600, background_color='white', max_words=100, collocations=False, contour_width=3 ).generate(word_text) plt.figure(figsize=(10, 8)) plt.imshow(wordcloud, interpolation='bilinear') plt.axis('off') plt.title(f'{category}栏目词云', fontsize=14, pad=20) plt.tight_layout() plt.savefig(f'scitech_wordcloud_{category}.png', dpi=300, bbox_inches='tight') plt.close() def explore_data(df: pd.DataFrame): """数据探索主函数""" print("\n" + "="*50) print("开始数据探索分析...") # 基础信息输出 print(f"\n数据基本信息:") print(f"总新闻数:{len(df)}") print(f"有效栏目数:{df['category'].nunique()}") print(f"栏目分布:\n{df['category'].value_counts()}") # 执行各项分析 analyze_data_distribution(df) analyze_text_similarity(df) generate_category_wordcloud(df) print("数据探索完成,可视化文件已保存") # ====================== 3. 文本预处理(增强版) ====================== def clean_text(text: Optional[str]) -> str: """文本清洗""" if not isinstance(text, str) or pd.isna(text): return "" # 保留中文和重要标点 text = re.sub(r'[^\u4e00-\u9fa5\s,。!?;:""''()【】《》、]', '', text) # 合并连续空格和换行 text = re.sub(r'\s+', ' ', text) return text.strip() def chinese_tokenizer(text: str) -> str: """中文分词(结合停用词过滤)""" words = jieba.lcut(text) filtered_words = [word for word in words if word not in STOPWORDS and len(word) > 1] return " ".join(filtered_words) def preprocess_text(df: pd.DataFrame) -> tuple: """文本预处理主函数""" print("\n" + "="*50) print("开始文本预处理...") # 1. 文本清洗 df['cleaned_content'] = df['combined_content'].apply(clean_text) # 2. 过滤空文本 df = df[df['cleaned_content'].str.len() > 5].reset_index(drop=True) print(f"过滤空文本后剩余:{len(df)}条") # 3. 
分词 df['tokenized_content'] = df['cleaned_content'].apply(chinese_tokenizer) # 4. TF-IDF向量化 vectorizer = TfidfVectorizer( max_features=MAX_FEATURES, ngram_range=(1, 2), token_pattern=r'\b\w+\b', min_df=3, stop_words=list(STOPWORDS) ) X = vectorizer.fit_transform(df['tokenized_content']) print(f"TF-IDF特征矩阵维度:{X.shape}") # 5. 特征选择 selector = SelectKBest(f_classif, k=min(3000, X.shape[1])) X_selected = selector.fit_transform(X, df['category']) print(f"特征选择后维度:{X_selected.shape}") # 6. 标签编码 label_encoder = LabelEncoder() y = label_encoder.fit_transform(df['category']) print(f"标签编码映射:{dict(zip(label_encoder.classes_, range(len(label_encoder.classes_))))}") return X_selected, y, label_encoder, vectorizer, selector, df # ====================== 4. 模型构建与优化 ====================== def train_optimized_svm(X_train: np.ndarray, y_train: np.ndarray) -> tuple: """训练优化后的SVM模型(适配多类别)""" print("\n" + "="*50) print("开始模型训练与超参数优化...") print(f"训练集类别分布:{pd.Series(y_train).value_counts().to_dict()}") # 简化参数网格(加快训练速度,避免不必要的拟合) param_grid = { 'C': [1, 10, 100], 'kernel': ['linear', 'rbf'], 'gamma': ['scale', 'auto'], 'class_weight': [None, 'balanced'] # 处理类别不平衡 } # 网格搜索(交叉验证) grid_search = GridSearchCV( estimator=SVC(probability=True, random_state=RANDOM_STATE), param_grid=param_grid, cv=CV_FOLDS, scoring='f1_weighted', # 适合多分类的评价指标 n_jobs=-1, verbose=1 ) grid_search.fit(X_train, y_train) # 输出最佳参数 print(f"最佳参数组合:{grid_search.best_params_}") print(f"交叉验证最佳F1分数:{grid_search.best_score_:.4f}") return grid_search.best_estimator_, grid_search # ====================== 5. 模型评价(完善版) ====================== def evaluate_model(model, X_test: np.ndarray, y_test: np.ndarray, label_encoder: LabelEncoder): """模型评价(多类别适配)""" print("\n" + "="*50) print("模型评价结果:") # 预测 y_pred = model.predict(X_test) y_prob = model.predict_proba(X_test) # 1. 基础评价指标 accuracy = accuracy_score(y_test, y_pred) weighted_f1 = f1_score(y_test, y_pred, average='weighted') print(f"准确率:{accuracy:.4f}") print(f"加权F1分数:{weighted_f1:.4f}") print("\n详细分类报告:") print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0)) # 2. 混淆矩阵 cm = confusion_matrix(y_test, y_pred) plt.figure(figsize=(12, 10)) sns.heatmap( cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_, annot_kws={'size': 10} ) plt.title('混淆矩阵', fontsize=14, pad=20) plt.xlabel('预测标签', fontsize=12) plt.ylabel('真实标签', fontsize=12) plt.xticks(rotation=45, ha='right') plt.yticks(rotation=0) plt.tight_layout() plt.savefig('scitech_confusion_matrix.png', dpi=300, bbox_inches='tight') plt.close() # 3. 交叉验证分数可视化 cv_scores = cross_val_score(model, X_test, y_test, cv=CV_FOLDS, scoring='f1_weighted') plt.figure(figsize=(8, 6)) sns.boxplot(x=cv_scores, palette='viridis') plt.title(f'交叉验证F1分数分布({CV_FOLDS}折)', fontsize=14, pad=20) plt.xlabel('F1分数', fontsize=12) plt.axvline(x=cv_scores.mean(), color='red', linestyle='--', label=f'平均值: {cv_scores.mean():.4f}') plt.legend() plt.tight_layout() plt.savefig('scitech_cv_scores.png', dpi=300, bbox_inches='tight') plt.close() print("模型评价完成,所有可视化文件已保存") return { 'accuracy': accuracy, 'weighted_f1': weighted_f1, 'cv_scores': cv_scores.mean() } # ====================== 6. 
特征重要性分析(优化) ====================== def plot_feature_importance(model, vectorizer, selector, top_n: int = TOP_N_FEATURES): """可视化特征重要性""" print("\n" + "="*50) print("生成特征重要性可视化...") if not hasattr(model, 'coef_'): print("当前核函数不支持特征重要性计算(建议使用linear核)") return # 获取特征名称(考虑特征选择) all_features = vectorizer.get_feature_names_out() selected_mask = selector.get_support() selected_features = all_features[selected_mask] # 获取系数(多分类取绝对值最大的一类系数) if len(model.coef_) > 1: coefs = np.max(np.abs(model.coef_), axis=0) # 多分类时取各特征最大绝对系数 else: coefs = model.coef_.toarray()[0] if hasattr(model.coef_, 'toarray') else model.coef_[0] if len(coefs) != len(selected_features): print("特征系数与特征名称长度不匹配,跳过可视化") return # 排序并取TOP N sorted_idx = np.argsort(coefs)[::-1][:top_n] top_features = [selected_features[i] for i in sorted_idx] top_coefs = coefs[sorted_idx] # 绘图 plt.figure(figsize=(12, 8)) bars = plt.barh(top_features, top_coefs, color='darkgreen', alpha=0.7) plt.xlabel('特征权重(绝对值)', fontsize=12) plt.title(f'TOP {top_n} 重要特征', fontsize=14, pad=20) plt.gca().invert_yaxis() # 添加数值标签 for bar, coef in zip(bars, top_coefs): plt.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height()/2, f'{coef:.3f}', ha='left', va='center', fontsize=10) plt.tight_layout() plt.savefig('scitech_feature_importance.png', dpi=300, bbox_inches='tight') plt.close() # ====================== 7. 改进建议(具体化) ====================== def get_improvement_suggestions(evaluation_results: dict): """基于模型表现的改进建议""" print("\n" + "="*50) print("模型改进建议:") suggestions = [ ( "数据层面", [ "扩展爬取范围:增加爬取页数(建议≥20页)和其他科技网站数据,提升样本多样性", "完善正文爬取:优化正文解析规则,确保更多新闻能获取完整内容", "数据平衡:对样本量较少的类别进行数据增强(如同义词替换)或合并相似栏目" ] ), ( "特征工程", [ "增加自定义特征:提取新闻长度、关键词密度、发布时间差等辅助特征", "尝试词嵌入:使用Word2Vec/GloVe/BERT等预训练模型替换TF-IDF,提升语义理解", "特征融合:结合标题特征和正文特征,使用加权融合策略" ] ), ( "模型优化", [ "尝试其他模型:对比随机森林、XGBoost、LightGBM等集成学习模型", "深度学习:使用CNN/RNN/Transformer处理文本,适合大规模数据场景", "超参数细化:扩大参数搜索范围,使用贝叶斯优化替代网格搜索" ] ), ( "工程落地", [ "构建实时爬取管道:使用定时任务(如Celery)定期更新训练数据", "模型部署:封装为API服务(如FastAPI),支持实时新闻分类请求", "监控迭代:建立模型性能监控机制,定期重新训练适应数据分布变化" ] ) ] # 根据模型表现调整建议优先级 if evaluation_results['weighted_f1'] < 0.7: print("\n⚠️ 当前模型性能一般,建议优先优化:") print("- 扩展数据量和数据质量") print("- 优化文本预处理和特征工程") elif 0.7 <= evaluation_results['weighted_f1'] < 0.85: print("\n✅ 当前模型性能良好,建议进一步优化:") print("- 尝试深度学习模型或特征融合") print("- 细化超参数搜索") else: print("\n🎉 当前模型性能优秀,建议关注:") print("- 工程化落地和实时更新机制") print("- 细粒度分类扩展") # 输出详细建议 for category, tips in suggestions: print(f"\n【{category}】") for i, tip in enumerate(tips, 1): print(f"{i}. {tip}") # ====================== 主函数(流程串联) ====================== if __name__ == "__main__": try: # 1. 数据爬取(默认15页,确保多类别) tech_df = crawl_scitech_news(pages=15) tech_df.to_csv('scitech_news_dataset.csv', index=False, encoding='utf-8-sig') print(f"\n数据已保存至:scitech_news_dataset.csv") # 2. 过滤单类别/小众类别数据(核心修复步骤) filtered_df = filter_single_category(tech_df) # 3. 数据探索 explore_data(filtered_df) # 4. 文本预处理 X, y, label_encoder, vectorizer, selector, processed_df = preprocess_text(filtered_df) # 5. 划分数据集(分层抽样,保持类别比例) X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y ) print(f"\n训练集规模:{X_train.shape[0]}条,测试集规模:{X_test.shape[0]}条") print(f"训练集类别数:{len(np.unique(y_train))}个,测试集类别数:{len(np.unique(y_test))}个") # 6. 模型训练与优化 best_svm_model, grid_search = train_optimized_svm(X_train, y_train) # 7. 模型评价 eval_results = evaluate_model(best_svm_model, X_test, y_test, label_encoder) # 8. 特征重要性可视化 plot_feature_importance(best_svm_model, vectorizer, selector) # 9. 
输出改进建议 get_improvement_suggestions(eval_results) # 10. 保存模型 import joblib joblib.dump({ 'model': best_svm_model, 'vectorizer': vectorizer, 'selector': selector, 'label_encoder': label_encoder, 'eval_results': eval_results }, 'scitech_news_classifier.pkl') print("\n模型已保存至:scitech_news_classifier.pkl") print("\n" + "="*50) print("科技频道新闻分类全流程完成!") except Exception as e: print(f"\n程序执行失败:{str(e)}") print("建议检查:1. 爬取页数是否足够 2. 网络连接 3. 类别提取逻辑")
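Once the pipeline above has run to completion, the saved joblib bundle can be reused without retraining. Below is a minimal usage sketch, assuming scitech_news_classifier.pkl was written with the keys shown in the main block (model, vectorizer, selector, label_encoder); the tokenization simply mirrors chinese_tokenizer (space-joined jieba tokens with single-character words dropped), and predict_category is an illustrative helper, not part of the original script.

import jieba
import joblib

bundle = joblib.load("scitech_news_classifier.pkl")
model, vectorizer = bundle["model"], bundle["vectorizer"]
selector, label_encoder = bundle["selector"], bundle["label_encoder"]

def predict_category(text: str) -> str:
    # Re-apply the training-time preprocessing: jieba segmentation, drop one-character tokens.
    tokens = " ".join(w for w in jieba.lcut(text) if len(w) > 1)
    # TF-IDF vectorize, apply the same SelectKBest mask, then predict and decode the label.
    features = selector.transform(vectorizer.transform([tokens]))
    return label_encoder.inverse_transform(model.predict(features))[0]

print(predict_category("国产大模型在多项评测中刷新纪录"))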
Building prefix dict from the default dictionary ... Loading model from cache C:\Users\35658\AppData\Local\Temp\jieba.cache Loading model cost 0.524 seconds. Prefix dict has been built successfully. 开始爬取北京银行与科技公司合作新闻... 正在处理第 1 页... 第 1 页无新闻,停止爬取 未找到符合条件的合作信息 共获取 0 条合作记录 总耗时: 0.43 秒这段代码没结果import requests from bs4 import BeautifulSoup import pandas as pd import jieba import jieba.posseg as pseg import time import random import re from gne import GeneralNewsExtractor from datetime import datetime import os import urllib.parse # 配置jieba分词器 jieba.initialize() # 添加金融科技领域专有名词和公司名称 tech_keywords = ['科技', '技术', '数字', '智能', '数据', '信息', '云', 'AI', '区块链', '金融科技', '创新', '研发'] company_names = ['腾讯', '阿里', '百度', '京东', '字节跳动', '华为', '小米', '蚂蚁集团', '商汤科技', '旷视科技', '科大讯飞'] # 添加自定义词典 for name in company_names: jieba.add_word(name, freq=1000, tag='nt') jieba.add_word('北京银行', freq=1000, tag='nt') jieba.add_word('北银', freq=1000, tag='nt') jieba.add_word('BNK', freq=1000, tag='nt') # 初始化通用新闻抽取器 extractor = GeneralNewsExtractor() # 搜索关键词 search_query = "北京银行 科技公司 合作" encoded_query = urllib.parse.quote(search_query) # 确保正确编码 # 修复后的请求头配置 headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6", "Connection": "keep-alive", "Upgrade-Insecure-Requests": "1" } # 添加Cookie的单独处理 def get_cookies(): """正确编码和处理Cookie""" cookie_str = "x-hng=lang=zh-CN&domain=www.ringdata.com; tokenWebRefresh=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySW5mbyI6eyJ1c2VySWQiOjM4MTQ2LCJuYW1lIjoi5Yav6YC45L2zIiwidGVsUGhvbmUiOm51bGwsImVtYWlsIjoiZmVuZzEyM2ZveEBmb3htYWlsLmNvbSIsImlmTG9naW5ObHAiOmZhbHNlLCJubHBSb2xlIjpudWxsLCJubHBQZXJtaXNzaW9uIjpudWxsLCJpZkxvZ2luU3BpZGVyIjpmYWxzZSwic3BpZGVyUm9sZSI6bnVsbCwic3BpZGVyUGVybWlzc2lvbiI6bnVsbCwiaWZMb2dpbkNuZCI6ZmFsc2UsInBlcm1pc3Npb24iOm51bGwsInR5cGUiOjEsIndzS2V5IjoiMzgxNDZfMTc1NjU3ODk5MTAwMCIsInRva2VuIjpudWxsfSwidXNlcl9uYW1lIjoi5Yav6YC45L2zIiwic2NvcGUiOlsiYWxsIl0sImF0aSI6IjFjYzNjZGFjLWIwZmEtNDQ0Yi05M2ExLWM2ZWIzZTgxNzZjOSIsImV4cCI6MTgyODU4MjU5MSwianRpIjoiODVhZDljZGQtOWIwMC00ZmY5LTgyODItNGY0ZjRhYjFmZDY5IiwiY2xpZW50X2lkIjoibW9uZXR3YXJlIn0.zOAr-8CgRuNHWHnR1P6EHeUV7-xK9s71VCJh1h36isM" # 正确解析Cookie字符串 cookies = {} for cookie in cookie_str.split(';'): if '=' in cookie: key, value = cookie.strip().split('=', 1) cookies[key] = urllib.parse.quote(value, safe='') if "%" not in value else value return cookies cookies = get_cookies() # 获取正确编码的Cookie def fetch_news_list(page_num): """获取指定页面的新闻列表""" base_url = "https://www.ringdata.com/news/result" params = { "keywords": search_query, "position": page_num } try: # 使用单独传递的cookies参数 response = requests.get( base_url, params=params, headers=headers, cookies=cookies, # 使用单独传递的cookies timeout=30 ) response.raise_for_status() # 检查是否被重定向或反爬 if response.url != base_url and "login" in response.url: print("需要登录或Cookie已过期") return None return response.text except requests.exceptions.RequestException as e: print(f"获取第 {page_num} 页失败: {str(e)}") return None def parse_news_list(html_content): """解析新闻列表页""" soup = BeautifulSoup(html_content, 'html.parser') news_list = [] # 查找新闻项 - 使用更灵活的选择器 items = soup.select('.info-item, .news-item, .item, .list-item') for item in items: try: # 提取标题和链接 title_elem = item.select_one('a[href]') if not title_elem: continue title = title_elem.get_text(strip=True) relative_url = title_elem.get('href', '') # 
构造完整URL if relative_url.startswith('/'): full_url = f"https://www.ringdata.com{relative_url}" elif relative_url.startswith('http'): full_url = relative_url else: full_url = f"https://www.ringdata.com/{relative_url}" # 提取源和日期 source = "未知来源" date = "未知日期" # 尝试多种方法查找日期和来源 meta_container = item.select_one('.source, .date, .info, .meta, .time') if meta_container: meta_text = meta_container.get_text(strip=True) # 使用正则提取日期 date_match = re.search(r'(\d{4}[-/年]\d{1,2}[-/月]\d{1,2}[日]?|\d{4}[-/年]\d{1,2}月|\d{4}年)', meta_text) if date_match: date = date_match.group() # 尝试提取来源 source_match = re.search(r'来源[::]?\s*([^|\s]+)', meta_text) if source_match: source = source_match.group(1) elif len(meta_text) < 20 and not re.search(r'\d', meta_text): source = meta_text news_list.append({ "title": title, "url": full_url, "source": source, "publish_date": date }) except Exception as e: print(f"解析新闻项时出错: {str(e)}") return news_list def extract_news_content(url): """使用gne抽取新闻内容和发布时间""" try: # 使用单独传递的cookies response = requests.get(url, headers=headers, cookies=cookies, timeout=30) html = response.text # 使用gne提取新闻信息 result = extractor.extract(html, with_body_html=False) # 标准化时间格式 publish_time = result.get('publish_time', '') if publish_time: try: # 尝试转换为标准格式 dt = datetime.strptime(publish_time, '%Y-%m-%d %H:%M:%S') return result['content'], dt.strftime('%Y-%m-%d') except: return result['content'], publish_time return result['content'], '' except Exception as e: print(f"使用gne提取内容失败: {str(e)}") return "", "" def extract_tech_companies(text): """从文本中提取科技公司实体""" words = pseg.cut(text) tech_companies = set() current_entity = [] bank_keywords = {'北京银行', '北银', 'BNK'} for word, flag in words: # 机构名或专有名词 if flag in ['nt', 'nz', 'j', 'x'] and word not in bank_keywords: # 检查是否是科技相关 if any(kw in word for kw in tech_keywords) or word in company_names: current_entity.append(word) elif current_entity: # 如果当前实体不为空,添加进去 entity = ''.join(current_entity) tech_companies.add(entity) current_entity = [word] # 开始新实体 else: current_entity.append(word) elif current_entity: # 遇到非机构名词,完成当前实体 entity = ''.join(current_entity) if any(kw in entity for kw in tech_keywords) or entity in company_names: tech_companies.add(entity) current_entity = [] # 处理最后一个实体 if current_entity: entity = ''.join(current_entity) if any(kw in entity for kw in tech_keywords) or entity in company_names: tech_companies.add(entity) # 过滤掉过短的词 return {c for c in tech_companies if len(c) >= 2} def analyze_cooperation(content, tech_companies): """分析内容提取合作关系""" # 合作关键词 coop_keywords = {'合作', '签约', '战略', '联手', '协作', '共同', '携手', '联盟', '协议', '合作项目', '签署', '签约仪式', '战略合作'} coop_companies = set() # 查找包含合作关键词的句子 sentences = re.split(r'[。!?;\n]', content) coop_sentences = [s for s in sentences if any(kw in s for kw in coop_keywords)] # 找出在合作句子中出现的公司 for company in tech_companies: if any(company in s for s in coop_sentences): coop_companies.add(company) return coop_companies def extract_cooperation_date(content, publish_date): """从内容中提取合作时间""" # 尝试在内容中查找具体日期 date_patterns = [ r'(\d{4})[-年](\d{1,2})[-月](\d{1,2})日?', r'(\d{4})年(\d{1,2})月', r'(\d{4})年' ] for pattern in date_patterns: match = re.search(pattern, content) if match: groups = match.groups() if len(groups) == 3: return f"{groups[0]}-{groups[1].zfill(2)}-{groups[2].zfill(2)}" elif len(groups) == 2: return f"{groups[0]}-{groups[1].zfill(2)}-01" else: return f"{groups[0]}-01-01" # 使用新闻发布日期 return publish_date def scrape_all_news(max_pages=50): """爬取所有新闻数据""" all_results = [] page_num = 1 print(f"开始爬取北京银行与科技公司合作新闻...") while 
page_num <= max_pages: print(f"正在处理第 {page_num} 页...") # 获取新闻列表页 list_html = fetch_news_list(page_num) if not list_html: break # 解析新闻列表 news_list = parse_news_list(list_html) if not news_list: print(f"第 {page_num} 页无新闻,停止爬取") break print(f"找到 {len(news_list)} 条新闻") # 处理每条新闻 for news in news_list: print(f" 分析新闻: {news['title'][:40]}...") # 获取新闻详情 content, detailed_date = extract_news_content(news['url']) publish_date = detailed_date or news['publish_date'] # 提取科技公司 full_text = f"{news['title']}。{content}" tech_companies = extract_tech_companies(full_text) if not tech_companies: print(" 未识别到科技公司") continue # 分析合作关系 coop_companies = analyze_cooperation(content, tech_companies) if not coop_companies: print(" 未识别到合作关系") continue # 提取合作时间 coop_date = extract_cooperation_date(content, publish_date) # 添加到结果 all_results.append({ "银行": "北京银行", "合作公司": ", ".join(coop_companies), "合作时间": coop_date, "新闻标题": news['title'], "新闻发布时间": publish_date, "新闻来源": news['source'], "新闻链接": news['url'] }) print(f" 发现合作: {', '.join(coop_companies)}") # 每个新闻间隔1-3秒 time.sleep(random.uniform(1, 3)) # 翻页间隔3-6秒 time.sleep(random.uniform(3, 6)) page_num += 1 return all_results def save_results(results): """保存结果到文件""" if not results: print("未找到符合条件的合作信息") return None # 创建数据目录 data_dir = "北京银行合作数据" os.makedirs(data_dir, exist_ok=True) # 转换为DataFrame df = pd.DataFrame(results) # 保存CSV csv_path = os.path.join(data_dir, "北京银行_科技公司合作.csv") df.to_csv(csv_path, index=False, encoding='utf-8-sig') # 保存Excel excel_path = os.path.join(data_dir, "北京银行_科技公司合作.xlsx") df.to_excel(excel_path, index=False) # 保存原始数据JSON json_path = os.path.join(data_dir, "原始数据.json") with open(json_path, 'w', encoding='utf-8') as f: json.dump(results, f, ensure_ascii=False, indent=2) print(f"保存成功: {csv_path}, {excel_path}, {json_path}") return df def generate_report(df): """生成分析报告""" if df is None or df.empty: return report_dir = "分析报告" os.makedirs(report_dir, exist_ok=True) # 按公司统计 df['合作年份'] = df['合作时间'].str.extract(r'(\d{4})')[0] company_stats = df.assign(合作公司=df['合作公司'].str.split(', ')).explode('合作公司') # 公司合作次数排名 company_count = company_stats['合作公司'].value_counts().reset_index() company_count.columns = ['公司名称', '合作次数'] company_count.to_csv(os.path.join(report_dir, '公司合作次数排名.csv'), index=False, encoding='utf-8-sig') # 年度合作趋势 year_count = company_stats['合作年份'].value_counts().sort_index().reset_index() year_count.columns = ['年份', '合作次数'] year_count.to_csv(os.path.join(report_dir, '年度合作趋势.csv'), index=False, encoding='utf-8-sig') # 热门公司TOP10 top_companies = company_stats['合作公司'].value_counts().head(10) print("\n热门合作科技公司TOP10:") print(top_companies) print("\n分析报告已生成在 '分析报告' 目录中") if __name__ == "__main__": # 开始爬取 start_time = time.time() results = scrape_all_news(max_pages=100) # 保存结果 df = save_results(results) # 生成报告 if df is not None: generate_report(df) # 统计信息 print(f"\n共获取 {len(results) if results else 0} 条合作记录") print(f"总耗时: {time.time() - start_time:.2f} 秒") 为什么,怎么办
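The symptom here is the same as in the first script: requests plus a hard-coded cookie still yields an empty listing, so the most likely cause is that www.ringdata.com/news/result builds its result list client-side (or fetches it from a JSON endpoint you can spot in the browser's DevTools → Network tab). One way to confirm is to render the page in a real browser engine. Below is a rough sketch using Selenium with headless Chrome; the URL, parameters and selectors are taken from the script above, the fixed sleep is a crude stand-in for an explicit wait, and the whole approach is an assumption to verify, not a drop-in fix.

import time
import urllib.parse

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

query = urllib.parse.quote("北京银行 科技公司 合作")
url = f"https://www.ringdata.com/news/result?keywords={query}&position=1"

options = Options()
options.add_argument("--headless=new")   # run Chrome without a visible window
driver = webdriver.Chrome(options=options)
try:
    driver.get(url)
    time.sleep(5)                        # crude wait for client-side rendering to finish
    soup = BeautifulSoup(driver.page_source, "html.parser")
    items = soup.select(".info-item, .news-item, .item, .list-item")
    print(f"rendered page yields {len(items)} candidate news items")
finally:
    driver.quit()

If the rendered page does contain items while the plain requests call does not, the fix is either to keep a browser driver in the scraping loop or to call the underlying JSON API directly with the proper parameters and authentication.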