《Machine.Learning.in.Action》pdf

本文提供了网盘资源的下载链接及方法。介绍了如何通过指定的网盘进行文件下载。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Machine Learning in Action is a unique book that blends the foundational theories of machine learning with the practical realities of building tools for everyday data analysis. You'll use the flexible Python programming language to build programs that implement algorithms for data classification, forecasting, recommendations, and higher-level features like summarization and simplification. About the Book A machine is said to learn when its performance improves with experience. Learning requires algorithms and programs that capture data and ferret out the interesting or useful patterns. Once the specialized domain of analysts and mathematicians, machine learning is becoming a skill needed by many. Machine Learning in Action is a clearly written tutorial for developers. It avoids academic language and takes you straight to the techniques you'll use in your day-to-day work. Many (Python) examples present the core algorithms of statistical data processing, data analysis, and data visualization in code you can reuse. You'll understand the concepts and how they fit in with tactical tasks like classification, forecasting, recommendations, and higher-level features like summarization and simplification. Readers need no prior experience with machine learning or statistical processing. Familiarity with Python is helpful. 
What's Inside: A no-nonsense introduction Examples showing common ML tasks Everyday data analysis Implementing classic algorithms like Apriori and AdaBoost =================================== Table of Contents PART 1 CLASSIFICATION Machine learning basics Classifying with k-Nearest Neighbors Splitting datasets one feature at a time: decision trees Classifying with probability theory: naïve Bayes Logistic regression Support vector machines Improving classification with the AdaBoost meta-algorithm PART 2 FORECASTING NUMERIC VALUES WITH REGRESSION Predicting numeric values: regression Tree-based regression PART 3 UNSUPERVISED LEARNING Grouping unlabeled items using k-means clustering Association analysis with the Apriori algorithm Efficiently finding frequent itemsets with FP-growth PART 4 ADDITIONAL TOOLS Using principal component analysis to simplify data Simplifying data with the singular value decomposition Big data and MapReduce
# E:\AI_System\agent\knowledge_system\knowledge_visualizer.py
"""Visualization helpers for the knowledge system.

Renders the knowledge graph, a creation timeline, and (optionally) the
learning engine's progress charts with matplotlib/networkx.

NOTE(review): this chunk actually contains TWO source files pasted together
(knowledge_visualizer.py and knowledge_integration.py); they are kept in
their original order below, each with its own import section and demo guard.
"""
from collections import defaultdict  # FIX: used by visualize_knowledge_timeline but was never imported
from datetime import datetime

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

from knowledge_manager import KnowledgeManager
from self_directed_learning import AutonomousLearningEngine


class KnowledgeVisualizer:
    """Draws charts for a KnowledgeManager and an optional learning engine.

    Assumes ``knowledge_manager.knowledge_base`` is a dict with keys
    ``"knowledge"`` (id -> entry dict with "title"/"category"/"created_at")
    and ``"relationships"`` (list of dicts with "source"/"target"/"type"/
    "strength") — inferred from usage here; confirm against KnowledgeManager.
    """

    def __init__(self, knowledge_manager, learning_engine=None):
        # km: the knowledge store to visualize; learning_engine is optional
        # and only needed for visualize_learning_progress().
        self.km = knowledge_manager
        self.learning_engine = learning_engine

    def visualize_knowledge_graph(self, min_strength=0.1, figsize=(14, 10)):
        """Plot the knowledge graph as a directed network.

        Args:
            min_strength: edges with strength below this are omitted.
            figsize: matplotlib figure size (inches).
        """
        G = nx.DiGraph()

        # Knowledge entries become nodes, colored by category.
        for kid, entry in self.km.knowledge_base["knowledge"].items():
            G.add_node(
                kid,
                title=entry["title"],
                type="knowledge",
                category=entry.get("category", "未分类"),
            )

        # Relationships become weighted edges (filtered by strength).
        for rel in self.km.knowledge_base["relationships"]:
            if rel["strength"] >= min_strength:
                G.add_edge(
                    rel["source"],
                    rel["target"],
                    rel_type=rel["type"],
                    strength=rel["strength"],
                )

        if len(G.nodes) == 0:
            print("知识库为空,无法生成图谱")
            return

        plt.figure(figsize=figsize)

        # One distinct color per category, sampled from the tab10 colormap.
        categories = set(nx.get_node_attributes(G, 'category').values())
        color_map = plt.cm.tab10(np.linspace(0, 1, len(categories)))
        category_colors = {cat: color_map[i] for i, cat in enumerate(categories)}
        node_colors = [
            category_colors[G.nodes[node].get('category', '未分类')]
            for node in G.nodes
        ]

        # Fixed seed keeps the layout stable across runs.
        pos = nx.spring_layout(G, seed=42, k=0.15)

        nx.draw_networkx_nodes(
            G, pos, node_size=800, node_color=node_colors, alpha=0.9
        )

        # Edge width scales with relationship strength.
        edge_widths = [d['strength'] * 4 + 1 for _, _, d in G.edges(data=True)]
        edge_colors = ['gray'] * G.number_of_edges()
        nx.draw_networkx_edges(
            G, pos,
            width=edge_widths,
            edge_color=edge_colors,
            alpha=0.6,
            arrows=True,
            arrowsize=15,
        )

        labels = {node: data['title'] for node, data in G.nodes(data=True)}
        nx.draw_networkx_labels(
            G, pos, labels=labels, font_size=9, font_family='sans-serif'
        )

        # Build a category legend from proxy artists.
        legend_handles = [
            plt.Line2D([0], [0], marker='o', color='w',
                       markerfacecolor=color, markersize=10, label=cat)
            for cat, color in category_colors.items()
        ]
        plt.legend(handles=legend_handles, loc='best')

        plt.title("知识图谱", fontsize=16)
        plt.axis('off')
        plt.tight_layout()
        plt.show()

    def visualize_knowledge_timeline(self):
        """Plot a per-month line chart of knowledge-entry creation counts."""
        if not self.km.knowledge_base["knowledge"]:
            print("知识库为空")
            return

        dates = [
            datetime.fromisoformat(entry["created_at"])
            for entry in self.km.knowledge_base["knowledge"].values()
        ]

        # Bucket by "YYYY-MM".
        # FIX: defaultdict was referenced without an import — NameError before.
        months = defaultdict(int)
        for date in dates:
            months[date.strftime("%Y-%m")] += 1

        # Chronological order; "YYYY-MM" strings sort lexicographically.
        sorted_months = sorted(months.items(), key=lambda x: x[0])
        x = [item[0] for item in sorted_months]
        y = [item[1] for item in sorted_months]

        plt.figure(figsize=(12, 6))
        plt.plot(x, y, marker='o', linestyle='-', color='b')
        plt.fill_between(x, y, color='skyblue', alpha=0.4)
        plt.title("知识创建时间线")
        plt.xlabel("月份")
        plt.ylabel("知识数量")
        plt.xticks(rotation=45)
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.show()

    def visualize_learning_progress(self):
        """Plot a radar chart of core skills and a histogram of session durations.

        Requires a learning engine; prints a notice and returns otherwise.
        Assumes ``learning_engine.learning_journey`` has "core_skills"
        (name -> numeric level) and "nodes" (entries with "title"/"duration")
        — inferred from usage; confirm against AutonomousLearningEngine.
        """
        if not self.learning_engine:
            print("未提供学习引擎")
            return

        core_skills = self.learning_engine.learning_journey["core_skills"]
        if not core_skills:
            print("无核心能力数据")
            return

        skills = list(core_skills.keys())
        values = list(core_skills.values())

        # Radar chart: evenly spaced angles, then repeat the first point
        # so the polygon closes.
        angles = np.linspace(0, 2 * np.pi, len(skills), endpoint=False).tolist()
        values += values[:1]
        angles += angles[:1]

        fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
        ax.fill(angles, values, color='skyblue', alpha=0.25)
        ax.plot(angles, values, color='b', linewidth=2)
        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(skills)
        ax.set_yticklabels([])
        plt.title("核心能力发展雷达图", size=16, y=1.1)
        plt.tight_layout()
        plt.show()

        # Histogram of learning-session durations, skipping the seed node.
        learning_times = [
            node["duration"]
            for node in self.learning_engine.learning_journey["nodes"].values()
            if node["title"] != "初始状态"
        ]
        if learning_times:
            plt.figure(figsize=(10, 6))
            plt.hist(learning_times, bins=10,
                     color='lightgreen', edgecolor='darkgreen')
            plt.title("学习时长分布")
            plt.xlabel("学习时长(小时)")
            plt.ylabel("频次")
            plt.grid(axis='y', alpha=0.75)
            plt.tight_layout()
            plt.show()


# Usage example (knowledge_visualizer.py)
if __name__ == "__main__":
    km = KnowledgeManager()
    learner = AutonomousLearningEngine("user123", km)
    visualizer = KnowledgeVisualizer(km, learner)

    visualizer.visualize_knowledge_graph()
    visualizer.visualize_knowledge_timeline()
    visualizer.visualize_learning_progress()


# E:\AI_System\agent\knowledge_system\knowledge_integration.py
import os
import json
from datetime import datetime  # FIX: datetime.now() is used below but was never imported — NameError before

import requests
from bs4 import BeautifulSoup

from knowledge_manager import KnowledgeManager
from config import WEB_SOURCES, API_KEYS

# Network timeout (seconds) for all outbound HTTP requests; without one,
# requests.get can block forever on a stalled server.
_HTTP_TIMEOUT = 30


class KnowledgeIntegrator:
    """Imports knowledge entries into a KnowledgeManager from web pages,
    HTTP APIs, and local files, keeping an in-memory audit trail."""

    def __init__(self, knowledge_manager):
        self.km = knowledge_manager
        # List of dicts recording every successful import (source, type,
        # knowledge_id, timestamp). In-memory only — lost on restart.
        self.integration_history = []

    def integrate_from_web(self, url, category=None):
        """Scrape a web page and store its <p> text as one knowledge entry.

        Returns the new knowledge id, or None on any failure.
        """
        try:
            # FIX: bounded timeout + explicit HTTP error check so a 404/500
            # page is not silently ingested as "knowledge".
            response = requests.get(url, timeout=_HTTP_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            title = soup.title.string if soup.title else "未命名知识"
            # Concatenate all paragraph text, blank-line separated.
            content = "".join(p.get_text() + "\n\n" for p in soup.find_all('p'))

            knowledge_id = self.km.add_knowledge(
                title=title,
                content=content,
                category=category,
                tags=["web_import"],
            )

            self.integration_history.append({
                "source": url,
                "type": "web",
                "knowledge_id": knowledge_id,
                "timestamp": datetime.now().isoformat(),
            })

            print(f"从网页整合知识: {title}")
            return knowledge_id
        except Exception as e:
            # Best-effort import: report and keep going.
            print(f"网页整合失败: {e}")
            return None

    def integrate_from_api(self, api_name, query):
        """Fetch content from a configured API and store it as knowledge.

        Currently only the "wikipedia" API is implemented; returns the new
        knowledge id for the first matching page, or None.
        """
        try:
            api_config = API_KEYS.get(api_name)
            if not api_config:
                print(f"未配置的API: {api_name}")
                return None

            if api_name == "wikipedia":
                endpoint = "https://en.wikipedia.org/w/api.php"
                params = {
                    "action": "query",
                    "format": "json",
                    "titles": query,
                    "prop": "extracts",
                    "exintro": True,       # intro section only
                    "explaintext": True,   # plain text, no HTML
                }
                # FIX: bounded timeout (see integrate_from_web).
                response = requests.get(endpoint, params=params,
                                        timeout=_HTTP_TIMEOUT)
                data = response.json()

                pages = data.get("query", {}).get("pages", {})
                for page_id, page_data in pages.items():
                    if page_id == "-1":  # Wikipedia's "missing page" marker
                        continue
                    title = page_data.get("title", query)
                    content = page_data.get("extract", "无可用内容")

                    knowledge_id = self.km.add_knowledge(
                        title=title,
                        content=content,
                        category="Wikipedia",
                        tags=["api_import", "wikipedia"],
                    )

                    self.integration_history.append({
                        "source": f"{api_name}:{query}",
                        "type": "api",
                        "knowledge_id": knowledge_id,
                        "timestamp": datetime.now().isoformat(),
                    })

                    print(f"从API整合知识: {title}")
                    # Only the first hit is stored, matching original behavior.
                    return knowledge_id
            return None
        except Exception as e:
            print(f"API整合失败: {e}")
            return None

    def integrate_from_file(self, file_path, category=None):
        """Import a local .txt or .pdf file as a knowledge entry.

        Content is truncated to 5000 characters. Returns the new knowledge
        id, or None for unsupported types or on error.
        """
        try:
            if file_path.endswith('.txt'):
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            elif file_path.endswith('.pdf'):
                # Requires PyPDF2: pip install PyPDF2 (imported lazily so
                # text-only deployments don't need it).
                import PyPDF2
                content = ""
                with open(file_path, 'rb') as f:
                    reader = PyPDF2.PdfReader(f)
                    for page in reader.pages:
                        content += page.extract_text() + "\n"
            else:
                print("不支持的文件类型")
                return None

            # FIX: splitext strips only the final extension; .replace()
            # removed every occurrence of ".txt"/".pdf" in the name.
            title = os.path.splitext(os.path.basename(file_path))[0]

            knowledge_id = self.km.add_knowledge(
                title=title,
                content=content[:5000],  # cap stored length
                category=category,
                tags=["file_import"],
            )

            self.integration_history.append({
                "source": file_path,
                "type": "file",
                "knowledge_id": knowledge_id,
                "timestamp": datetime.now().isoformat(),
            })

            print(f"从文件整合知识: {title}")
            return knowledge_id
        except Exception as e:
            print(f"文件整合失败: {e}")
            return None

    def auto_integrate_scheduled(self):
        """Run every import task declared in config.WEB_SOURCES."""
        for source in WEB_SOURCES:
            if source["type"] == "web":
                self.integrate_from_web(source["url"], source.get("category"))
            elif source["type"] == "api":
                self.integrate_from_api(source["api"], source["query"])

    def get_integration_history(self):
        """Return the list of recorded import events (newest last)."""
        return self.integration_history


# Usage example (knowledge_integration.py)
if __name__ == "__main__":
    km = KnowledgeManager()
    integrator = KnowledgeIntegrator(km)

    integrator.integrate_from_web(
        "https://en.wikipedia.org/wiki/Machine_learning", category="AI"
    )
    integrator.integrate_from_api("wikipedia", "Artificial intelligence")
    integrator.integrate_from_file("sample.txt", category="文档")

    km.save_knowledge()
最新发布
08-11
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值