集合
集合是Python中的一种无序、可变、不重复元素的容器。
Python中的集合(set)与数学中的集合概念类似,
也是用于保存不重复的元素。它有可变集合(set)和不可变集合(frozenset)两种。
在形式上,集合的所有元素都放在一对大括号“{}”中,两个相邻元素间使用逗号“,”分隔。
集合最好的应用就是去重,因为集合中的每个元素都是唯一的。
1、集合的创建
# Creating sets (note: an empty set must be written set(); {} is an empty dict)
s1 = {1, 2, 3} # literal syntax
s2 = set([1, 2, 2, 3]) # built from a list; duplicates collapse -> {1, 2, 3}
s3 = set("hello") # built from a string; one element per distinct character -> {'h', 'e', 'l', 'o'}
# Immutable set
fs = frozenset([1, 2, 3]) # frozenset: same contents, but it cannot be modified after creation
2、集合操作
# Walk-through of the mutating set methods; each trailing comment shows the
# effect of that line given the statements before it.
s = {1, 2, 3}
# Adding elements
s.add(4) # {1, 2, 3, 4}
s.update([5, 6]) # adds every element of the iterable -> {1, 2, 3, 4, 5, 6}
# Removing elements
s.remove(3) # removes 3; raises KeyError when the element is absent
s.discard(10) # removes the element if present; does nothing (no error) otherwise
s.pop() # removes and returns an arbitrary element (which one is unspecified, not random)
s.clear() # empties the set -> set()
3、集合运算
# Set algebra: every operator has an equivalent method; none of these
# expressions modifies a or b.
a = {1, 2, 3}
b = {2, 3, 4}
# Union: elements found in either set
a | b # {1, 2, 3, 4}
a.union(b) # same result as a | b
# Intersection: elements found in both sets
a & b # {2, 3}
a.intersection(b) # same result as a & b
# Difference: elements of a that are not in b
a - b # {1}
a.difference(b) # same result as a - b
# Symmetric difference: elements found in exactly one of the two sets
a ^ b # {1, 4}
a.symmetric_difference(b) # same result as a ^ b
# Comparisons
{1, 2} <= {1, 2, 3} # True (subset test)
{1, 2, 3} >= {1, 2} # True (superset test)
4、集合方法
# Non-mutating helpers on a set
s = {1, 2, 3}
s.copy() # shallow copy (a new set holding the same elements)
len(s) # number of elements -> 3
3 in s # membership test -> True
s.isdisjoint({4,5}) # True when the two sets share no element (True here)
# End-to-end example: add, remove and clear elements of one set.
set1 = {1, 10, 100}
# Adding elements
set1.add(1000)
set1.add(10000)
print(set1)  # e.g. {1, 100, 1000, 10, 10000} -- display order is not guaranteed
# Removing elements
set1.discard(10)  # discard() never raises, even when the element is absent
if 100 in set1:
    # remove() raises KeyError for a missing element, hence the membership check
    set1.remove(100)
print(set1)  # e.g. {1, 1000, 10000}
# Clearing the set
set1.clear()
print(set1)  # set()
Python集合(Set)案例
案例1:社交媒体共同好友分析
# Friend-relationship data for the analysis example:
# user id -> set of that user's friends' names.
users = {
    'user1': {'张伟', '李娜', '王芳', '赵刚'},
    'user2': {'张伟', '刘强', '王芳', '陈晨'},
    'user3': {'李娜', '赵刚', '刘强', '吴晓'},
}
def find_common_friends(user_a, user_b):
    """Return the friends shared by two users.

    Looks both ids up in the module-level ``users`` mapping, prints the
    intersection of their friend sets and returns it. When either id is
    missing, prints a "user does not exist" message and returns an empty set.
    """
    if user_a not in users or user_b not in users:
        print("用户不存在!")
        return set()

    shared = users[user_a] & users[user_b]
    print(f"{user_a}和{user_b}的共同好友: {shared}")
    return shared
def suggest_new_friends(user_id):
    """Suggest people the given user might know.

    Pools the friend sets of every *other* entry in the module-level
    ``users`` mapping, then removes the people already in ``user_id``'s own
    friend set and ``user_id`` itself. Whether the other users are actually
    friends of ``user_id`` is not checked, so the result is "everyone known
    to the other users" rather than strict friends-of-friends.

    Prints and returns the suggestion set; for an unknown ``user_id`` prints
    a "user does not exist" message and returns an empty set.
    """
    if user_id not in users:
        print("用户不存在!")
        return set()

    candidates = set()
    for other_id, friends in users.items():
        if other_id != user_id:
            candidates.update(friends)

    # Drop existing friends and the user themself
    suggestions = candidates - users[user_id] - {user_id}
    print(f"{user_id}可能认识的人: {suggestions}")
    return suggestions
# Example usage
find_common_friends('user1', 'user2') # shared friends of user1 and user2: {'张伟', '王芳'}
suggest_new_friends('user3') # names known to user1/user2 but not to user3: {'张伟', '王芳', '陈晨'}
案例2:电商平台商品分类系统
# Product tag data for the tagging example: product id -> set of tags.
products = {
    'P001': {'电子产品', '手机', '苹果'},
    'P002': {'电子产品', '耳机', '蓝牙'},
    'P003': {'服装', '男装', 'T恤'},
    'P004': {'服装', '女装', '裙子'},
    'P005': {'电子产品', '手机', '安卓'},
}
def find_related_products(tags):
    """Return the ids of all products that carry every tag in ``tags``.

    ``tags`` must be a set (its ``issubset`` method is used). The match is
    against the module-level ``products`` mapping; the result is printed and
    returned as a set of product ids.
    """
    related = {
        pid
        for pid, product_tags in products.items()
        if tags.issubset(product_tags)
    }
    print(f"具有标签{tags}的商品: {related}")
    return related
def get_recommendations(product_id, n=3):
    """Recommend up to ``n`` products that share tags with ``product_id``.

    Every other product in the module-level ``products`` mapping is scored
    by the number of tags it has in common with the target. Products with no
    shared tag are never recommended, so fewer than ``n`` ids may come back.
    Ties keep the iteration order of ``products``.

    Args:
        product_id: id of the product to base the recommendation on.
        n: maximum number of recommendations; zero or negative yields none.

    Returns:
        List of product ids, best match first. Empty list (after printing a
        "product does not exist" message) when ``product_id`` is unknown.
    """
    if product_id not in products:
        print("商品不存在!")
        return []

    target_tags = products[product_id]
    overlaps = [
        (pid, len(target_tags & tags))
        for pid, tags in products.items()
        if pid != product_id
    ]
    # A product without any common tag is not a tag-based recommendation.
    ranked = sorted(
        (pair for pair in overlaps if pair[1] > 0),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # max() guards against a negative n slicing from the wrong end.
    recommended = [pid for pid, _ in ranked[:max(n, 0)]]
    print(f"根据{product_id}推荐的商品: {recommended}")
    return recommended
# Example usage
find_related_products({'电子产品', '手机'}) # products tagged with both: {'P001', 'P005'}
get_recommendations('P001', 2) # two best tag matches for P001: ['P005', 'P002']
案例3:网络安全IP地址分析
# Data for the network-monitoring example.
# Addresses treated as known/normal
normal_ips = {
    '192.168.1.1',
    '192.168.1.2',
    '192.168.1.3',
    '10.0.0.1',
}
# Addresses that are blacklisted
blacklisted_ips = {
    '10.0.0.2',
    '10.0.0.3',
    '192.168.1.100',
}
# Addresses seen in recent access records
recent_access = {
    '192.168.1.1',
    '192.168.1.4',
    '10.0.0.1',
    '10.0.0.3',
}
def detect_suspicious_activity():
    """Classify the recently seen IPs against the normal and blacklisted sets.

    Reads the module-level ``recent_access``, ``normal_ips`` and
    ``blacklisted_ips`` sets, prints the three groups and returns them in a
    dict with the keys ``'new_ips'``, ``'malicious_ips'`` and
    ``'normal_active'``. Only set membership is used; no access counts or
    thresholds are involved.
    """
    # Recently seen addresses that are in neither the normal nor the blacklisted set
    unknown = recent_access.difference(normal_ips).difference(blacklisted_ips)
    # Recently seen addresses that are blacklisted
    flagged = recent_access.intersection(blacklisted_ips)
    # Recently seen addresses that are in the normal set
    known = recent_access.intersection(normal_ips)

    print(f"新出现的IP地址: {unknown}")
    print(f"活跃的黑名单IP: {flagged}")
    print(f"活跃的正常IP: {known}")

    return {
        'new_ips': unknown,
        'malicious_ips': flagged,
        'normal_active': known,
    }
def update_blacklist(new_ips):
    """Add addresses to the module-level ``blacklisted_ips`` set in place.

    Args:
        new_ips: an iterable of IP strings, or a single IP string.

    Prints the blacklist after the update. Returns None.
    """
    if isinstance(new_ips, str):
        # set.update() iterates its argument, so a bare string would be
        # added one character at a time instead of as one address.
        new_ips = (new_ips,)
    # The set is mutated, not rebound, so no ``global`` declaration is needed.
    blacklisted_ips.update(new_ips)
    print(f"黑名单已更新: {blacklisted_ips}")
# Example usage
activity = detect_suspicious_activity()
if activity['new_ips']:
    # Only touch the blacklist when at least one unknown address was seen
    update_blacklist(activity['new_ips'])
案例4:学术论文关键词分析
# Data for the paper keyword example: paper id -> set of keywords.
papers = {
    'paper1': {'机器学习', '深度学习', '神经网络', '图像识别'},
    'paper2': {'深度学习', '自然语言处理', '文本分类'},
    'paper3': {'数据挖掘', '聚类分析', '关联规则'},
    'paper4': {'机器学习', '数据挖掘', '预测模型'},
    'paper5': {'神经网络', '计算机视觉', '图像识别'},
}
def find_related_papers(keywords):
    """Return the ids of papers that match at least one of ``keywords``.

    ``keywords`` must be a set (its ``isdisjoint`` method is used). Papers
    come from the module-level ``papers`` mapping; the result is printed and
    returned as a set of paper ids.
    """
    related = {
        pid
        for pid, paper_keywords in papers.items()
        if not keywords.isdisjoint(paper_keywords)
    }
    print(f"与关键词{keywords}相关的论文: {related}")
    return related
def get_trending_topics(min_papers=2):
    """Find keywords that appear in at least ``min_papers`` papers.

    Counts, for every keyword in the module-level ``papers`` mapping, how
    many papers contain it, prints the qualifying keywords and returns them.

    Args:
        min_papers: minimum number of papers a keyword must appear in.

    Returns:
        List of ``(keyword, count)`` tuples sorted by count descending;
        keywords with equal counts are ordered alphabetically so the result
        is the same on every run.
    """
    # Local import keeps this snippet self-contained.
    from collections import Counter

    # set() ensures a keyword counts at most once per paper, and a single
    # pass over the papers replaces one full rescan per keyword.
    counts = Counter(
        keyword
        for paper_keywords in papers.values()
        for keyword in set(paper_keywords)
    )
    trending = sorted(
        ((keyword, count) for keyword, count in counts.items() if count >= min_papers),
        key=lambda pair: (-pair[1], pair[0]),
    )

    print("热门研究主题:")
    for topic, count in trending:
        print(f"{topic}: {count}篇论文")
    return trending
# Example usage
find_related_papers({'机器学习', '数据挖掘'}) # papers sharing a keyword: {'paper1', 'paper3', 'paper4'}
get_trending_topics() # keywords found in at least 2 papers (five keywords, each in exactly 2)
字典(Dict)
Python字典(dict)是另一种可变容器模型,可存储任意类型的对象,如字符串、数字、元组,也可以是其他容器。
字典通过键(key)而不是位置来访问元素,因此不支持位置索引和切片;自Python 3.7起,字典会保持键的插入顺序。
1、创建和使用字典
# Creating dicts
d1 = {'a': 1, 'b': 2}  # literal syntax
d2 = dict(a=1, b=2)  # keyword arguments to the dict constructor
d3 = dict([('a', 1), ('b', 2)])  # from a list of (key, value) tuples
d4 = dict(zip(['a', 'b'], [1, 2]))  # from two parallel lists
d5 = dict.fromkeys(['a', 'b'], 0)  # {'a': 0, 'b': 0}; fromkeys is a classmethod, no throwaway {} needed
# Each keyword argument passed to dict() becomes one key-value pair
person = dict(name='张三', age=55, height=168, weight=60, addr='北京市朝阳区66号1栋101')
print(person)  # {'name': '张三', 'age': 55, 'height': 168, 'weight': 60, 'addr': '北京市朝阳区66号1栋101'}
# zip() pairs up two sequences, and dict() turns the pairs into a dict
items1 = dict(zip('ABCDE', '12345'))
print(items1)  # {'A': '1', 'B': '2', 'C': '3', 'D': '4', 'E': '5'}
items2 = dict(zip('ABCDE', range(1, 10)))  # zip stops at the shorter input, so 6..9 are unused
print(items2)  # {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5}
# Dict comprehension
items3 = {x: x ** 3 for x in range(1, 6)}
print(items3)  # {1: 1, 2: 8, 3: 27, 4: 64, 5: 125}
2、字典操作
基本操作
# Walk-through of basic dict operations; each trailing comment shows the
# effect of that line given the statements before it.
d = {'a': 1, 'b': 2}
# Reading values
d['a'] # 1 (raises KeyError when the key is missing)
d.get('c', 0) # 0 (returns the supplied default when the key is missing)
# Adding / modifying entries
d['c'] = 3 # inserts a new key-value pair
d.update({'d':4}) # merges another mapping -> {'a': 1, 'b': 2, 'c': 3, 'd': 4}
# Removing entries
del d['a'] # removes key 'a'
d.pop('b') # removes 'b' and returns its value (2)
d.popitem() # removes and returns the most recently inserted pair, here ('d', 4) (3.7+)
d.clear() # empties the dict
字典视图
d = {'a': 1, 'b': 2}
# The three view objects
d.keys() # keys view: dict_keys(['a', 'b'])
d.values() # values view: dict_values([1, 2])
d.items() # items view: dict_items([('a', 1), ('b', 2)])
# Views are dynamic: they reflect later changes to the dict
d['c'] = 3
list(d.keys()) # ['a', 'b', 'c']
3、字典方法
d = {'a': 1, 'b': 2}
# Common methods
d.copy() # shallow copy
len(d) # number of keys -> 2
'a' in d # key membership test -> True
d.setdefault('c', 3) # returns the existing value if the key is present; otherwise stores the default and returns it (3 here)
# Dict comprehension
{x: x**2 for x in range(5)} # {0:0, 1:1, 2:4, 3:9, 4:16}
Python字典(Dict)案例
案例1:电商平台商品管理系统
# Product catalogue for the product-management example:
# product id -> nested record (name, category, price, stock, specs, sales, rating).
products = {
    "P1001": {
        "name": "Apple iPhone 13",
        "category": "智能手机",
        "price": 5999.00,
        "stock": 150,
        "specs": {
            "color": ["星光色", "午夜色", "粉色"],
            "memory": ["128GB", "256GB", "512GB"],
            "chip": "A15仿生芯片",
        },
        "sales": 320,
        "rating": 4.8,
    },
    "P1002": {
        "name": "小米电视ES65",
        "category": "智能电视",
        "price": 3299.00,
        "stock": 85,
        "specs": {
            "screen_size": "65英寸",
            "resolution": "4K超高清",
            "HDR": "支持",
        },
        "sales": 210,
        "rating": 4.6,
    },
}
def add_product(prod_id, details):
    """Store ``details`` under ``prod_id`` in the module-level ``products`` dict.

    An existing entry with the same id is replaced. ``details`` must contain
    a ``'name'`` key, which is used in the confirmation message.
    """
    products.update({prod_id: details})
    product_name = details['name']
    print(f"产品 {product_name} 已成功添加!")
def update_stock(prod_id, quantity):
    """Adjust a product's stock level by ``quantity``.

    Args:
        prod_id: key of the product in the module-level ``products`` dict.
        quantity: amount to add; pass a negative number to record a sale.

    The change is rejected, and the stock left untouched, when it would make
    the stock negative. Prints the outcome; returns None.
    """
    if prod_id not in products:
        print("产品ID不存在!")
        return

    product = products[prod_id]
    new_stock = product["stock"] + quantity
    if new_stock < 0:
        # Selling more units than are on hand would leave a negative inventory.
        print(f"{product['name']} 库存不足,当前库存: {product['stock']}")
        return

    product["stock"] = new_stock
    print(f"{product['name']} 库存更新为: {product['stock']}")
# Example usage
add_product(
    "P1003",
    {
        "name": "华为MateBook 14",
        "category": "笔记本电脑",
        "price": 5699.00,
        "stock": 60,
        "specs": {
            "cpu": "i5-1135G7",
            "memory": "16GB",
            "storage": "512GB SSD",
        },
        "sales": 0,
        "rating": 0.0,
    },
)
update_stock("P1001", -5)  # five iPhones sold
案例2:学生成绩管理系统
# Data for the student-score example: per-student records keyed by student
# id, plus class-level information.
class_3B = {
    "students": {
        "S2023001": {
            "name": "张三",
            "gender": "男",
            "age": 15,
            "scores": {
                "math": 85,
                "english": 92,
                "physics": 78,
                "chemistry": 88,
            },
            "attendance": {
                "present": 45,
                "absent": 2,
                "late": 3,
            },
        },
        "S2023002": {
            "name": "李四",
            "gender": "女",
            "age": 16,
            "scores": {
                "math": 92,
                "english": 95,
                "physics": 85,
                "chemistry": 90,
            },
            "attendance": {
                "present": 48,
                "absent": 0,
                "late": 1,
            },
        },
    },
    "class_info": {
        "class_name": "三年级B班",
        "head_teacher": "王老师",
        "total_students": 42,
        "average_scores": {
            "math": 76.5,
            "english": 82.3,
            "physics": 70.8,
            "chemistry": 79.2,
        },
    },
}
def calculate_student_average(student_id):
    """Compute and print a student's average score across all subjects.

    Args:
        student_id: key of the student in ``class_3B["students"]``.

    Returns:
        The arithmetic mean of the student's scores, or None when the
        student id is unknown or the student has no scores recorded.
    """
    students = class_3B["students"]
    if student_id not in students:
        print("学生ID不存在!")
        return None

    student = students[student_id]
    scores = student["scores"]
    if not scores:
        # Avoid dividing by zero for a student without any recorded subject.
        print(f"{student['name']}暂无成绩!")
        return None

    average = sum(scores.values()) / len(scores)
    print(f"{student['name']}的平均分: {average:.1f}")
    return average
def update_score(student_id, subject, new_score):
    """Overwrite one subject score of a student in ``class_3B``.

    Only subjects the student already has a score for can be updated. Prints
    a confirmation, or a message naming what was not found (student id or
    subject). Returns None.
    """
    students = class_3B["students"]
    if student_id not in students:
        print("学生ID不存在!")
        return

    student = students[student_id]
    if subject not in student["scores"]:
        print("科目不存在!")
        return

    student["scores"][subject] = new_score
    print(f"{student['name']}的{subject}成绩已更新为: {new_score}")
# Example usage
calculate_student_average("S2023001") # mean of 85, 92, 78 and 88 -> 85.75
update_score("S2023002", "math", 95) # replaces the math score 92 with 95
案例3:医院患者病历系统
# Data for the medical-record example: patients keyed by patient id, and
# departments keyed by department name.
hospital_records = {
    "patients": {
        "PT20230001": {
            "basic_info": {
                "name": "王建国",
                "gender": "男",
                "age": 58,
                "id_card": "510104196501012345",
                "phone": "13800138000",
                "blood_type": "A",
                "allergies": ["青霉素"],
            },
            "medical_history": [
                {
                    "date": "2023-03-15",
                    "department": "心血管内科",
                    "diagnosis": "高血压",
                    "treatment": "开具降压药",
                    "doctor": "张医生",
                },
                {
                    "date": "2023-05-20",
                    "department": "消化内科",
                    "diagnosis": "胃炎",
                    "treatment": "胃镜检查+药物治疗",
                    "doctor": "李医生",
                },
            ],
            "current_medications": ["降压药", "胃药"],
        },
    },
    "departments": {
        "心血管内科": {
            "head": "张医生",
            "location": "门诊大楼3楼",
            "phone": "028-12345678",
        },
        "消化内科": {
            "head": "李医生",
            "location": "门诊大楼2楼",
            "phone": "028-12345679",
        },
    },
}
def add_medical_record(patient_id, record):
    """Append ``record`` to a patient's ``medical_history`` list.

    The patient is looked up in ``hospital_records["patients"]``. Prints a
    confirmation with the patient's name, or a "patient id does not exist"
    message when the id is unknown. Returns None.
    """
    patients = hospital_records["patients"]
    if patient_id not in patients:
        print("患者ID不存在!")
        return

    patient = patients[patient_id]
    patient["medical_history"].append(record)
    print(f"已为{patient['basic_info']['name']}添加病历记录")
def list_patient_medications(patient_id):
    """Print a numbered list (starting at 1) of a patient's current medications.

    The patient is looked up in ``hospital_records["patients"]``; an unknown
    id prints a "patient id does not exist" message instead. Returns None.
    """
    patients = hospital_records["patients"]
    if patient_id not in patients:
        print("患者ID不存在!")
        return

    patient = patients[patient_id]
    medications = patient["current_medications"]
    print(f"{patient['basic_info']['name']}的当前用药:")
    for position, med in enumerate(medications, start=1):
        print(f"{position}. {med}")
# Example usage
add_medical_record(
    "PT20230001",
    {
        "date": "2023-06-10",
        "department": "心血管内科",
        "diagnosis": "高血压复查",
        "treatment": "调整用药剂量",
        "doctor": "张医生",
    },
)
list_patient_medications("PT20230001")
案例4:城市天气数据监测系统
# Data for the weather-monitoring example: per-city current conditions and
# forecast entries, plus two top-level metadata fields.
weather_data = {
    "cities": {
        "北京": {
            "current": {
                "temperature": 28.5,
                "humidity": 65,
                "wind_speed": 12,
                "conditions": "晴",
                "aqi": 85,
                "update_time": "2023-06-15 09:00:00",
            },
            "forecast": [
                {
                    "date": "2023-06-15",
                    "high": 30,
                    "low": 22,
                    "conditions": "晴转多云",
                },
                {
                    "date": "2023-06-16",
                    "high": 29,
                    "low": 21,
                    "conditions": "多云转雷阵雨",
                },
            ],
        },
        "上海": {
            "current": {
                "temperature": 30.2,
                "humidity": 78,
                "wind_speed": 15,
                "conditions": "多云",
                "aqi": 92,
                "update_time": "2023-06-15 09:00:00",
            },
            "forecast": [
                {
                    "date": "2023-06-15",
                    "high": 31,
                    "low": 25,
                    "conditions": "多云",
                },
                {
                    "date": "2023-06-16",
                    "high": 32,
                    "low": 26,
                    "conditions": "雷阵雨",
                },
            ],
        },
    },
    "update_frequency": "每小时",
    "data_source": "国家气象局",
}
def get_city_weather(city_name):
    """Print the current weather readings stored for ``city_name``.

    Reads ``weather_data["cities"][city_name]["current"]`` and prints
    temperature, humidity, wind speed, conditions, AQI and update time, one
    per line. A city that is not in the data set prints a "not yet
    available" message instead. Returns None.
    """
    cities = weather_data["cities"]
    if city_name not in cities:
        print("该城市数据暂未收录!")
        return

    now = cities[city_name]["current"]
    print(f"{city_name}当前天气:")
    print(f"温度: {now['temperature']}°C")
    print(f"湿度: {now['humidity']}%")
    print(f"风速: {now['wind_speed']}km/h")
    print(f"天气状况: {now['conditions']}")
    print(f"空气质量指数(AQI): {now['aqi']}")
    print(f"更新时间: {now['update_time']}")
def add_city_weather(city_name, current_data, forecast_data):
    """Store weather data for a city in ``weather_data["cities"]``.

    The entry holds ``current_data`` under ``"current"`` and
    ``forecast_data`` under ``"forecast"``; an existing entry for the same
    city is replaced. Prints a confirmation and returns None.
    """
    entry = {"current": current_data, "forecast": forecast_data}
    weather_data["cities"][city_name] = entry
    print(f"{city_name}天气数据已添加!")
# Example usage
get_city_weather("北京")
add_city_weather(
    "广州",
    {
        "temperature": 32.0,
        "humidity": 80,
        "wind_speed": 10,
        "conditions": "雷阵雨",
        "aqi": 75,
        "update_time": "2023-06-15 09:00:00",
    },
    [
        {
            "date": "2023-06-15",
            "high": 33,
            "low": 27,
            "conditions": "雷阵雨",
        },
        {
            "date": "2023-06-16",
            "high": 34,
            "low": 28,
            "conditions": "多云",
        },
    ],
)
案例5:餐厅菜单与订单管理系统
# Data for the restaurant ordering example: the menu keyed by menu id,
# orders keyed by order id, and staff lists by role.
restaurant = {
    "menu": {
        "M001": {
            "name": "宫保鸡丁",
            "category": "川菜",
            "price": 38,
            "ingredients": ["鸡胸肉", "花生", "干辣椒", "葱"],
            "spicy_level": "中辣",
            "prep_time": 15,  # minutes
        },
        "M002": {
            "name": "清蒸鲈鱼",
            "category": "粤菜",
            "price": 68,
            "ingredients": ["鲈鱼", "姜", "葱", "蒸鱼豉油"],
            "spicy_level": "不辣",
            "prep_time": 20,
        },
    },
    "orders": {
        "T001": {
            "table": 5,
            "customers": 4,
            "items": [
                {"menu_id": "M001", "quantity": 1, "special_requests": "少放花生"},
                {"menu_id": "M002", "quantity": 1},
            ],
            "status": "准备中",
            "order_time": "2023-06-15 18:30:00",
        },
    },
    "staff": {
        "waiter": ["张三", "李四"],
        "chef": ["王师傅", "赵师傅"],
    },
}
def place_order(table_num, customer_count, items):
    """Create a new order in ``restaurant["orders"]``.

    Args:
        table_num: table number the order belongs to.
        customer_count: number of customers at the table.
        items: list of order lines (dicts with ``menu_id`` and ``quantity``,
            optionally ``special_requests``).

    Returns:
        The generated order id (``"T"`` plus a zero-padded sequence number).
    """
    # Imported here because no datetime import is visible in this file;
    # without it the call below raises NameError.
    from datetime import datetime

    orders = restaurant["orders"]
    sequence = len(orders) + 1
    order_id = f"T{sequence:03d}"
    # len()+1 can hit an id that is still in use once an earlier order has
    # been removed; advance until the id is free so no order is overwritten.
    while order_id in orders:
        sequence += 1
        order_id = f"T{sequence:03d}"

    orders[order_id] = {
        "table": table_num,
        "customers": customer_count,
        "items": items,
        "status": "已接收",
        "order_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    print(f"订单{order_id}已接收!")
    return order_id
def calculate_order_total(order_id):
    """Compute, print and return the total price of an order.

    Each order line contributes its menu item's ``price`` times the line's
    ``quantity``, using ``restaurant["menu"]`` for prices. An unknown order
    id prints an "order does not exist" message and returns 0.
    """
    orders = restaurant["orders"]
    if order_id not in orders:
        print("订单不存在!")
        return 0

    amount = 0
    for line in orders[order_id]["items"]:
        dish = restaurant["menu"][line["menu_id"]]
        amount += dish["price"] * line["quantity"]
    print(f"订单{order_id}总金额: {amount}元")
    return amount
# Example usage
place_order(
    8,
    2,
    [
        {"menu_id": "M001", "quantity": 1},
        {"menu_id": "M002", "quantity": 1, "special_requests": "少放姜"},
    ],
)
# Totals the pre-existing order T001, not the order placed just above
calculate_order_total("T001")