Python数据结构深度解析:集合与字典的异同
1. 基本概念对比
集合(Set)
# Set: an unordered collection of unique elements
fruits_set = {"apple", "banana", "orange", "apple"} # the duplicate "apple" is dropped automatically
print(fruits_set) # e.g. {'banana', 'orange', 'apple'} (display order is arbitrary)
numbers_set = {1, 2, 3, 4, 5}
empty_set = set() # note: {} creates an empty dict, not an empty set
字典(Dict)
# Dict: a collection of key/value pairs
person_dict = {
"name": "张三",
"age": 25,
"city": "北京"
}
print(person_dict) # prints: {'name': '张三', 'age': 25, 'city': '北京'}
empty_dict = {} # an empty dict
2. 核心异同对比表
| 特性 | 集合(Set) | 字典(Dict) |
|---|---|---|
| 存储内容 | 单个元素 | 键值对(key-value) |
| 元素要求 | 必须可哈希 | 键必须可哈希,值可以是任意类型 |
| 顺序性 | 无序(不保证任何顺序,也不记录插入顺序) | 保持插入顺序(Python 3.7+) |
| 唯一性 | 元素唯一 | 键唯一,值可以重复 |
| 空创建 | set() | {} 或 dict() |
| 索引访问 | 不支持 | 通过键访问:dict[key] |
| 可变性 | 可变 | 可变 |
3. 创建方式对比
集合创建方式
# 1. Literal syntax with curly braces
set1 = {1, 2, 3, 4, 5}
# 2. set() built from any iterable
set2 = set([1, 2, 3, 2, 1]) # value: {1, 2, 3}
set3 = set("hello") # value: {'h', 'e', 'l', 'o'}
# 3. Set comprehension
set4 = {x for x in range(10) if x % 2 == 0} # value: {0, 2, 4, 6, 8}
# Note: an empty set must be created with set(); {} creates an empty dict
empty_set = set()
print(type(empty_set)) # <class 'set'>
字典创建方式
# 1. Literal syntax with curly braces
dict1 = {"name": "Alice", "age": 25}
# 2. dict() constructor: keyword arguments, or an iterable of (key, value) pairs
dict2 = dict(name="Bob", age=30)
dict3 = dict([("name", "Charlie"), ("age", 35)])
# 3. Dict comprehension
dict4 = {x: x**2 for x in range(5)} # value: {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
# 4. dict.fromkeys(): every listed key maps to the same value
dict5 = dict.fromkeys(["name", "age", "city"], "unknown")
empty_dict = {}
print(type(empty_dict)) # <class 'dict'>
4. 操作和方法对比
基本操作对比
# Set operations
fruits = {"apple", "banana", "orange"}
# Add elements
fruits.add("grape")
fruits.update(["kiwi", "mango"])
# Remove elements
fruits.remove("apple") # raises KeyError if the element is missing
fruits.discard("banana") # no error if the element is missing
popped = fruits.pop() # removes and returns an arbitrary element (not a random one)
print("集合操作后:", fruits)
# Dict operations
person = {"name": "张三", "age": 25, "city": "北京"}
# Add / update
person["email"] = "zhang@example.com" # adds a new key
person["age"] = 26 # overwrites the existing value
# Read
name = person["name"] # direct access; raises KeyError if the key is missing
age = person.get("age") # safe access; returns None if the key is missing
salary = person.get("salary", 5000) # returns the supplied default because "salary" is absent
# Delete
del person["city"] # removes the key/value pair
email = person.pop("email") # removes the key and returns its value
key, value = person.popitem() # removes and returns the most recently inserted pair
print("字典操作后:", person)
常用方法对比
# Set method examples
set_a = {1, 2, 3, 4, 5}
set_b = {4, 5, 6, 7, 8}
print("集合A:", set_a)
print("集合B:", set_b)
# Set algebra: every operator has an equivalent method
print("并集:", set_a | set_b)  # or set_a.union(set_b)
print("交集:", set_a & set_b)  # or set_a.intersection(set_b)
print("差集:", set_a - set_b)  # or set_a.difference(set_b)
print("对称差集:", set_a ^ set_b)  # or set_a.symmetric_difference(set_b)
# Relationship checks
print("A是B的子集?", set_a.issubset(set_b))
print("A是B的超集?", set_a.issuperset(set_b))
# isdisjoint() is True when the two sets share NO element, so it must be
# negated to answer the question "do A and B intersect?".
has_overlap = not set_a.isdisjoint(set_b)
print("A和B有交集?", has_overlap)
# Dict method examples
dict_a = {"a": 1, "b": 2, "c": 3}
dict_b = {"c": 30, "d": 4, "e": 5}
print("\n字典A:", dict_a)
print("字典B:", dict_b)
# View objects over keys, values and (key, value) pairs
print("所有键:", dict_a.keys())
print("所有值:", dict_a.values())
print("所有键值对:", dict_a.items())
# Merging
dict_a.update(dict_b) # in-place merge; for shared keys ("c") the value from dict_b wins
print("合并后:", dict_a)
# Read a key, inserting a default first if it is missing
value = dict_a.setdefault("f", 100) # "f" is absent, so it is inserted with 100 and 100 is returned
print("设置默认值后:", dict_a)
5. 实际应用场景
集合的应用场景
# Scenario 1: removing duplicates
def remove_duplicates(data):
    """Return the distinct elements of *data* as a list, in first-seen order.

    A set is used only for the O(1) "already seen?" test; the result list
    keeps the order in which elements first appear. Converting through
    ``list(set(data))`` would return the elements in an arbitrary order
    (and, for strings, an order that can change between interpreter runs).

    Args:
        data: Any iterable of hashable elements.

    Returns:
        A new list with duplicates removed.
    """
    seen = set()
    unique = []
    for item in data:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


numbers = [1, 2, 2, 3, 4, 4, 4, 5]
unique_numbers = remove_duplicates(numbers)
print("去重结果:", unique_numbers)
# Scenario 2: membership testing (faster on a set than on a list)
def check_permissions(user, allowed_users):
    """Report whether *user* is contained in *allowed_users*.

    Args:
        user: The user identifier to look up.
        allowed_users: A container supporting the ``in`` operator.

    Returns:
        True if *user* is in *allowed_users*, otherwise False.
    """
    is_allowed = user in allowed_users
    return is_allowed


admin_users = {"admin", "root", "superuser"}
print("用户admin有权限:", check_permissions("admin", admin_users))
print("用户guest有权限:", check_permissions("guest", admin_users))
# Scenario 3: set algebra
def find_common_interests(user1_interests, user2_interests):
    """Return the interests shared by both users.

    Both arguments are converted to sets first, so lists, tuples and other
    iterables are accepted as well as sets (the bare ``&`` operator raises
    TypeError unless both operands are sets).

    Args:
        user1_interests: Iterable of hashable interests of the first user.
        user2_interests: Iterable of hashable interests of the second user.

    Returns:
        A set with the elements present in both inputs.
    """
    return set(user1_interests) & set(user2_interests)


user1 = {"篮球", "音乐", "编程", "旅游"}
user2 = {"编程", "读书", "音乐", "电影"}
common = find_common_interests(user1, user2)
print("共同兴趣:", common)
字典的应用场景
# Scenario 1: data mapping / lookup table
def build_grade_mapping(scores):
    """Map each student's numeric score to a letter grade.

    Grade bands are described by their lower bound, so fractional scores
    such as 89.5 are graded too. (Closed integer ranges like (80, 89) and
    (90, 100) leave a gap between 89 and 90 in which a score would silently
    receive no grade.)

    Args:
        scores: Dict mapping student name to a numeric score.

    Returns:
        Dict mapping student name to "A" (>= 90), "B" (>= 80), "C" (>= 70),
        "D" (>= 60) or "F" (>= 0). Students whose score lies outside
        0-100 are left out of the result.
    """
    # (lower bound, grade), highest band first: the first bound reached wins.
    grade_floors = ((90, "A"), (80, "B"), (70, "C"), (60, "D"), (0, "F"))
    result = {}
    for student, score in scores.items():
        if not 0 <= score <= 100:
            continue  # out-of-range scores get no grade
        for floor, grade in grade_floors:
            if score >= floor:
                result[student] = grade
                break
    return result


scores = {"张三": 85, "李四": 92, "王五": 78}
grades = build_grade_mapping(scores)
print("成绩等级:", grades)
# Scenario 2: configuration management
class Config:
    """Nested configuration store addressed with dotted key paths."""

    def __init__(self, settings=None):
        """Create a configuration.

        Args:
            settings: Optional nested dict of settings. When omitted, the
                built-in demo settings (a "database" and a "server"
                section) are used.
        """
        if settings is None:
            settings = {
                "database": {
                    "host": "localhost",
                    "port": 5432,
                    "name": "myapp",
                },
                "server": {
                    "port": 8000,
                    "debug": True,
                },
            }
        self.settings = settings

    def get(self, key_path, default=None):
        """Look up a value by a dotted path such as ``"database.host"``.

        Args:
            key_path: Keys joined with ".", one per nesting level.
            default: Value returned when the path cannot be resolved.

        Returns:
            The value stored at *key_path*, or *default* if any segment is
            missing or an intermediate value is not a dict.
        """
        current = self.settings
        for key in key_path.split("."):
            # Stop as soon as the path cannot be followed any further.
            if not isinstance(current, dict) or key not in current:
                return default
            current = current[key]
        return current


config = Config()
print("数据库主机:", config.get("database.host"))
print("服务器端口:", config.get("server.port"))
# Scenario 3: counting occurrences
def count_elements(data):
    """Count how many times each element occurs in *data*.

    Args:
        data: Iterable of hashable elements.

    Returns:
        Dict mapping each distinct element to its number of occurrences,
        with keys in first-seen order.
    """
    tally = {}
    for element in data:
        if element in tally:
            tally[element] += 1
        else:
            tally[element] = 1
    return tally


words = ["apple", "banana", "apple", "orange", "banana", "apple"]
word_count = count_elements(words)
print("单词计数:", word_count)
6. 性能特点
import time
# 集合和字典的查找性能对比
def performance_comparison():
large_list = list(range(1000000))
large_set = set(large_list)
large_dict = {x: x for x in large_list}
target = 999999
# 列表查找
start = time.time()
found_list = target in large_list
list_time = time.time() - start
# 集合查找
start = time.time()
found_set = target in large_set
set_time = time.time() - start
# 字典查找(通过键)
start = time.time()
found_dict = target in large_dict
dict_time = time.time() - start
print(f"列表查找时间: {list_time:.6f}秒")
print(f"集合查找时间: {set_time:.6f}秒")
print(f"字典查找时间: {dict_time:.6f}秒")
print(f"集合比列表快 {list_time/set_time:.0f} 倍")
print(f"字典比列表快 {list_time/dict_time:.0f} 倍")
performance_comparison() # run the benchmark and print the timings
7. 特殊类型
不可变集合(frozenset)
# frozenset: the immutable counterpart of set
frozen = frozenset(range(1, 6))
print("不可变集合:", frozen)
# Being hashable, a frozenset can serve as a dict key
valid_dict = {}
valid_dict[frozen] = "这是一个集合键"
print("使用frozenset作为键:", valid_dict)
# It has no mutating methods, so add() does not exist on it
try:
    frozen.add(6)
except AttributeError as err:
    print(f"错误: {err}")
字典视图对象
# keys(), values() and items() return view objects, not copies
person = {"name": "Alice", "age": 25, "city": "New York"}
keys_view = person.keys()
values_view = person.values()
items_view = person.items()
print("键视图:", keys_view)
print("值视图:", values_view)
print("键值对视图:", items_view)
# Views are dynamic: they reflect later changes to the dict
person["country"] = "USA"
print("添加元素后的键视图:", keys_view) # now includes 'country' without calling keys() again
8. 综合应用示例
def analyze_data(data):
    """Summarise product records using a set and several dicts.

    Prints the number of distinct categories and the categories themselves.

    Args:
        data: Sequence of dicts, each with a "category" and a "price" key.

    Returns:
        Dict with three entries: "unique_categories" (set of categories),
        "category_counts" (category -> number of records) and
        "average_prices" (category -> mean price).
    """
    # A set gives the distinct categories
    unique_categories = {record["category"] for record in data}
    print(f"唯一类别数量: {len(unique_categories)}")
    print(f"所有类别: {unique_categories}")

    # Dicts accumulate the per-category statistics
    category_count = {}
    prices_by_category = {}
    for record in data:
        label = record["category"]
        category_count[label] = category_count.get(label, 0) + 1
        prices_by_category.setdefault(label, []).append(record["price"])

    # Mean price per category
    avg_prices = {
        label: sum(values) / len(values)
        for label, values in prices_by_category.items()
    }

    summary = {}
    summary["unique_categories"] = unique_categories
    summary["category_counts"] = category_count
    summary["average_prices"] = avg_prices
    return summary


# Sample data
sample_data = [
    {"name": "商品A", "category": "电子", "price": 1000},
    {"name": "商品B", "category": "服装", "price": 200},
    {"name": "商品C", "category": "电子", "price": 1500},
    {"name": "商品D", "category": "食品", "price": 50},
    {"name": "商品E", "category": "服装", "price": 300},
]
result = analyze_data(sample_data)
print("分析结果:", result)
总结
相同点:
- 都基于哈希表实现(注意顺序性不同:集合无序,字典自 Python 3.7 起保持插入顺序)
- 都使用花括号 {}(但空集合用 set())
- 都要求元素/键是可哈希的
- 查找操作都非常高效(平均 O(1) 时间复杂度)
不同点:
- 集合存储单个元素,字典存储键值对
- 集合保证元素唯一性,字典保证键唯一性
- 集合主要用于去重和关系运算,字典主要用于映射和查找
选择建议:
- 需要去重或集合运算 → 使用集合
- 需要键值映射或快速查找 → 使用字典
- 需要不可变版本 → 使用frozenset
理解它们的区别能帮助你在合适的场景选择合适的数据结构!

被折叠的 条评论
为什么被折叠?



