【Python数据处理：列表、字典与集合】-CSDN博客

Python数据处理：列表、字典与集合

一、列表(List) - 有序的数据容器

列表是Python中最灵活的数据结构之一，它可以存储不同类型的数据，并且支持动态修改。

1.1 列表的基本操作

# Different ways to create a list
numbers = [1, 2, 3, 4, 5]                    # list literal
fruits = list(("苹果", "香蕉", "橙子"))       # list() constructor applied to a tuple
mixed_list = [10, "文本", 3.14, True]         # one list holding int, str, float and bool

# Show each list that was just built
print(f"数字列表: {numbers}")
print(f"水果列表: {fruits}")
print(f"混合列表: {mixed_list}")

1.2 列表的增删改查

# Build a student roster
students = ["张三", "李四", "王五"]
print(f"原始名单: {students}")

# Add elements
students.append("小明")                     # append to the end
print(f"追加后: {students}")

students.insert(1, "老王")                  # insert before index 1
print(f"插入后: {students}")

# Remove elements
removed_student = students.pop(2)           # remove by index and return the element ("李四")
print(f"删除索引2的元素: {students}, 被删除的是: {removed_student}")

# pop(2) above has already taken "李四" out of the list, so removing it
# again by value would raise ValueError. Remove an element that is still
# present to demonstrate removal by value.
students.remove("老王")                     # remove by value (first match only)
print(f"删除'老王'后: {students}")

# Modify an element
students[0] = "小明"                      # assign through the index
print(f"修改后: {students}")

# Query elements
if "王五" in students:
    print("王五在名单中")

index = students.index("小明")              # index of the first match
print(f"小明的索引位置: {index}")

1.3 列表的切片与排序

# Slicing - extract sub-lists
numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

print(f"前5个: {numbers[:5]}")              # [0, 1, 2, 3, 4]
print(f"索引3到7: {numbers[3:8]}")          # [3, 4, 5, 6, 7] (stop index 8 is excluded)
print(f"每隔2个取一个: {numbers[::2]}")     # [0, 2, 4, 6, 8]
print(f"倒序: {numbers[::-1]}")             # [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

# Sorting
scores = [88, 92, 75, 96, 84, 100, 67]

# Ascending order: sorted() builds a new list and leaves `scores` unchanged
sorted_scores = sorted(scores)
print(f"升序排序: {sorted_scores}")

# Descending order: list.sort() reorders `scores` in place
scores.sort(reverse=True)
print(f"降序排序: {scores}")

# Sorting compound records: (name, score) tuples
students = [
    ("张三", 88),
    ("李四", 92),
    ("王五", 75)
]

# Sort by score (tuple element 1), highest first
students.sort(key=lambda x: x[1], reverse=True)
print(f"按成绩排序: {students}")

1.4 列表推导式 - Pythonic的写法

# Traditional approach: build the list of squares of 1..10 with a loop
squares = []
for i in range(1, 11):
    squares.append(i ** 2)
print(f"传统方式: {squares}")

# List comprehension: the same result in a single expression
squares = [i ** 2 for i in range(1, 11)]
print(f"列表推导式: {squares}")

# Comprehension with a filter: squares of the even numbers only
even_squares = [i ** 2 for i in range(1, 11) if i % 2 == 0]
print(f"偶数的平方: {even_squares}")

# Nested comprehension: three rows, each row being [1, 2, 3]
# (the outer variable `i` only controls the number of rows)
matrix = [[j for j in range(1, 4)] for i in range(3)]
print(f"3x3矩阵: {matrix}")

二、字典(Dictionary) - 键值对

字典通过键值对存储数据，可快速查找。

2.1 字典的创建与基本操作

# Several ways to create a dict
student1 = {"姓名": "张三", "年龄": 20, "成绩": 88.5}          # dict literal
student2 = dict(姓名="李四", 年龄=21, 成绩=92.0)               # dict() with keyword arguments
student3 = dict([("姓名", "王五"), ("年龄", 19), ("成绩", 75.0)])  # dict() from a list of (key, value) tuples

print(f"学生1: {student1}")
print(f"学生2: {student2}")
print(f"学生3: {student3}")

# Create / read / update / delete on a dict
employee = {"姓名": "张三", "部门": "技术部"}

# Add or modify entries
employee["薪资"] = 8000                   # new key: adds the pair
employee["部门"] = "研发部"               # existing key: overwrites the value

# Bulk update from another dict
employee.update({"职级": "P6", "入职年份": 2020})
print(f"更新后: {employee}")

# Delete an entry
salary = employee.pop("薪资")             # removes the key and returns its value
print(f"删除薪资后: {employee}, 薪资是: {salary}")

# Safe lookup
name = employee.get("姓名", "未知")        # returns the default when the key is missing
phone = employee.get("电话", "未填写")     # "电话" was never set, so the default is returned
print(f"姓名: {name}, 电话: {phone}")

# All keys, all values, all (key, value) pairs
print(f"所有键: {list(employee.keys())}")
print(f"所有值: {list(employee.values())}")
print(f"所有键值对: {list(employee.items())}")

2.2 字典的遍历

# A more involved example: a dict whose values are themselves dicts
student_info = {
    "基本信息": {"姓名": "李华", "年龄": 20, "性别": "男"},
    "学习成绩": {"数学": 88, "英语": 92, "物理": 85},
    "联系方式": {"电话": "13888888888", "邮箱": "lihua@example.com"}
}

# Iterate over the keys (iterating a dict directly yields its keys)
for key in student_info:
    print(f"分类: {key}")

# Iterate over the values
for value in student_info.values():
    print(f"值: {value}")

# Iterate over (key, value) pairs
for key, value in student_info.items():
    print(f"{key}: {value}")

# Walk both nesting levels
for category, details in student_info.items():
    print(f"\n{category}:")
    if isinstance(details, dict):  # only descend into values that are dicts
        for sub_key, sub_value in details.items():
            print(f"  {sub_key}: {sub_value}")

2.3 字典推导式

# Map each number 1..5 to its square
square_dict = {x: x**2 for x in range(1, 6)}
print(f"数字平方字典: {square_dict}")

# Build a dict from two parallel lists (zip pairs them position by position)
keys = ["name", "age", "city"]
values = ["张三", 25, "上海"]
person = {k: v for k, v in zip(keys, values)}
print(f"个人信息字典: {person}")

# Dict comprehension with a filter: even numbers only
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
even_squares = {n: n**2 for n in numbers if n % 2 == 0}
print(f"偶数的平方字典: {even_squares}")

2.4 字典的合并与嵌套

# Merging dicts
dict1 = {"a": 1, "b": 2}
dict2 = {"c": 3, "d": 4}
dict3 = {"b": 20, "e": 5}  # note: key "b" collides with dict1

# Method 1: update() - modifies in place; later values overwrite earlier ones
merged1 = dict1.copy()  # copy first so dict1 itself is not modified
merged1.update(dict2)
merged1.update(dict3)
print(f"方法1合并结果: {merged1}")

# Method 2: dict unpacking - available since Python 3.5 (PEP 448).
# It is the `dict1 | dict2` merge operator that requires Python 3.9+.
merged2 = {**dict1, **dict2, **dict3}
print(f"方法2合并结果: {merged2}")

# Nested dict example: department -> group -> list of member names
company = {
    "技术部": {
        "前端组": ["张三", "李四"],
        "后端组": ["王五", "小明", "老王"],
        "测试组": ["小王"]
    },
    "市场部": {
        "销售组": ["小李", "王二"],
        "策划组": ["老李"]
    }
}

# Access nested values; the head count sums the length of every group list
print(f"后端组成员: {company['技术部']['后端组']}")
print(f"公司总人数: {sum(len(group) for dept in company.values() for group in dept.values())}")

dept in company.values()：遍历公司的每个部门（技术部、市场部）
group in dept.values()：遍历每个部门下的每个小组（前端组、后端组、测试组、销售组、策划组）
sum(...)：将所有小组的人数相加，得到公司总人数

三、集合(Set) - 去重与集合运算

集合是一种无序且元素不重复的数据容器，适合去重和数学集合运算。

3.1 集合的基本操作

# Creating sets
colors = {"红色", "绿色", "蓝色", "黄色"}          # set literal
numbers = set([1, 2, 2, 3, 3, 3, 4, 5])           # from a list (duplicates are dropped)
empty_set = set()                                 # empty set ({} would create a dict instead)

print(f"颜色集合: {colors}")
print(f"数字集合: {numbers}")                     # note: the repeated elements are gone
print(f"空集合: {empty_set}")

# Adding and removing elements
colors.add("紫色")                                # add a single element
print(f"添加紫色后: {colors}")

colors.update(["橙色", "粉色", "黑色"])           # add several elements at once
print(f"添加多个颜色后: {colors}")

colors.remove("黄色")                             # remove an element (it must be present)
print(f"删除黄色后: {colors}")

colors.discard("白色")                            # safe removal (no error if the element is absent)
print(f"安全删除白色后: {colors}")

3.2 集合运算

# Two sets of students; "小龙" appears in both
math_students = {"张三", "李四", "王五", "大明", "小龙"}
physics_students = {"小张", "小李", "小王", "小明", "小龙"}

print(f"数学学生: {math_students}")
print(f"物理学生: {physics_students}")

# Union: every student in either course
all_students = math_students | physics_students  # or use union()
print(f"所有学生（并集）: {all_students}")

# Intersection: students taking both courses
both_subjects = math_students & physics_students  # or use intersection()
print(f"两门课都学的学生（交集）: {both_subjects}")

# Difference: students taking math but not physics
only_math = math_students - physics_students      # or use difference()
print(f"只学数学的学生（差集）: {only_math}")

# Symmetric difference: students taking exactly one of the two courses
only_one_subject = math_students ^ physics_students  # or use symmetric_difference()
print(f"只学一门课的学生（对称差集）: {only_one_subject}")

3.3 集合推导式

# Set of the even numbers from 1 to 10
even_numbers = {x for x in range(1, 11) if x % 2 == 0}
print(f"10以内的偶数集合: {even_numbers}")

# Set of the characters in a string (the repeated "l" is kept only once)
word = "hello"
unique_chars = {char for char in word}
print(f"'hello'中的唯一字符: {unique_chars}")

# Elements common to two lists: convert both to sets and intersect
list1 = [1, 2, 3, 4, 5, 5, 6]
list2 = [4, 5, 6, 7, 8, 9]

common_elements = set(list1) & set(list2)
print(f"两个列表的共同元素: {common_elements}")

四、数据类型转换

在实际数据处理中，经常需要在不同类型之间进行转换。

4.1 常见类型转换

# Converting between numbers and strings
number_str = "123"
number_int = int(number_str)          # str -> int
number_float = float("3.14")          # str -> float

print(f"字符串转整数: {number_int}, 类型: {type(number_int)}")
print(f"字符串转浮点数: {number_float}, 类型: {type(number_float)}")

# And back to strings
text1 = str(123)
text2 = str(3.14)
print(f"整数转字符串: '{text1}', 类型: {type(text1)}")
print(f"浮点数转字符串: '{text2}', 类型: {type(text2)}")

# Converting between list, tuple and set
my_list = [1, 2, 2, 3, 4, 4, 5]

# list -> tuple (a tuple cannot be modified afterwards)
my_tuple = tuple(my_list)
print(f"列表转元组: {my_tuple}, 类型: {type(my_tuple)}")

# list -> set (duplicates are dropped)
my_set = set(my_list)
print(f"列表转集合: {my_set}, 类型: {type(my_set)}")

# set -> list
new_list = list(my_set)
print(f"集合转列表: {new_list}, 类型: {type(new_list)}")

# Dict conversions: a list of (key, value) tuples -> dict
items = [("name", "张三"), ("age", 20), ("city", "北京")]
person_dict = dict(items)
print(f"列表转字典: {person_dict}")

# dict -> list of (key, value) tuples
dict_items = list(person_dict.items())
print(f"字典转列表（键值对）: {dict_items}")

4.2 类型转换应用

# 案例：处理用户输入的数据
def process_user_data():
    """Prompt for one comma-separated record and parse it into a dict.

    The expected input format is ``name,age,city,height(cm)``. Whitespace
    around each field is stripped, and any fields after the fourth are
    ignored.

    Returns:
        A dict with the keys "姓名" (str), "年龄" (int), "城市" (str) and
        "身高" (float), or None when the input has fewer than four fields
        or when the age / height fields are not valid numbers.
    """
    print("请输入您的个人信息（用逗号分隔）：")
    print("格式：姓名,年龄,城市,身高(cm)")

    user_input = input("请输入: ")

    # Split on commas and trim every field once, up front.
    data_parts = [part.strip() for part in user_input.split(",")]

    # Guard clause: too few fields means the format is wrong.
    if len(data_parts) < 4:
        return None

    name, age_text, city, height_text = data_parts[:4]

    # Non-numeric age or height is a format error too; report it as None
    # (which the caller already handles) instead of raising ValueError.
    try:
        age = int(age_text)
        height = float(height_text)
    except ValueError:
        return None

    return {
        "姓名": name,
        "年龄": age,
        "城市": city,
        "身高": height,
    }

# Run the example: read one record and report either the parsed data or a
# format error (a None / empty result counts as a format error).
user_data = process_user_data()
message = f"处理后的用户数据: {user_data}" if user_data else "输入格式不正确！"
print(message)