Python threading多线程模块使用教程

原创已于 2025-04-29 10:19:43 修改 · 1.2k 阅读

30 ·

CC 4.0 BY-SA版权

文章标签：

#python #开发语言

于 2025-04-29 10:06:03 首次发布

python内置模块专栏收录该内容

30 篇文章

订阅专栏

Python threading模块使用教程

1.线程基础概念

什么是线程

线程是操作系统能够进行运算调度的最小单位，它被包含在进程之中，是进程中的实际运作单位。一个进程可以包含多个线程，这些线程共享进程的内存空间。

Python中的线程

Python中的线程是通过threading模块实现的。需要注意的是，由于GIL(全局解释器锁)的存在，Python的多线程在CPU密集型任务上并不能真正实现并行，但在I/O密集型任务中仍然非常有用。

线程与进程的区别

进程是资源分配的最小单位，线程是程序执行的最小单位
进程有独立的内存空间，线程共享进程的内存空间
进程间切换开销大，线程间切换开销小
进程间通信复杂，线程间通信简单

2.创建线程

方法1：继承Thread类

import threading
import time

class MyThread(threading.Thread):
    """Thread subclass whose run() prints timestamps through print_time.

    Args:
        thread_id: Caller-chosen identifier; only stored on the instance.
        name: Thread name, also used as the label in the printed output.
        counter: Stored on the instance and handed to print_time as its
            delay argument.
    """

    def __init__(self, thread_id, name, counter):
        super().__init__()
        self.thread_id = thread_id
        self.counter = counter
        # Assigned after the base initialiser, replacing the default name.
        self.name = name

    def run(self):
        """Announce the start, call print_time with a count of 5, announce the exit."""
        label = self.name
        print(f"Starting {label}")
        print_time(label, self.counter, 5)
        print(f"Exiting {label}")

def print_time(thread_name, delay, counter):
    """Repeatedly sleep and print a timestamped line for thread_name.

    Args:
        thread_name: Label placed in front of each printed timestamp.
        delay: Seconds to sleep before each print.
        counter: Countdown start; one line is printed per step until the
            countdown reaches a falsy value (0).
    """
    remaining = counter
    while remaining:
        time.sleep(delay)
        stamp = time.ctime(time.time())
        print(f"{thread_name}: {stamp}")
        remaining -= 1

# Create both threads before starting either of them
thread1, thread2 = MyThread(1, "Thread-1", 1), MyThread(2, "Thread-2", 2)
workers = (thread1, thread2)

# Start the threads in creation order
for th in workers:
    th.start()

# Block until each thread has finished
for th in workers:
    th.join()

print("Exiting Main Thread")

方法2：直接使用Thread对象

import threading
import time

def worker(num):
    """Thread target: announce start, sleep one second, announce finish.

    Args:
        num: Value interpolated into the two printed messages.
    """
    print(f"Worker {num} started")
    time.sleep(1)
    print(f"Worker {num} finished")

# Create five worker threads, starting each one right after it is created
threads = []
for i in range(5):
    t = threading.Thread(target=worker, args=(i,))
    threads.append(t)
    t.start()

# Wait for every thread to finish
for t in threads:
    t.join()

print("All threads completed")

Thread类常用方法和属性

start(): 启动线程
run(): 线程执行的方法（可以重写）
join(timeout=None): 等待线程终止
is_alive(): 返回线程是否存活
name: 线程名称
ident: 线程标识符
daemon: 是否为守护线程标志

3.线程同步

锁 (Lock)

import threading

# Counter updated by the worker threads; every update is made while holding `lock`
shared_resource = 0
lock = threading.Lock()

def increment():
    """Add 1 to the module-level shared_resource 100000 times.

    Each update is made while holding `lock`. The release sits in a
    ``finally`` clause so the lock is released even if the guarded
    statement raises.
    """
    global shared_resource
    for _ in range(100000):
        lock.acquire()
        try:
            shared_resource += 1
        finally:
            lock.release()

def decrement():
    """Subtract 1 from the module-level shared_resource 100000 times.

    Each update is made while holding `lock`. The release sits in a
    ``finally`` clause so the lock is released even if the guarded
    statement raises.
    """
    global shared_resource
    for _ in range(100000):
        lock.acquire()
        try:
            shared_resource -= 1
        finally:
            lock.release()

# Run the incrementing and the decrementing worker concurrently
t1 = threading.Thread(target=increment)
t2 = threading.Thread(target=decrement)

t1.start()
t2.start()

# Wait for both workers before reading the shared value
t1.join()
t2.join()

print(f"Final value of shared_resource is {shared_resource}")

使用with语句简化锁操作

def increment():
    """Add 1 to the module-level shared_resource 100000 times.

    Each update runs inside ``with lock:``, so the lock is acquired on
    entering the block and released on leaving it.
    """
    global shared_resource
    for _ in range(100000):
        with lock:
            shared_resource += 1

可重入锁 (RLock)

允许同一个线程多次获取锁

# Re-entrant lock taken again at every level of the recursion below
rlock = threading.RLock()

def recursive_function(count):
    """Print one line per level while count is positive, recursing with count - 1.

    Every call enters ``with rlock`` while the calling level's ``with``
    block is still active, so the same thread acquires the lock repeatedly.

    Args:
        count: Current countdown value; recursion stops once it is not > 0.
    """
    with rlock:
        if not count > 0:
            return
        print(f"Acquiring lock, count = {count}")
        recursive_function(count - 1)

recursive_function(5)

条件变量 (Condition)

import threading
import time

# Condition protecting `items`; the consumer waits on it until an item exists
condition = threading.Condition()
items = []

def consumer():
    """Wait until `items` is non-empty, then pop one item and print it.

    The emptiness check is repeated after every wake-up. A notification
    that arrives while `items` is still empty therefore sends the thread
    back to waiting instead of popping from an empty list.
    """
    with condition:
        if not items:
            print("Consumer waiting...")
        # Re-check the predicate in a loop: being woken up does not by
        # itself guarantee that an item is available.
        while not items:
            condition.wait()
        print(f"Consumer consumed {items.pop()}")

def producer():
    """Append one item to `items` and call condition.notify().

    Both steps run while holding `condition`.
    """
    with condition:
        print("Producer producing...")
        items.append("an item")
        condition.notify()

consumer_thread = threading.Thread(target=consumer)
producer_thread = threading.Thread(target=producer)

consumer_thread.start()
time.sleep(1)  # give the consumer time to start waiting before the producer runs
producer_thread.start()

# Wait for both threads to finish
consumer_thread.join()
producer_thread.join()

信号量 (Semaphore)

import threading
import time

semaphore = threading.Semaphore(3)  # at most 3 threads may hold it at the same time

def access_resource(thread_id):
    """Acquire the shared semaphore, hold it for two seconds, then release it.

    Prints a message before acquiring, once access has been granted, and
    after the ``with`` block has released the semaphore.

    Args:
        thread_id: Value interpolated into the printed messages.
    """
    print(f"Thread {thread_id} is trying to access")
    with semaphore:
        print(f"Thread {thread_id} was granted access")
        time.sleep(2)
    print(f"Thread {thread_id} released the semaphore")

# Start ten threads that all call access_resource
threads = []
for i in range(10):
    t = threading.Thread(target=access_resource, args=(i,))
    threads.append(t)
    t.start()

# Wait for every thread to finish
for t in threads:
    t.join()

事件 (Event)

import threading
import time

# Event shared by the waiting thread and the setting thread
event = threading.Event()

def waiter():
    """Print a message, block in event.wait(), then print a second message."""
    print("Waiter waiting for event")
    event.wait()
    print("Waiter got the event")

def setter():
    """Sleep three seconds, print a message, then set the shared event."""
    time.sleep(3)
    print("Setter setting the event")
    event.set()

# One thread waits on the event, the other sets it
t1 = threading.Thread(target=waiter)
t2 = threading.Thread(target=setter)

t1.start()
t2.start()

# Wait for both threads to finish
t1.join()
t2.join()

屏障 (Barrier)

import threading
import time

barrier = threading.Barrier(3)  # all 3 threads must arrive before any of them continues

def worker(thread_id):
    """Sleep for thread_id seconds, then wait at the shared barrier.

    Args:
        thread_id: Used in the printed messages and as the sleep
            duration in seconds.
    """
    print(f"Thread {thread_id} is working")
    time.sleep(thread_id)
    print(f"Thread {thread_id} is waiting at barrier")
    barrier.wait()
    print(f"Thread {thread_id} passed the barrier")

# Start three workers, matching the number of parties the barrier was created with
threads = []
for i in range(3):
    t = threading.Thread(target=worker, args=(i,))
    threads.append(t)
    t.start()

# Wait for every thread to finish
for t in threads:
    t.join()

4.线程间通信

队列 (Queue)

import threading
import queue
import time

def producer(q):
    """Put the integers 0-4 on q, one per second, followed by a None sentinel.

    Args:
        q: Queue object; only its put() method is used here.
    """
    for i in range(5):
        print(f"Producing item {i}")
        q.put(i)
        time.sleep(1)
    q.put(None)  # sentinel: signals that no more items will follow

def consumer(q):
    """Consume items from q until the None sentinel is received.

    Every item taken from the queue, including the sentinel, is marked
    with task_done(), so a caller blocked in q.join() is released once
    the consumer has stopped.

    Args:
        q: Queue object; its get() and task_done() methods are used.
    """
    while True:
        item = q.get()
        try:
            if item is None:  # sentinel received: stop consuming
                break
            print(f"Consumed item {item}")
        finally:
            # Runs for the sentinel too, keeping the unfinished-task
            # count balanced with the number of get() calls.
            q.task_done()

# One queue shared by the producer thread and the consumer thread
q = queue.Queue()
prod_thread = threading.Thread(target=producer, args=(q,))
cons_thread = threading.Thread(target=consumer, args=(q,))

prod_thread.start()
cons_thread.start()

# Wait for both threads to finish
prod_thread.join()
cons_thread.join()

5.线程池

使用ThreadPoolExecutor

from concurrent.futures import ThreadPoolExecutor
import time

def task(name, duration=2):
    """Simulate a blocking job and return a result string.

    Args:
        name: Label used in the printed messages and the returned string.
        duration: Seconds to sleep between the start and finish messages.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        The string ``"Result of <name>"``.
    """
    print(f"Task {name} started")
    time.sleep(duration)
    print(f"Task {name} finished")
    return f"Result of {name}"

with ThreadPoolExecutor(max_workers=3) as executor:
    # Submit three tasks; each submit() call returns a future
    future1 = executor.submit(task, "A")
    future2 = executor.submit(task, "B")
    future3 = executor.submit(task, "C")

    # Fetch each task's return value through its future
    print(future1.result())
    print(future2.result())
    print(future3.result())

print("All tasks completed")

使用map方法

# Run task once per name on a pool limited to two workers and print each return value
with ThreadPoolExecutor(max_workers=2) as executor:
    results = executor.map(task, ["Task1", "Task2", "Task3"])
    for result in results:
        print(result)

6.线程局部数据

import threading

# Create the thread-local data object
local_data = threading.local()

def show_data():
    """Print the current thread's name followed by local_data.value."""
    print(f"{threading.current_thread().name}: {local_data.value}")

def worker(value):
    """Store value on local_data, then print it through show_data().

    Args:
        value: Object assigned to local_data.value in the calling thread.
    """
    local_data.value = value
    show_data()

# Two named threads, each passing its own value to worker
thread1 = threading.Thread(target=worker, args=("Thread 1 data",), name="Thread-1")
thread2 = threading.Thread(target=worker, args=("Thread 2 data",), name="Thread-2")

thread1.start()
thread2.start()

# Wait for both threads to finish
thread1.join()
thread2.join()

7.守护线程

import threading
import time

def daemon_thread():
    """Print a start message, sleep two seconds, then print a final message.

    The final message is only reached if the process is still running
    when the sleep ends.
    """
    print("Daemon thread started")
    time.sleep(2)
    print("This will not be printed")

def non_daemon_thread():
    """Print a start message, sleep one second, then print an exit message."""
    print("Non-daemon thread started")
    time.sleep(1)
    print("Non-daemon thread exiting")

d = threading.Thread(name='daemon', target=daemon_thread)
# Mark the thread as a daemon through the `daemon` attribute;
# setDaemon() is deprecated since Python 3.10.
d.daemon = True

t = threading.Thread(name='non-daemon', target=non_daemon_thread)

d.start()
t.start()

# The main thread waits only for the non-daemon thread
t.join()
# The daemon thread is not joined; it is abandoned when the program exits

8.高级话题

定时器 (Timer)

import threading

def hello():
    """Print a fixed greeting; used as the timer callback."""
    print("Hello, World!")

t = threading.Timer(3.0, hello)  # call hello after 3 seconds
t.start()

自定义线程子类

import threading
import time

class CustomThread(threading.Thread):
    """Thread that keeps its target's return value and returns it from join().

    The positional and keyword arguments are stored on the instance as
    ``args`` and ``kwargs`` instead of being forwarded to Thread.__init__.
    """

    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, *, daemon=None):
        super().__init__(group=group, target=target, name=name, daemon=daemon)
        self.result = None
        self.args = args
        # Fall back to a fresh dict when no keyword arguments were given.
        self.kwargs = kwargs if kwargs else {}

    def run(self):
        """Call the target with the stored arguments and keep what it returns."""
        func = self._target
        if func is None:
            return
        self.result = func(*self.args, **self.kwargs)

    def join(self, timeout=None):
        """Wait as Thread.join does, then return ``self.result``.

        ``result`` is still None when run() has not stored a value yet or
        when no target was given.
        """
        super().join(timeout)
        return self.result

def calculate(a, b, delay=2):
    """Return a + b after sleeping to simulate slow work.

    Args:
        a: Left operand.
        b: Right operand.
        delay: Seconds to sleep before computing the result. Defaults to
            2, the value that was previously hard-coded.

    Returns:
        The value of ``a + b``.
    """
    time.sleep(delay)
    return a + b

# Run calculate(3, 4) in a CustomThread and read its return value from join()
t = CustomThread(target=calculate, args=(3, 4))
t.start()
result = t.join()
print(f"The result is {result}")

线程安全的数据结构

import threading
import queue

class ThreadSafeCounter:
    """Integer counter whose reads and updates all happen under one lock."""

    def __init__(self):
        self._lock = threading.Lock()
        self._value = 0

    def _shift(self, delta):
        """Add delta to the counter while holding the lock; return the new value."""
        with self._lock:
            self._value += delta
            return self._value

    def increment(self):
        """Increase the counter by one and return the updated value."""
        return self._shift(1)

    def decrement(self):
        """Decrease the counter by one and return the updated value."""
        return self._shift(-1)

    def value(self):
        """Return the current counter value, read while holding the lock."""
        with self._lock:
            current = self._value
        return current

# Single counter instance used by every worker thread
counter = ThreadSafeCounter()

def worker():
    """Call counter.increment() 100000 times."""
    for _ in range(100000):
        counter.increment()

# Start ten worker threads that all increment the same counter
threads = []
for _ in range(10):
    t = threading.Thread(target=worker)
    threads.append(t)
    t.start()

# Wait for every worker before reading the total
for t in threads:
    t.join()

print(f"Final counter value: {counter.value()}")

9.总结

常见问题

竞态条件：多个线程同时访问共享资源导致的不一致
- 解决方案：使用锁或其他同步机制
死锁：多个线程互相等待对方释放锁
- 解决方案：按固定顺序获取锁，使用超时机制
活锁：线程不断重试某个操作但始终无法取得进展
- 解决方案：引入随机退避时间
线程饥饿：某些线程一直得不到执行机会
- 解决方案：采用公平的调度方式（例如用队列按先后顺序分发任务）；注意Python的threading模块本身不提供公平锁，也不支持设置线程优先级

注意事项

避免使用全局变量：尽量使用线程局部数据或传递参数
优先使用队列：线程间通信优先考虑使用队列而非共享变量
合理设置线程数量：I/O密集型可以多些，CPU密集型不宜过多
使用with语句管理锁：确保锁一定会被释放
避免在锁内执行耗时操作：减少锁的持有时间
考虑使用线程池：而非频繁创建销毁线程
适当使用守护线程：对于不需要等待的后台任务
注意GIL的影响：CPU密集型任务考虑多进程

GIL的影响与应对

CPython的全局解释器锁(GIL)使得同一时刻只有一个线程执行Python字节码（Python 3.13起提供可选的自由线程构建，可以关闭GIL）。这意味着：

I/O密集型任务：多线程仍然有效，因为I/O操作会释放GIL
CPU密集型任务：多线程无法利用多核优势，应考虑多进程(multiprocessing模块)

# CPU密集型任务使用多进程示例
from multiprocessing import Pool

def cpu_intensive_task(x):
    """Return x multiplied by itself."""
    return x * x

if __name__ == '__main__':
    # Map the task over 0-9 with Pool(4) and print the list of results
    with Pool(4) as p:
        print(p.map(cpu_intensive_task, range(10)))