Python threading模块使用教程
目录
1.线程基础概念
什么是线程
线程是操作系统能够进行运算调度的最小单位,它被包含在进程之中,是进程中的实际运作单位。一个进程可以包含多个线程,这些线程共享进程的内存空间。
Python中的线程
Python中的线程是通过threading模块实现的。需要注意的是,由于GIL(全局解释器锁)的存在,Python的多线程在CPU密集型任务上并不能真正实现并行,但在I/O密集型任务中仍然非常有用。
线程与进程的区别
- 进程是资源分配的最小单位,线程是程序执行的最小单位
- 进程有独立的内存空间,线程共享进程的内存空间
- 进程间切换开销大,线程间切换开销小
- 进程间通信复杂,线程间通信简单
2.创建线程
方法1:继承Thread类
import threading
import time
class MyThread(threading.Thread):
    """Thread subclass that prints a timestamp at a fixed interval.

    Args:
        thread_id: Caller-chosen identifier, stored on the instance.
        name: Thread name; also used as the prefix of each printed line.
        counter: Delay in seconds passed to ``print_time`` between prints.
    """

    def __init__(self, thread_id, name, counter):
        # Hand the name to the base class instead of assigning it afterwards.
        super().__init__(name=name)
        self.thread_id = thread_id
        self.counter = counter

    def run(self):
        """Print five timestamps, ``self.counter`` seconds apart."""
        print(f"Starting {self.name}")
        print_time(self.name, self.counter, 5)
        print(f"Exiting {self.name}")
def print_time(thread_name, delay, counter):
    """Print the current time ``counter`` times, pausing before each print.

    Args:
        thread_name: Label printed in front of every timestamp.
        delay: Seconds to sleep before each print.
        counter: Number of timestamps to print; zero or a negative value
            prints nothing.
    """
    # range() terminates for any integer, whereas counting down to zero
    # would loop forever on a negative counter.
    for _ in range(counter):
        time.sleep(delay)
        print(f"{thread_name}: {time.ctime()}")
# Build both threads up front
thread1 = MyThread(1, "Thread-1", 1)
thread2 = MyThread(2, "Thread-2", 2)
# Start them in creation order
for thread in (thread1, thread2):
    thread.start()
# Block until each one has finished
for thread in (thread1, thread2):
    thread.join()
print("Exiting Main Thread")
方法2:直接使用Thread对象
import threading
import time
def worker(num):
    """Announce start, sleep for one second, then announce completion.

    Args:
        num: Value shown in both printed messages.
    """
    print(f"Worker {num} started")
    time.sleep(1)
    print(f"Worker {num} finished")
# Start five workers, keeping a handle on each so it can be joined later.
threads = []
for i in range(5):
    t = threading.Thread(target=worker, args=(i,))
    threads.append(t)
    t.start()

# Wait for every worker to finish
for t in threads:
    t.join()
print("All threads completed")
Thread类常用方法
- start(): 启动线程
- run(): 线程执行的方法(可以重写)
- join(timeout=None): 等待线程终止
- is_alive(): 返回线程是否存活
- name: 线程名称
- ident: 线程标识符
- daemon: 是否为守护线程标志
3.线程同步
锁 (Lock)
import threading
shared_resource = 0  # counter mutated by the worker threads
lock = threading.Lock()  # guards every update of shared_resource


def increment():
    """Add 1 to ``shared_resource`` 100000 times, taking ``lock`` per update."""
    global shared_resource
    for _ in range(100000):
        lock.acquire()
        try:
            shared_resource += 1
        finally:
            # Release on every path so a failure cannot leave the lock held.
            lock.release()
def decrement():
    """Subtract 1 from ``shared_resource`` 100000 times, taking ``lock`` per update."""
    global shared_resource
    for _ in range(100000):
        lock.acquire()
        try:
            shared_resource -= 1
        finally:
            # Release on every path so a failure cannot leave the lock held.
            lock.release()
# One thread counts up while the other counts down
t1 = threading.Thread(target=increment)
t2 = threading.Thread(target=decrement)
for thread in (t1, t2):
    thread.start()
for thread in (t1, t2):
    thread.join()
print(f"Final value of shared_resource is {shared_resource}")
使用with语句简化锁操作
def increment():
    """Add 1 to ``shared_resource`` 100000 times under ``lock``."""
    global shared_resource
    for _ in range(100000):
        # The with-statement acquires the lock and releases it on exit,
        # including when the body raises.
        with lock:
            shared_resource += 1
可重入锁 (RLock)
允许同一个线程多次获取锁
rlock = threading.RLock()


def recursive_function(count):
    """Re-acquire ``rlock`` once per recursion level until ``count`` reaches 0.

    Args:
        count: Number of nested acquisitions that print a message.
    """
    # The same thread takes rlock again at every level of the recursion.
    with rlock:
        if count > 0:
            print(f"Acquiring lock, count = {count}")
            recursive_function(count - 1)


recursive_function(5)
条件变量 (Condition)
import threading
import time
condition = threading.Condition()
items = []  # shared buffer protected by `condition`


def consumer():
    """Wait until ``items`` is non-empty, then pop and print one item."""
    with condition:
        # Re-check the predicate after every wake-up: wait() may return
        # while the list is still empty.
        while not items:
            print("Consumer waiting...")
            condition.wait()
        print(f"Consumer consumed {items.pop()}")
def producer():
    """Append one item to ``items`` and wake a single waiting thread."""
    with condition:
        print("Producer producing...")
        items.append("an item")
        # notify() is called while the condition's lock is held.
        condition.notify()
consumer_thread = threading.Thread(target=consumer)
producer_thread = threading.Thread(target=producer)
consumer_thread.start()
time.sleep(1)  # give the consumer a head start so it is waiting first
producer_thread.start()
for thread in (consumer_thread, producer_thread):
    thread.join()
信号量 (Semaphore)
import threading
import time
semaphore = threading.Semaphore(3)  # at most 3 threads inside at once


def access_resource(thread_id):
    """Enter the semaphore-guarded section, hold it for 2 seconds, then leave.

    Args:
        thread_id: Value shown in the printed messages.
    """
    print(f"Thread {thread_id} is trying to access")
    with semaphore:
        print(f"Thread {thread_id} was granted access")
        time.sleep(2)
    # Printed after the with-block, i.e. once the semaphore is released.
    print(f"Thread {thread_id} released the semaphore")
# Ten threads compete for the three semaphore slots.
threads = []
for i in range(10):
    t = threading.Thread(target=access_resource, args=(i,))
    threads.append(t)
    t.start()

for t in threads:
    t.join()
事件 (Event)
import threading
import time
event = threading.Event()


def waiter():
    """Block until ``event`` is set, printing a message before and after."""
    print("Waiter waiting for event")
    event.wait()
    print("Waiter got the event")
def setter():
    """Sleep for 3 seconds, then set ``event``."""
    time.sleep(3)
    print("Setter setting the event")
    event.set()
# The waiter blocks on the event; the setter fires it later
t1 = threading.Thread(target=waiter)
t2 = threading.Thread(target=setter)
for thread in (t1, t2):
    thread.start()
for thread in (t1, t2):
    thread.join()
屏障 (Barrier)
import threading
import time
barrier = threading.Barrier(3)  # three threads must arrive before any continues


def worker(thread_id):
    """Sleep ``thread_id`` seconds, then wait at ``barrier`` with the others.

    Args:
        thread_id: Shown in the messages and used as the sleep duration.
    """
    print(f"Thread {thread_id} is working")
    time.sleep(thread_id)
    print(f"Thread {thread_id} is waiting at barrier")
    barrier.wait()
    print(f"Thread {thread_id} passed the barrier")
# Exactly as many workers as the barrier has parties.
threads = []
for i in range(3):
    t = threading.Thread(target=worker, args=(i,))
    threads.append(t)
    t.start()

for t in threads:
    t.join()
4.线程间通信
队列 (Queue)
import threading
import queue
import time
def producer(q, count=5, delay=1):
    """Put ``count`` consecutive integers on ``q``, then a ``None`` sentinel.

    Args:
        q: Queue that receives the items.
        count: Number of items to produce (default 5).
        delay: Seconds to sleep after each item (default 1).
    """
    for i in range(count):
        print(f"Producing item {i}")
        q.put(i)
        time.sleep(delay)
    q.put(None)  # end-of-stream signal for the consumer
def consumer(q):
    """Print items taken from ``q`` until the ``None`` sentinel arrives.

    Args:
        q: Queue to read from; each retrieved entry, including the
            sentinel, is acknowledged with ``task_done()``.
    """
    while True:
        item = q.get()
        try:
            if item is None:  # end-of-stream signal
                break
            print(f"Consumed item {item}")
        finally:
            # Acknowledge the sentinel too; otherwise q.join() would block
            # forever on one unfinished task.
            q.task_done()
# Producer and consumer share one queue
q = queue.Queue()
prod_thread = threading.Thread(target=producer, args=(q,))
cons_thread = threading.Thread(target=consumer, args=(q,))
for thread in (prod_thread, cons_thread):
    thread.start()
for thread in (prod_thread, cons_thread):
    thread.join()
5.线程池
使用ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor
import time
def task(name, duration=2):
    """Simulate a unit of work and return a result string.

    Args:
        name: Label used in the messages and in the returned string.
        duration: Seconds to sleep while "working" (default 2).

    Returns:
        The string ``"Result of <name>"``.
    """
    print(f"Task {name} started")
    time.sleep(duration)
    print(f"Task {name} finished")
    return f"Result of {name}"
with ThreadPoolExecutor(max_workers=3) as executor:
    # Submit the tasks; each submit() returns a Future immediately
    future1 = executor.submit(task, "A")
    future2 = executor.submit(task, "B")
    future3 = executor.submit(task, "C")
    # result() blocks until the corresponding task has finished
    print(future1.result())
    print(future2.result())
    print(future3.result())
print("All tasks completed")
使用map方法
with ThreadPoolExecutor(max_workers=2) as executor:
    # map() yields results in the order of the inputs
    results = executor.map(task, ["Task1", "Task2", "Task3"])
    for result in results:
        print(result)
6.线程局部数据
import threading
# Create the thread-local storage object
local_data = threading.local()


def show_data():
    """Print the current thread's name and its own ``local_data.value``."""
    print(f"{threading.current_thread().name}: {local_data.value}")
def worker(value):
    """Store ``value`` in this thread's ``local_data`` and print it.

    Args:
        value: Object assigned to ``local_data.value`` for the calling thread.
    """
    local_data.value = value
    show_data()
# Each thread stores a different value in local_data
thread1 = threading.Thread(target=worker, args=("Thread 1 data",), name="Thread-1")
thread2 = threading.Thread(target=worker, args=("Thread 2 data",), name="Thread-2")
for thread in (thread1, thread2):
    thread.start()
for thread in (thread1, thread2):
    thread.join()
7.守护线程
import threading
import time
def daemon_thread():
    """Print a start message, sleep 2 seconds, then print a final message."""
    print("Daemon thread started")
    time.sleep(2)
    print("This will not be printed")
def non_daemon_thread():
    """Print a start message, sleep 1 second, then print an exit message."""
    print("Non-daemon thread started")
    time.sleep(1)
    print("Non-daemon thread exiting")
# Mark the thread as a daemon through the constructor; setDaemon() is
# deprecated since Python 3.10.
d = threading.Thread(name='daemon', target=daemon_thread, daemon=True)
t = threading.Thread(name='non-daemon', target=non_daemon_thread)
d.start()
t.start()
# The main thread waits only for the non-daemon thread
t.join()
# When the program exits after this point, the daemon thread is stopped
# without being waited for
8.高级话题
定时器 (Timer)
import threading
def hello():
    """Print a greeting."""
    print("Hello, World!")
# Schedule hello() to be called once, on a separate thread
t = threading.Timer(3.0, hello)  # runs after 3 seconds
t.start()
自定义线程子类
import threading
import time
class CustomThread(threading.Thread):
    """Thread whose ``join()`` returns the target's return value.

    Accepts the same arguments as ``threading.Thread``. The positional and
    keyword arguments for the target are kept on ``self.args`` and
    ``self.kwargs``; the target's return value is stored on ``self.result``.
    """

    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, *, daemon=None):
        super().__init__(group=group, target=target, name=name,
                         daemon=daemon)
        self.args = args
        # Copy so later changes to the caller's dict do not affect this thread.
        self.kwargs = dict(kwargs) if kwargs else {}
        self.result = None  # stays None until the target has returned

    def run(self):
        """Call the target and remember what it returned."""
        if self._target is not None:
            self.result = self._target(*self.args, **self.kwargs)

    def join(self, timeout=None):
        """Wait for the thread, then return ``self.result``.

        Returns ``None`` when the target has not produced a value yet, for
        example because ``timeout`` expired first.
        """
        super().join(timeout)
        return self.result
def calculate(a, b, delay=2):
    """Return ``a + b`` after simulating a slow computation.

    Args:
        a: First operand.
        b: Second operand.
        delay: Seconds to sleep before returning (default 2).
    """
    time.sleep(delay)
    return a + b
# Run calculate(3, 4) on a CustomThread
t = CustomThread(target=calculate, args=(3, 4))
t.start()
# CustomThread.join() returns the value stored by run()
result = t.join()
print(f"The result is {result}")
线程安全的数据结构
import threading
import queue
class ThreadSafeCounter:
    """Integer counter whose every read and update happens under a lock."""

    def __init__(self):
        self._value = 0
        self._lock = threading.Lock()

    def increment(self):
        """Add 1 and return the new value."""
        with self._lock:
            self._value += 1
            return self._value

    def decrement(self):
        """Subtract 1 and return the new value."""
        with self._lock:
            self._value -= 1
            return self._value

    def value(self):
        """Return the current value."""
        with self._lock:
            return self._value
counter = ThreadSafeCounter()


def worker():
    """Increment the shared ``counter`` 100000 times."""
    for _ in range(100000):
        counter.increment()
# Ten workers all increment the same counter.
threads = []
for _ in range(10):
    t = threading.Thread(target=worker)
    threads.append(t)
    t.start()

for t in threads:
    t.join()
print(f"Final counter value: {counter.value()}")
9.总结
常见问题
- 竞态条件:多个线程同时访问共享资源导致的不一致
  - 解决方案:使用锁或其他同步机制
- 死锁:多个线程互相等待对方释放锁
  - 解决方案:按固定顺序获取锁,使用超时机制
- 活锁:线程不断重试某个操作但始终无法取得进展
  - 解决方案:引入随机退避时间
- 线程饥饿:某些线程一直得不到执行机会
  - 解决方案:公平锁,合理设置线程优先级(注意:Python的threading模块本身不提供线程优先级和公平锁,需要自行通过队列等方式实现公平调度)
注意事项
- 避免使用全局变量:尽量使用线程局部数据或传递参数
- 优先使用队列:线程间通信优先考虑使用队列而非共享变量
- 合理设置线程数量:I/O密集型可以多些,CPU密集型不宜过多
- 使用with语句管理锁:确保锁一定会被释放
- 避免在锁内执行耗时操作:减少锁的持有时间
- 考虑使用线程池:而非频繁创建销毁线程
- 适当使用守护线程:对于不需要等待的后台任务
- 注意GIL的影响:CPU密集型任务考虑多进程
GIL的影响与应对
Python的全局解释器锁(GIL)使得同一时刻只有一个线程执行Python字节码。这意味着:
- I/O密集型任务:多线程仍然有效,因为I/O操作会释放GIL
- CPU密集型任务:多线程无法利用多核优势,应考虑多进程(multiprocessing模块)
# CPU密集型任务使用多进程示例
from multiprocessing import Pool
def cpu_intensive_task(x):
    """Return the square of ``x``."""
    return x * x
# Create the pool only when run as a script, so that worker processes
# importing this module do not start pools of their own.
if __name__ == '__main__':
    with Pool(4) as p:
        print(p.map(cpu_intensive_task, range(10)))