Python 并发编程实战：优雅地使用 concurrent.futures

在 Python 多线程编程中，concurrent.futures 模块提供了一个高层的接口来异步执行可调用对象。今天，我们将通过一个循序渐进的案例，深入了解如何使用这个强大的工具。

从一个模拟场景开始

假设我们需要处理一批网络请求。为了模拟这个场景，我们使用 sleep 来代表耗时操作：

import time
import random

def slow_operation(task_id):
    """Simulate a slow network request.

    Sleeps for a random duration between 0.5 and 2 seconds, then returns
    a message naming the task and the time slept.
    """
    delay = random.uniform(0.5, 2)
    time.sleep(delay)
    message = f"Task {task_id} completed in {delay:.2f} seconds"
    return message

# Serial processing
def process_serial():
    """Run ten simulated requests one after another and time the whole run.

    Prints the total elapsed time and returns the result strings in task order.
    """
    began = time.perf_counter()
    outputs = [slow_operation(task_id) for task_id in range(10)]
    elapsed = time.perf_counter() - began
    print(f"串行处理总耗时：{elapsed:.2f} 秒")
    return outputs

# Example run: execute the serial version and print each result line
if __name__ == '__main__':
    results = process_serial()
    for r in results:
        print(r)

串行处理总耗时：11.75 秒
Task 0 completed in 1.27 seconds
Task 1 completed in 1.10 seconds
Task 2 completed in 1.35 seconds
Task 3 completed in 1.36 seconds
Task 4 completed in 1.42 seconds
Task 5 completed in 1.55 seconds
Task 6 completed in 0.74 seconds
Task 7 completed in 0.55 seconds
Task 8 completed in 1.40 seconds
Task 9 completed in 0.97 seconds

运行这段代码，你会发现处理 10 个任务需要大约 10-15 秒。这显然不够高效。

使用传统的 threading 模块

让我们先看看使用传统的 threading 模块如何改进：

import threading
from queue import Queue

def slow_operation(task_id):
    """Simulate a slow network request.

    Sleeps for a random duration between 0.5 and 2 seconds, then returns
    a message naming the task and the time slept.
    """
    pause = random.uniform(0.5, 2)
    time.sleep(pause)
    report = f"Task {task_id} completed in {pause:.2f} seconds"
    return report

def process_threading():
    """Run ten simulated requests on four manually managed worker threads.

    Task ids are placed on a queue; each worker pulls ids until the queue
    is empty and appends its result to a shared list under a lock.  Prints
    the total elapsed time.

    Returns:
        list[str]: Result strings in the order the tasks finished.
    """
    # Imported locally: the file-level import only brings in the Queue class,
    # and the Empty exception lives on the queue module, not on Queue itself
    # (``Queue.Empty`` raises AttributeError).
    from queue import Empty

    start = time.perf_counter()
    results = []
    work_queue = Queue()
    lock = threading.Lock()

    # Fill the queue before any worker starts, so an empty queue means
    # there is no work left.
    for i in range(10):
        work_queue.put(i)

    def worker():
        """Pull task ids until the queue is drained, recording each result."""
        while True:
            # Keep the try body to the single call that can raise Empty.
            try:
                task_id = work_queue.get_nowait()
            except Empty:
                break
            result = slow_operation(task_id)
            with lock:
                results.append(result)
            work_queue.task_done()

    threads = []
    for _ in range(4):  # four worker threads
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)

    # Wait for every worker to finish before reading the clock.
    for t in threads:
        t.join()

    end = time.perf_counter()
    print(f"多线程处理总耗时：{end - start:.2f} 秒")
    return results

多线程处理总耗时：3.24 秒

这个版本使用了多线程，性能确实提升了，但代码比较复杂，需要手动管理线程、锁和队列。

concurrent.futures 的优雅解决方案

现在，让我们看看如何使用 concurrent.futures 来简化代码：

import time
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

def slow_operation(task_id):
    """Simulate a slow network request.

    Sleeps for a random duration between 0.5 and 2 seconds, then returns
    a message naming the task and the time slept.
    """
    delay = random.uniform(0.5, 2)
    time.sleep(delay)
    message = f"Task {task_id} completed in {delay:.2f} seconds"
    return message

def process_concurrent():
    """Run ten simulated requests on a four-worker thread pool and time them.

    Prints the total elapsed time and returns the result strings in the
    order the tasks finished.
    """
    began = time.perf_counter()

    # Thread pool capped at four workers
    with ThreadPoolExecutor(max_workers=4) as pool:
        # Submit every task up front, remembering which id each future belongs to
        pending = {pool.submit(slow_operation, task_id): task_id
                   for task_id in range(10)}

        # Collect results as each future completes
        collected = [finished.result() for finished in as_completed(pending)]

    elapsed = time.perf_counter() - began
    print(f"concurrent.futures 处理总耗时：{elapsed:.2f} 秒")
    return collected

# Run the thread-pool demo (executes at import time; there is no __main__ guard here)
process_concurrent()

concurrent.futures 处理总耗时：3.54 秒

这里我们用到了几个关键概念：

ThreadPoolExecutor ：线程池执行器，用于管理一组工作线程。创建时可以指定最大线程数。
executor.submit() ：向线程池提交一个任务。返回 Future 对象，代表将来某个时刻会完成的操作。
as_completed() ：返回一个迭代器，在 Future 完成时产生对应的 Future 对象。这意味着结果是按照完成顺序而不是提交顺序返回的。

Future 对象的高级用法

Future 对象提供了多个有用的方法，让我们通过实例来了解：

import time
import random
from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED

def slow_operation(task_id):
    """Simulate a slow network request.

    Sleeps for a random duration between 0.5 and 2 seconds, then returns
    a message naming the task and the time slept.
    """
    pause = random.uniform(0.5, 2)
    time.sleep(pause)
    report = f"Task {task_id} completed in {pause:.2f} seconds"
    return report

def demonstrate_future_features():
    """Walk through done(), wait(), result(timeout=...) and cancel() on Futures.

    Submits ten simulated requests to a four-worker pool and prints the
    outcome of each step.  Returns None.
    """
    # Before Python 3.11, Future.result() raises concurrent.futures.TimeoutError,
    # which is a different class from the builtin TimeoutError; from 3.11 on
    # the two names refer to the same class.  Catching the concurrent.futures
    # name works on every version.
    from concurrent.futures import TimeoutError as FutureTimeoutError

    with ThreadPoolExecutor(max_workers=4) as executor:
        # Submit the tasks and keep their Future objects
        futures = [executor.submit(slow_operation, i) for i in range(10)]

        # 1. done() reports whether a task has finished
        print("检查第一个任务是否完成:", futures[0].done())

        # 2. wait() blocks until the requested condition holds; here, until
        #    at least one future has completed
        done, not_done = wait(futures, return_when=FIRST_COMPLETED)
        print(f"完成的任务数: {len(done)}, 未完成的任务数: {len(not_done)}")

        # 3. result() with a timeout, so the call cannot block indefinitely
        try:
            result = futures[0].result(timeout=1.0)
            print("获取到结果:", result)
        except FutureTimeoutError:
            print("获取结果超时")

        # 4. cancel() only succeeds for tasks that have not started running
        for f in not_done:
            cancelled = f.cancel()
            print(f"取消任务: {'成功' if cancelled else '失败'}")

# Run the Future feature demo (executes at import time; there is no __main__ guard here)
demonstrate_future_features()

检查第一个任务是否完成: False
完成的任务数: 1, 未完成的任务数: 9
获取到结果: Task 0 completed in 1.07 seconds
取消任务: 失败
取消任务: 成功
取消任务: 成功
取消任务: 失败
取消任务: 失败
取消任务: 失败
取消任务: 失败
取消任务: 成功
取消任务: 失败

线程/进程池还是异步 IO？

IO 密集型任务：优先选择 asyncio

为什么选择 asyncio ？

更低的资源开销： asyncio 使用协程，不需要创建额外的线程或进程
更高的并发量：单线程可以轻松处理数千个并发任务
没有 GIL 竞争：协程在单线程内协作式切换，不存在多线程争抢 GIL 的开销（但同样无法利用多核并行执行 Python 代码）

让我们通过一个网络请求的例子来对比：

import asyncio
import time
from concurrent.futures import ThreadPoolExecutor

# Simulated network requests
def sync_request(url):
    """Blocking request stand-in: sleep one second, then return a response string."""
    time.sleep(1)  # stands in for network latency
    response = f"Response from {url}"
    return response

async def async_request(url):
    """Coroutine request stand-in: await a one-second sleep, then return a response string."""
    await asyncio.sleep(1)  # stands in for network latency
    response = f"Response from {url}"
    return response

# Thread-pool version
def thread_pool_example():
    """Send 100 simulated blocking requests through a 20-worker thread pool.

    Prints the elapsed time and returns the list of responses.
    """
    targets = []
    for index in range(100):
        targets.append(f"http://example.com/{index}")
    began = time.perf_counter()

    with ThreadPoolExecutor(max_workers=20) as pool:
        responses = list(pool.map(sync_request, targets))

    elapsed = time.perf_counter() - began
    print(f"ThreadPoolExecutor 耗时: {elapsed:.2f} 秒")
    return responses

# asyncio version
async def asyncio_example():
    """Await 100 simulated requests concurrently with asyncio.gather.

    Prints the elapsed time and returns the list of responses.
    """
    targets = [f"http://example.com/{index}" for index in range(100)]
    began = time.perf_counter()

    # Create every coroutine, then await them all together
    responses = await asyncio.gather(
        *(async_request(target) for target in targets)
    )

    elapsed = time.perf_counter() - began
    print(f"asyncio 耗时: {elapsed:.2f} 秒")
    return responses

if __name__ == '__main__':
    # Run the thread-pool version
    thread_results = thread_pool_example()
    
    # Run the asyncio version on a fresh event loop
    asyncio_results = asyncio.run(asyncio_example())

ThreadPoolExecutor 耗时: 5.03 秒
asyncio 耗时: 1.00 秒

在这个例子中，线程池受 max_workers=20 的限制，100 个请求需要分 5 批才能完成；而 asyncio 可以同时等待全部 100 个请求，因此通常会表现出更好的性能，尤其是在并发量大的情况下。

CPU 密集型任务：使用 ProcessPoolExecutor

为什么选择多进程？

绕过 GIL：每个进程都有自己的 Python 解释器和 GIL
充分利用多核性能：可以真正实现并行计算
适合计算密集型任务：如数据处理、图像处理等

来看一个计算密集型任务的对比：

import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

def cpu_intensive_task(n):
    """CPU-bound workload: accumulate i ** 2 / 3.14 for every i in range(n).

    Returns the accumulated total (the integer 0 when n is 0).
    """
    total = 0
    # Explicit left-to-right accumulation keeps the floating-point result
    # identical from run to run.
    for value in range(n):
        total = total + value ** 2 / 3.14
    return total

def compare_performance():
    """Time the same CPU-bound workload on a thread pool and on a process pool.

    Runs twenty calls of cpu_intensive_task(10**6) on each four-worker
    executor and prints both elapsed times.  Returns None.
    """
    workload = [10**6] * 20  # twenty large computations

    def run_with(pool_factory):
        # The clock is read outside the with-block, so the measurement
        # includes executor start-up and shutdown.
        began = time.perf_counter()
        with pool_factory(max_workers=4) as pool:
            list(pool.map(cpu_intensive_task, workload))
        return time.perf_counter() - began

    # Thread pool
    thread_time = run_with(ThreadPoolExecutor)
    print(f"线程池耗时: {thread_time:.2f} 秒")

    # Process pool
    process_time = run_with(ProcessPoolExecutor)
    print(f"进程池耗时: {process_time:.2f} 秒")

# Run the comparison only when executed as a script; process pools started
# with the spawn method re-import the main module, so the call must be guarded.
if __name__ == '__main__':
    compare_performance()

线程池耗时: 4.61 秒
进程池耗时: 1.34 秒

在这种场景下， ProcessPoolExecutor 的性能明显优于 ThreadPoolExecutor 。

混合型任务：ThreadPoolExecutor 的优势

为什么有时候选择线程池？

更容易与现有代码集成：大多数 Python 库都是基于同步设计的
资源开销比进程池小：线程共享内存空间

适合 IO 和 CPU 混合的场景：当任务既有 IO 操作又有计算时

示例场景：

import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

def mixed_task(task_id):
    """Mixed workload: sleep, compute a sum of squares, then sleep again.

    Returns a string containing the task id and the computed sum.
    """
    time.sleep(0.5)  # first simulated I/O wait

    # CPU-bound part: sum of squares below 10**5
    squares_total = 0
    for value in range(10**5):
        squares_total += value * value

    time.sleep(0.5)  # second simulated I/O wait

    return f"Task {task_id}: {squares_total}"

def demonstrate_mixed_workload():
    """Time ten mixed I/O + CPU tasks on a thread pool and on a process pool.

    Prints the elapsed time for each four-worker executor.  Returns None.
    """
    task_ids = range(10)

    def run_with(pool_factory):
        # The clock is read outside the with-block, so the measurement
        # includes executor start-up and shutdown.
        began = time.perf_counter()
        with pool_factory(max_workers=4) as pool:
            list(pool.map(mixed_task, task_ids))
        return time.perf_counter() - began

    # Thread pool
    thread_time = run_with(ThreadPoolExecutor)
    print(f"线程池处理混合任务耗时: {thread_time:.2f} 秒")

    # Process pool
    process_time = run_with(ProcessPoolExecutor)
    print(f"进程池处理混合任务耗时: {process_time:.2f} 秒")

# Run the demo only when executed as a script; process pools started with
# the spawn method re-import the main module, so the call must be guarded.
if __name__ == '__main__':
    demonstrate_mixed_workload()

线程池处理混合任务耗时: 3.05 秒
进程池处理混合任务耗时: 3.11 秒

选择建议的决策树

在选择并发方案时，可以参考以下决策流程：

首先判断任务类型：

如果是纯 IO 密集型（网络请求、文件操作），优先选择 asyncio。
如果是纯 CPU 密集型（大量计算），优先选择 ProcessPoolExecutor。
如果是混合型任务，考虑使用 ThreadPoolExecutor。

考虑其他因素：

现有代码是否易于改造为异步？
是否需要与同步代码交互？
并发量有多大？
是否需要跨进程通信？

def choose_concurrency_model(task_type,
                           concurrent_count,
                           legacy_code=False,
                           need_shared_memory=False):
    """Suggest a concurrency model name for a workload (illustrative helper).

    Args:
        task_type: "IO" or "CPU"; any other value is treated as mixed.
        concurrent_count: Number of concurrent tasks; only consulted for
            mixed workloads.
        legacy_code: Only consulted for "IO" workloads.
        need_shared_memory: Consulted for "IO" and "CPU" workloads.

    Returns:
        "asyncio", "ThreadPoolExecutor" or "ProcessPoolExecutor".
    """
    if task_type == "IO":
        prefers_threads = legacy_code or need_shared_memory
        return "ThreadPoolExecutor" if prefers_threads else "asyncio"

    if task_type == "CPU":
        return "ThreadPoolExecutor" if need_shared_memory else "ProcessPoolExecutor"

    # Anything else is handled as a mixed workload
    return "asyncio" if concurrent_count > 1000 else "ThreadPoolExecutor"

性能对比总结

方案	IO密集型	CPU密集型	混合型	资源开销	代码复杂度
asyncio	最佳	较差	好	最低	较高
ThreadPoolExecutor	好	较差	较好	低	低
ProcessPoolExecutor	一般	最佳	一般	高	低

总的来说，选择合适的并发方案需要综合考虑任务特性、性能需求、代码复杂度等多个因素。在实际应用中，有时候甚至可以混合使用多种方案，以达到最优的性能表现。

实用技巧总结

控制线程池大小

def demonstrate_pool_sizing():
    """Print suggested worker counts derived from the machine's CPU count.

    Returns None; the suggestions are written to stdout.
    """
    # Imported locally because no file-level ``import os`` is visible.
    import os

    # os.cpu_count() returns None when the count cannot be determined;
    # fall back to 1 so the arithmetic below cannot fail.
    cpu_count = os.cpu_count() or 1
    # I/O-bound work: the worker count can be 1-4x the core count (2x used here)
    io_bound_workers = cpu_count * 2
    # CPU-bound work: the worker count should not exceed the core count
    cpu_bound_workers = cpu_count

    print("推荐的线程数：")
    print(f"IO 密集型任务：{io_bound_workers}")
    print(f"CPU 密集型任务：{cpu_bound_workers}")

批量提交任务

def demonstrate_batch_submit():
    """Contrast executor.map with submit + as_completed on five tasks.

    Returns:
        A tuple (results_ordered, results_completion): the first list comes
        from executor.map, the second is gathered as each future completes.
    """
    with ThreadPoolExecutor(max_workers=4) as pool:
        # map() yields results in input order
        in_input_order = list(pool.map(slow_operation, range(5)))

        submitted = []
        for task_id in range(5):
            submitted.append(pool.submit(slow_operation, task_id))

        # as_completed() yields futures as they finish
        in_finish_order = []
        for finished in as_completed(submitted):
            in_finish_order.append(finished.result())

        return in_input_order, in_finish_order

错误处理

def demonstrate_error_handling():
    """Show how an exception raised inside a task surfaces via Future.result().

    Submits five tasks, of which the one with id 3 raises ValueError, and
    prints a success or error line for each as it completes.  Returns None.
    """
    def faulty_operation(task_id):
        # Every task except id 3 behaves like the normal slow operation
        if task_id != 3:
            return slow_operation(task_id)
        raise ValueError(f"Task {task_id} failed")

    with ThreadPoolExecutor(max_workers=4) as pool:
        submitted = [pool.submit(faulty_operation, n) for n in range(5)]

        for finished in as_completed(submitted):
            try:
                # result() re-raises whatever the task raised
                outcome = finished.result()
                print(f"成功：{outcome}")
            except Exception as exc:
                print(f"错误：{str(exc)}")

总结

concurrent.futures 模块为 Python 并发编程提供了一个优雅的高级接口。相比传统的 threading / multiprocessing 模块，它具有以下优势：

使用线程池自动管理线程的生命周期
提供简洁的接口提交任务和获取结果
支持超时和错误处理
代码更加 Pythonic 和易于维护