1. Performance Profiling

1-1. Profiling Tools

| Tool | Purpose | Characteristics |
| --- | --- | --- |
| cProfile | Function-call profiling | Standard library; detailed call statistics |
| line_profiler | Line-level profiling | Results down to individual lines of code |
| memory_profiler | Memory profiling | Tracks memory usage |
| py-spy | Live profiling | Low overhead; real-time sampling |
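
For cProfile in particular, profile data can be saved and summarized with the standard-library pstats module. A minimal sketch (the function and output file name are illustrative):

python
import cProfile
import pstats

def slow_function():
    return sum(i for i in range(1000000))

# Profile into a stats file, then print the 10 most expensive calls by cumulative time
cProfile.run('slow_function()', 'profile.out')
stats = pstats.Stats('profile.out')
stats.sort_stats('cumulative').print_stats(10)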

1-2. Profiling Examples

python
# Using cProfile
import cProfile

def slow_function():
    result = 0
    for i in range(1000000):
        result += i
    return result

# Run the profiler
cProfile.run('slow_function()')

# Using line_profiler
# The @profile decorator is injected by kernprof; run with:
#   kernprof -l -v script.py
@profile
def slow_function():
    result = 0
    for i in range(1000000):
        result += i
    return result

# Using memory_profiler
# Run with: python -m memory_profiler script.py
@profile
def memory_intensive():
    large_list = [i for i in range(1000000)]
    return sum(large_list)
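
The @profile decorator above only exists when the script runs under kernprof, so for ad-hoc use line_profiler can also be driven programmatically through its LineProfiler class. A minimal sketch, assuming the line_profiler package is installed:

python
from line_profiler import LineProfiler

def slow_function():
    result = 0
    for i in range(1000000):
        result += i
    return result

lp = LineProfiler()
wrapped = lp(slow_function)  # a LineProfiler instance can wrap a function like a decorator
wrapped()
lp.print_stats()             # prints line-by-line timings for the wrapped function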

2. Code Optimization

2-1. Loop Optimization

python
# Not recommended: appending in an explicit loop
result = []
for i in range(1000):
    result.append(i * 2)

# Recommended: list comprehension
result = [i * 2 for i in range(1000)]

# Using a generator (values are produced lazily)
def generate_numbers(n):
    for i in range(n):
        yield i * 2

# Using map
result = list(map(lambda x: x * 2, range(1000)))
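
To verify that the comprehension actually wins, the two versions can be compared with the standard-library timeit module. A small sketch (the iteration count is arbitrary):

python
import timeit

def with_append():
    result = []
    for i in range(1000):
        result.append(i * 2)
    return result

def with_comprehension():
    return [i * 2 for i in range(1000)]

# Each call runs the function 10,000 times and reports total seconds
print("append loop:   ", timeit.timeit(with_append, number=10000))
print("comprehension: ", timeit.timeit(with_comprehension, number=10000))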

2-2. Data Structure Optimization

python
# List vs. set
# Membership test
items = [1, 2, 3, 4, 5]
if 3 in items:  # O(n) scan
    pass

items_set = {1, 2, 3, 4, 5}
if 3 in items_set:  # O(1) on average (hash lookup)
    pass

# Dictionary optimization
# Not recommended: manual key check
d = {}
for key, value in data:
    if key not in d:
        d[key] = []
    d[key].append(value)

# Recommended: defaultdict
from collections import defaultdict
d = defaultdict(list)
for key, value in data:
    d[key].append(value)
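
The snippets above assume `data` is an iterable of (key, value) pairs; a small self-contained example with made-up data:

python
from collections import defaultdict

# Hypothetical sample data: (category, value) pairs
data = [("a", 1), ("b", 2), ("a", 3), ("b", 4), ("c", 5)]

d = defaultdict(list)
for key, value in data:
    d[key].append(value)

print(dict(d))  # {'a': [1, 3], 'b': [2, 4], 'c': [5]}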

2-3. String Handling

python
# String concatenation
# Not recommended: repeated += builds a new string each time
result = ""
for s in strings:
    result += s

# Recommended: join
result = "".join(strings)

# String formatting
# Not recommended: manual concatenation
name = "Alice"
age = 25
message = "Name: " + name + ", Age: " + str(age)

# Recommended: f-string
message = f"Name: {name}, Age: {age}"

3. Memory Optimization

3-1. Memory Management

python
# Use a generator to keep memory usage low
def read_large_file(file_path):
    with open(file_path) as f:
        for line in f:
            yield line

# Use a context manager (note that read() still loads the whole file into memory)
with open('large_file.txt') as f:
    data = f.read()

# Release large objects promptly
large_data = None  # drop the reference so the object can be garbage-collected
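
A short usage example of the generator above, counting lines without ever loading the whole file (the file name is illustrative):

python
# Consumes one line at a time, so memory use stays flat regardless of file size
line_count = 0
for line in read_large_file('large_file.txt'):
    line_count += 1
print(f"lines: {line_count}")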

3-2. Object Pools and Caching

python
from functools import lru_cache

@lru_cache(maxsize=128)
def expensive_function(x):
    # Results are cached, so repeated calls with the same argument are free
    return x * x

# Using a process pool (multiprocessing.Pool)
from multiprocessing import Pool

def process_data(data):
    # process_item is defined in section 4-2
    with Pool(4) as p:
        result = p.map(process_item, data)
    return result
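
lru_cache exposes hit/miss statistics, which helps confirm that the cache is actually being used. A short usage sketch:

python
from functools import lru_cache

@lru_cache(maxsize=128)
def expensive_function(x):
    return x * x

for x in [1, 2, 1, 2, 3]:
    expensive_function(x)

print(expensive_function.cache_info())  # CacheInfo(hits=2, misses=3, maxsize=128, currsize=3)
expensive_function.cache_clear()        # empty the cache if its inputs become stale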

4. Concurrency

4-1. Multithreading

python
import threading
from queue import Queue

def worker(q):
    while True:
        item = q.get()
        if item is None:  # sentinel value: shut this worker down
            break
        process_item(item)  # process_item is defined in section 4-2
        q.task_done()

def process_with_threads(items):
    q = Queue()
    threads = []
    for i in range(4):
        t = threading.Thread(target=worker, args=(q,))
        t.start()
        threads.append(t)

    for item in items:
        q.put(item)

    q.join()  # wait until every queued item has been marked done
    for i in range(4):
        q.put(None)  # one sentinel per worker
    for t in threads:
        t.join()
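
The same fan-out pattern is usually shorter with the standard-library concurrent.futures module; a sketch of an equivalent version using ThreadPoolExecutor (process_item here is a placeholder):

python
from concurrent.futures import ThreadPoolExecutor

def process_item(item):
    return item * 2  # placeholder work

def process_with_thread_pool(items):
    with ThreadPoolExecutor(max_workers=4) as executor:
        # map distributes items across the worker threads and preserves input order
        return list(executor.map(process_item, items))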

4-2. Multiprocessing

python
from multiprocessing import Pool

def process_item(item):
    # Process a single item (placeholder computation)
    return item * 2

def process_with_processes(items):
    with Pool(4) as p:
        results = p.map(process_item, items)
    return results
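
On platforms that start worker processes with spawn (Windows, and macOS by default on recent Python versions), multiprocessing code must be launched from under an `if __name__ == '__main__'` guard. A short usage sketch with a placeholder computation:

python
from multiprocessing import Pool

def process_item(item):
    return item * 2  # placeholder computation

if __name__ == '__main__':  # required when processes are started with spawn
    with Pool(4) as p:
        print(p.map(process_item, range(10)))  # [0, 2, 4, ..., 18]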

4-3. Asynchronous Programming

python
import asyncio
import aiohttp  # third-party dependency: pip install aiohttp

async def fetch_data(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            return await response.text()

async def main():
    urls = ['url1', 'url2', 'url3']
    tasks = [fetch_data(url) for url in urls]
    results = await asyncio.gather(*tasks)
    return results
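
The coroutines above do nothing until an event loop drives them; the usual entry point is asyncio.run (the URLs are placeholders, as in the original):

python
if __name__ == '__main__':
    results = asyncio.run(main())  # runs main() to completion on a fresh event loop
    print(results)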

5. Database Optimization

5-1. Query Optimization

python
# Use bulk operations
# Not recommended: one INSERT per item
for item in items:
    db.execute("INSERT INTO table VALUES (?)", (item,))

# Recommended: a single executemany call
db.executemany("INSERT INTO table VALUES (?)", [(item,) for item in items])

# Create an index on frequently filtered columns
cursor.execute("CREATE INDEX idx_name ON users (name)")

# Push aggregation into the query instead of fetching raw rows
cursor.execute("""
    SELECT u.name, COUNT(p.id) as post_count
    FROM users u
    LEFT JOIN posts p ON u.id = p.user_id
    GROUP BY u.id
    HAVING post_count > 10
""")

5-2. Connection Pooling

python
from dbutils.pooled_db import PooledDB
import pymysql

pool = PooledDB(
    creator=pymysql,        # DB driver module used to create connections
    maxconnections=6,       # maximum number of connections allowed in the pool
    mincached=2,            # idle connections opened at startup
    maxcached=5,            # maximum number of idle connections kept in the pool
    maxshared=3,            # maximum number of shared connections
    blocking=True,          # block instead of raising an error when the pool is exhausted
    maxusage=None,          # unlimited reuse of a single connection
    setsession=[],          # SQL commands run when a session starts
    ping=0,                 # never ping the server to check the connection
    host='localhost',
    user='user',
    password='password',
    database='db',
    charset='utf8mb4'
)
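
Connections are then borrowed from the pool instead of being opened per request; a minimal usage sketch against the placeholder credentials above (PooledDB hands out connections via its connection() method):

python
conn = pool.connection()  # borrow a connection from the pool
cursor = conn.cursor()
cursor.execute("SELECT 1")
print(cursor.fetchone())
cursor.close()
conn.close()              # returns the connection to the pool rather than closing it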

6. Cache Optimization

6-1. In-Memory Caching

python
from functools import lru_cache, wraps

@lru_cache(maxsize=1000)
def expensive_function(x):
    return x * x

# A hand-rolled cache decorator (unbounded; positional arguments only)
def cache(func):
    cache_dict = {}
    @wraps(func)
    def wrapper(*args):
        if args in cache_dict:
            return cache_dict[args]
        result = func(*args)
        cache_dict[args] = result
        return result
    return wrapper
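
A quick usage example of the hand-rolled decorator with a hypothetical recursive function:

python
@cache
def fib(n):
    return n if n < 2 else fib(n - 1) + fib(n - 2)

print(fib(30))  # each distinct n is computed once, so this stays fast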

6-2. Redis Caching

python
import redis
from functools import wraps

r = redis.Redis(host='localhost', port=6379, db=0)

def cache_redis(ttl=300):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Build a cache key from the function name and its arguments
            key = f"{func.__name__}:{args}:{kwargs}"
            result = r.get(key)
            if result is not None:
                return result.decode()  # Redis returns bytes; assumes func returns a str
            result = func(*args, **kwargs)
            r.setex(key, ttl, result)   # store with a time-to-live in seconds
            return result
        return wrapper
    return decorator
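
Applying the decorator to a hypothetical function (requires a running Redis server and the redis-py package):

python
@cache_redis(ttl=60)
def get_greeting(name):
    # Imagine an expensive lookup here; the string result is cached for 60 seconds
    return f"Hello, {name}!"

print(get_greeting("Alice"))  # first call runs the function
print(get_greeting("Alice"))  # second call within 60 seconds is served from Redis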

7. Performance Monitoring

7-1. Logging-Based Monitoring

python
import logging
import time
from functools import wraps

def log_execution_time(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        logging.info(f"{func.__name__} took {end_time - start_time:.3f} seconds")
        return result
    return wrapper
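
By default the root logger only emits WARNING and above, so the INFO-level timing messages need logging configured first. A short usage example with a hypothetical function:

python
logging.basicConfig(level=logging.INFO)

@log_execution_time
def do_work():
    time.sleep(0.2)  # stand-in for real work

do_work()  # logs something like: INFO:root:do_work took 0.200 seconds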

7-2. Performance Metrics

python
from prometheus_client import Counter, Histogram
from functools import wraps
import time

REQUEST_COUNT = Counter('request_count', 'Number of API requests')
REQUEST_LATENCY = Histogram('request_latency_seconds', 'Request latency in seconds')

def track_performance(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        REQUEST_COUNT.inc()
        try:
            result = func(*args, **kwargs)
            return result
        finally:
            REQUEST_LATENCY.observe(time.time() - start_time)
    return wrapper
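
For Prometheus to scrape these metrics, the process has to expose them over HTTP; prometheus_client ships a helper for that (the port is an arbitrary choice):

python
from prometheus_client import start_http_server

start_http_server(8000)  # metrics become available at http://localhost:8000/metrics

@track_performance
def handle_request():
    time.sleep(0.05)  # stand-in for real request handling

handle_request()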

TIP

Performance optimization best practices:

  1. Measure first, then optimize
  2. Use appropriate data structures
  3. Avoid unnecessary object creation
  4. Use caching judiciously
  5. Optimize database queries
  6. Use concurrency where it helps
  7. Monitor performance metrics