## 1. Performance Profiling
### 1-1. Profiling Tools
| Tool | Purpose | Characteristics |
| --- | --- | --- |
| cProfile | Function-level call profiling | Standard library, detailed statistics |
| line_profiler | Line-by-line profiling | Timings precise to individual lines of code |
| memory_profiler | Memory profiling | Tracks memory usage over time |
| py-spy | Live profiling | Low overhead, samples a running process |
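The table notes that cProfile produces detailed statistics; in practice it is often more useful to dump the profile to a file and explore it with the standard-library `pstats` module than to read the raw console output. Below is a minimal sketch of that workflow; the workload function and the `profile.out` file name are placeholders chosen for illustration.

```python
import cProfile
import pstats

def workload():
    # Throwaway example workload
    return sum(i * i for i in range(1_000_000))

# Save the raw profiling data to a file instead of printing it
cProfile.run("workload()", "profile.out")

# Load the file and print the ten entries with the highest cumulative time
stats = pstats.Stats("profile.out")
stats.sort_stats("cumulative").print_stats(10)
```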
### 1-2. Profiling Examples
```python
# --- cProfile: function-level profiling ---
import cProfile

def slow_function():
    result = 0
    for i in range(1000000):
        result += i
    return result

# Profile a single statement
cProfile.run('slow_function()')

# --- line_profiler: line-level profiling ---
# @profile is injected by line_profiler at run time;
# run the script with:  kernprof -l -v script.py
@profile
def slow_function():
    result = 0
    for i in range(1000000):
        result += i
    return result

# --- memory_profiler: per-line memory usage ---
# Here @profile comes from memory_profiler;
# run with:  python -m memory_profiler script.py
@profile
def memory_intensive():
    large_list = [i for i in range(1000000)]
    return sum(large_list)
```
## 2. Code Optimization
### 2-1. Loop Optimization
```python
# Not recommended: grow the list with repeated append calls
result = []
for i in range(1000):
    result.append(i * 2)

# Recommended: list comprehension
result = [i * 2 for i in range(1000)]

# Generator: produces values lazily instead of building the whole list
def generate_numbers(n):
    for i in range(n):
        yield i * 2

# map() is another option
result = list(map(lambda x: x * 2, range(1000)))
```
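Rather than taking the speedup on faith, it is worth timing both versions, which ties back to the "measure first" rule. A minimal sketch using the standard-library `timeit` module follows; the repetition count is arbitrary and should be adjusted to your machine.

```python
import timeit

def with_append():
    result = []
    for i in range(1000):
        result.append(i * 2)
    return result

def with_comprehension():
    return [i * 2 for i in range(1000)]

# Run each version 10,000 times and print the total elapsed seconds
print("append:       ", timeit.timeit(with_append, number=10_000))
print("comprehension:", timeit.timeit(with_comprehension, number=10_000))
```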
### 2-2. Data Structure Optimization
```python
# List vs. set: membership tests
items = [1, 2, 3, 4, 5]
if 3 in items:        # O(n) linear scan
    pass

items_set = {1, 2, 3, 4, 5}
if 3 in items_set:    # O(1) average-case hash lookup
    pass

# Dictionary optimization: grouping values by key
# (data is an iterable of (key, value) pairs)
# Not recommended: test for the key on every iteration
d = {}
for key, value in data:
    if key not in d:
        d[key] = []
    d[key].append(value)

# Recommended: defaultdict creates the list on first access
from collections import defaultdict

d = defaultdict(list)
for key, value in data:
    d[key].append(value)
```
### 2-3. String Handling
```python
# String concatenation
# Not recommended: += creates a new string object on every iteration
result = ""
for s in strings:
    result += s

# Recommended: join builds the result in a single pass
result = "".join(strings)

# String formatting
# Not recommended: manual concatenation with str() calls
name = "Alice"
age = 25
message = "Name: " + name + ", Age: " + str(age)

# Recommended: f-string
message = f"Name: {name}, Age: {age}"
```
## 3. Memory Optimization
### 3-1. Memory Management
```python
# Use a generator so the whole file is never loaded into memory
def read_large_file(file_path):
    with open(file_path) as f:
        for line in f:
            yield line

# Use a context manager so the file handle is released promptly
# (note: f.read() still loads the entire file; prefer the generator above for large files)
with open('large_file.txt') as f:
    data = f.read()

# Drop references to large objects once you are done with them
large_data = None  # the object becomes eligible for garbage collection
```
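To quantify how much memory a generator saves compared with materializing a full list, the standard-library `tracemalloc` module can report peak allocations. This is an illustrative sketch; the element count is arbitrary.

```python
import tracemalloc

# Peak memory when the whole list is built up front
tracemalloc.start()
total = sum([i * 2 for i in range(1_000_000)])   # list comprehension: every element in memory
_, peak_list = tracemalloc.get_traced_memory()
tracemalloc.stop()

# Peak memory when a generator expression streams one value at a time
tracemalloc.start()
total = sum(i * 2 for i in range(1_000_000))
_, peak_gen = tracemalloc.get_traced_memory()
tracemalloc.stop()

print(f"list peak: {peak_list / 1024:.0f} KiB, generator peak: {peak_gen / 1024:.0f} KiB")
```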
### 3-2. Object Pools
```python
# lru_cache reuses previously computed results instead of recomputing them
from functools import lru_cache

@lru_cache(maxsize=128)
def expensive_function(x):
    # The result is cached per argument value
    return x * x

# A process pool reuses a fixed set of worker processes
from multiprocessing import Pool

def process_item(item):
    return item * 2   # placeholder work

def process_data(data):
    with Pool(4) as p:
        result = p.map(process_item, data)
    return result
```
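The snippets above reuse computed results and worker processes; a literal object pool keeps a fixed set of expensive-to-create objects and hands them out on demand. Here is a minimal sketch built on `queue.Queue`; the `HeavyObject` class is a hypothetical stand-in for whatever is costly to construct (a parser, a connection, and so on).

```python
import queue

class HeavyObject:
    """Hypothetical stand-in for an object that is expensive to create."""
    def do_work(self, x):
        return x * 2

class ObjectPool:
    def __init__(self, factory, size):
        self._pool = queue.Queue(maxsize=size)
        for _ in range(size):
            self._pool.put(factory())   # pre-create every object up front

    def acquire(self):
        return self._pool.get()         # blocks if the pool is exhausted

    def release(self, obj):
        self._pool.put(obj)             # hand the object back for reuse

pool = ObjectPool(HeavyObject, size=4)
obj = pool.acquire()
try:
    print(obj.do_work(21))
finally:
    pool.release(obj)
```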
## 4. Concurrency
### 4-1. Multithreading
```python
import threading
from queue import Queue

def process_item(item):
    return item * 2   # placeholder work

def worker(q):
    while True:
        item = q.get()
        if item is None:          # sentinel: tells the worker to exit
            q.task_done()
            break
        process_item(item)
        q.task_done()

def process_with_threads(items):
    q = Queue()
    threads = []
    # Start a fixed number of worker threads
    for i in range(4):
        t = threading.Thread(target=worker, args=(q,))
        t.start()
        threads.append(t)
    # Enqueue the work
    for item in items:
        q.put(item)
    q.join()                      # block until every item has been processed
    # Send one sentinel per worker, then wait for the threads to exit
    for i in range(4):
        q.put(None)
    for t in threads:
        t.join()
```
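For many workloads the hand-rolled worker/queue pattern above can be replaced by the standard-library `concurrent.futures` thread pool, which manages the queue, the sentinels, and the joins internally. A minimal sketch, using the same placeholder `process_item` work:

```python
from concurrent.futures import ThreadPoolExecutor

def process_item(item):
    return item * 2   # placeholder work

def process_with_executor(items):
    # The executor owns the worker threads and the internal work queue
    with ThreadPoolExecutor(max_workers=4) as executor:
        return list(executor.map(process_item, items))

print(process_with_executor(range(10)))
```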
### 4-2. Multiprocessing
```python
from multiprocessing import Pool

def process_item(item):
    # Process a single item (placeholder work)
    return item * 2

def process_with_processes(items):
    with Pool(4) as p:
        results = p.map(process_item, items)
    return results
```
### 4-3. Asynchronous Programming
```python
import asyncio
import aiohttp

async def fetch_data(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            return await response.text()

async def main():
    urls = ['url1', 'url2', 'url3']
    tasks = [fetch_data(url) for url in urls]
    results = await asyncio.gather(*tasks)
    return results

# Entry point: asyncio.run(main())
```
## 5. Database Optimization
### 5-1. Query Optimization
```python
# Batch operations
# Not recommended: one statement per row
for item in items:
    db.execute("INSERT INTO table VALUES (?)", (item,))

# Recommended: a single executemany call
db.executemany("INSERT INTO table VALUES (?)", [(item,) for item in items])

# Create an index on frequently filtered columns
cursor.execute("CREATE INDEX idx_name ON users (name)")

# Aggregate in the database rather than in Python
cursor.execute("""
    SELECT u.name, COUNT(p.id) AS post_count
    FROM users u
    LEFT JOIN posts p ON u.id = p.user_id
    GROUP BY u.id
    HAVING COUNT(p.id) > 10
""")
```
### 5-2. Connection Pooling
```python
from dbutils.pooled_db import PooledDB
import pymysql

pool = PooledDB(
    creator=pymysql,        # DB driver used to create connections
    maxconnections=6,       # maximum number of connections in the pool
    mincached=2,            # idle connections opened at startup
    maxcached=5,            # maximum number of idle connections kept
    maxshared=3,            # maximum number of shared connections
    blocking=True,          # block and wait when the pool is exhausted
    maxusage=None,          # no limit on how often a connection is reused
    setsession=[],          # SQL commands run when a session starts
    ping=0,                 # never ping the server to check connections
    host='localhost',
    user='user',
    password='password',
    database='db',
    charset='utf8mb4'
)
```
## 6. Caching
### 6-1. In-Memory Caching
```python
from functools import lru_cache, wraps

@lru_cache(maxsize=1000)
def expensive_function(x):
    return x * x

# A hand-rolled caching decorator (positional, hashable arguments only)
def cache(func):
    cache_dict = {}

    @wraps(func)
    def wrapper(*args):
        if args in cache_dict:
            return cache_dict[args]
        result = func(*args)
        cache_dict[args] = result
        return result
    return wrapper
```
### 6-2. Redis Caching
```python
import redis
from functools import wraps

r = redis.Redis(host='localhost', port=6379, db=0)

def cache_redis(ttl=300):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Simple key scheme; only suitable for functions returning strings
            key = f"{func.__name__}:{args}:{kwargs}"
            result = r.get(key)
            if result is not None:
                return result.decode()      # cache hit: Redis returns bytes
            result = func(*args, **kwargs)
            r.setex(key, ttl, result)       # cache the result with a time-to-live
            return result
        return wrapper
    return decorator
```
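Applied to a function, the decorator above might be used as shown below. The `load_user_profile` function is hypothetical, and the example assumes a Redis server reachable at localhost:6379; note that this simple decorator only suits functions that return strings.

```python
@cache_redis(ttl=60)
def load_user_profile(user_id):
    # Hypothetical slow lookup; its string result is cached for 60 seconds
    return f"profile-for-{user_id}"

print(load_user_profile(42))   # first call runs the function and populates Redis
print(load_user_profile(42))   # repeat call within 60 s is served from the cache
```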
## 7. Performance Monitoring
### 7-1. Logging Execution Time
```python
import logging
import time
from functools import wraps

def log_execution_time(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        logging.info(f"{func.__name__} took {end_time - start_time:.3f}s")
        return result
    return wrapper
```
### 7-2. Performance Metrics
```python
from prometheus_client import Counter, Histogram
from functools import wraps
import time

REQUEST_COUNT = Counter('request_count', 'Number of API requests')
REQUEST_LATENCY = Histogram('request_latency_seconds', 'Request latency in seconds')

def track_performance(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        REQUEST_COUNT.inc()
        try:
            return func(*args, **kwargs)
        finally:
            REQUEST_LATENCY.observe(time.time() - start_time)
    return wrapper
```
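The metrics above are only useful if Prometheus can scrape them; `prometheus_client` can expose them over HTTP with `start_http_server`. A self-contained sketch follows; the metric name, port, and fake workload are placeholders, and in a real service you would decorate your actual request handlers with `track_performance`.

```python
from prometheus_client import Counter, start_http_server
import time

DEMO_REQUESTS = Counter('demo_requests', 'Number of demo requests handled')

if __name__ == "__main__":
    # Metrics become scrapeable at http://localhost:8000/metrics
    start_http_server(8000)
    while True:
        DEMO_REQUESTS.inc()   # placeholder: pretend a request was handled
        time.sleep(1)
```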
TIP
Best practices for performance optimization:
- Measure first, then optimize
- Use the appropriate data structures
- Avoid unnecessary object creation
- Use caching judiciously
- Optimize database queries
- Use concurrency where it helps
- Monitor performance metrics