摘要
多轮对话是LLM应用的核心场景,而对话历史管理直接影响模型对上下文的理解和响应质量。本文系统讲解完整历史vs摘要历史、固定窗口摘要、重要性加权、遗忘机制,以及长期记忆与短期记忆的整合策略,帮助构建高效、连贯的多轮对话系统。
关键词速览
| 术语 | 英文 | 说明 |
|---|---|---|
| 对话历史 | Conversation History | 多轮交互的记录 |
| 上下文窗口 | Context Window | 模型处理上限 |
| 历史摘要 | History Summary | 对历史的压缩表示 |
| 遗忘机制 | Forgetting Mechanism | 选择性丢弃信息 |
| 长期记忆 | Long-term Memory | 持久化存储的信息 |
| 短期记忆 | Short-term Memory | 当前会话的信息 |
| 记忆整合 | Memory Integration | 多种记忆源合并 |
| 会话管理 | Session Management | 会话状态维护 |
| Token预算 | Token Budget | 可用的token数量 |
| 重要性打分 | Importance Scoring | 信息重要程度评估 |
一、对话历史管理基础
1.1 为什么需要对话历史管理
多轮对话中,对话历史管理面临的核心挑战:
- 上下文窗口限制:模型有最大token限制,不能无限累积历史
- 注意力稀释:历史越长,单条消息能分到的注意力越少,关键信息容易被淹没
- 成本累积:每次请求都传递完整历史,成本线性增长
- 相关性衰减:早期对话与当前任务可能无关
- 信息冗余:多轮对话中可能有重复内容
1.2 管理策略对比
| 策略 | 优点 | 缺点 | 适用场景 |
|---|---|---|---|
| 完整历史 | 信息完整 | 成本高、可能超出限制 | 短对话、关键场景 |
| 固定窗口 | 简单高效 | 可能丢失重要信息 | 长对话、简单场景 |
| 摘要历史 | 节省空间 | 可能丢失细节 | 长对话、复杂场景 |
| 混合策略 | 平衡各方面 | 实现复杂 | 大多数场景 |
二、完整历史vs摘要历史
2.1 完整历史策略
class FullHistoryManager:
    """Full-history manager: stores every message verbatim.

    NOTE(review): the List/Dict annotations used below need
    `from typing import List, Dict`; the import is not shown in this excerpt.
    """

    def __init__(self, max_context_tokens: int = 100000):
        # Hard cap on how many (estimated) tokens one LLM request may carry.
        self.max_context_tokens = max_context_tokens
        # Chronological list of {'role': str, 'content': str} dicts.
        self.history = []

    def add_message(self, role: str, content: str):
        """Append one message to the history."""
        self.history.append({
            'role': role,
            'content': content
        })

    def get_history(self) -> List[Dict]:
        """Return a shallow copy of the full history."""
        return self.history.copy()

    def get_context_for_llm(self) -> List[Dict]:
        """Build the LLM context from the newest messages that fit the budget.

        Walks the history newest-first and stops once adding another message
        would exceed ``max_context_tokens``, so recent turns are kept and the
        oldest ones are dropped first.

        NOTE(review): the original annotated this ``-> str`` but it returns a
        list of message dicts; annotation corrected here.
        """
        total_tokens = 0
        context_messages = []
        # Add from the newest message backwards until the limit is reached.
        for msg in reversed(self.history):
            msg_tokens = self._estimate_tokens(msg['content'])
            if total_tokens + msg_tokens > self.max_context_tokens:
                break
            context_messages.insert(0, msg)  # keep chronological order
            total_tokens += msg_tokens
        return context_messages

    def _estimate_tokens(self, text: str) -> int:
        """Cheap token estimate: ~0.5 token per CJK char, ~0.25 per word.

        NOTE(review): ``len(text.split()) - chinese`` can go negative for
        text that is mostly CJK with few spaces — heuristic only.
        """
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

2.2 摘要历史策略
class SummarizedHistoryManager:
    """History manager that compresses older turns into an LLM summary."""

    def __init__(
        self,
        llm_client,
        max_history_tokens: int = 8000,
        summary_trigger_threshold: int = 5000
    ):
        self.llm = llm_client
        self.max_history_tokens = max_history_tokens
        # When the estimated history size passes this, summarization runs.
        self.summary_trigger_threshold = summary_trigger_threshold
        self.current_history = []
        self.summary = None
        self.summary_point = 0  # index up to which history has been summarized

    def add_message(self, role: str, content: str):
        """Append a message and summarize if the history grew too large."""
        self.current_history.append({
            'role': role,
            'content': content
        })
        # Check whether summarization should be triggered.
        current_tokens = self._estimate_total_tokens()
        if current_tokens > self.summary_trigger_threshold:
            self._trigger_summarization()

    def _trigger_summarization(self):
        """Summarize the first half of the un-summarized history.

        NOTE(review): ``self.summary`` is overwritten wholesale, so any
        previous summary is discarded rather than merged; ``summary_point``
        is likewise reset, not accumulated.
        """
        # Summarize the first half of the current history.
        to_summarize = self.current_history[:len(self.current_history)//2]
        summary_text = self.llm.summarize(self._format_for_summary(to_summarize))
        # Update state: keep only the un-summarized tail.
        self.summary = summary_text
        self.current_history = self.current_history[len(to_summarize):]
        self.summary_point = len(to_summarize)

    def _format_for_summary(self, messages: List[Dict]) -> str:
        """Render messages as plain 'role: content' lines for the summarizer."""
        lines = []
        for msg in messages:
            lines.append(f"{msg['role']}: {msg['content']}")
        return "\n".join(lines)

    def get_context_for_llm(self) -> str:
        """Build the LLM context: summary (if any) followed by recent turns."""
        parts = []
        # Prepend the summary of earlier conversation, when present.
        if self.summary:
            parts.append(f"[早期对话摘要]\n{self.summary}\n")
        # Then the not-yet-summarized history.
        for msg in self.current_history:
            parts.append(f"{msg['role']}: {msg['content']}")
        return "\n".join(parts)

    def _estimate_total_tokens(self) -> int:
        """Estimate tokens for the summary plus all un-summarized messages."""
        total = 0
        if self.summary:
            total += self._estimate_tokens(self.summary)
        for msg in self.current_history:
            total += self._estimate_tokens(msg['content'])
        return total

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        # Same heuristic as FullHistoryManager: ~0.5/CJK char, ~0.25/word.
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

三、固定窗口摘要
3.1 固定窗口策略实现
class FixedWindowHistoryManager:
    """Keeps a fixed token window of raw messages; older turns become summaries."""

    def __init__(
        self,
        llm_client,
        window_size: int = 6000,  # window budget, in (estimated) tokens
        overlap_tokens: int = 500
    ):
        self.llm = llm_client
        self.window_size = window_size
        # Tokens of raw messages carried over so the summary boundary overlaps.
        self.overlap_tokens = overlap_tokens
        self.messages = []
        self.summaries = []  # accumulated summaries of evicted history

    def add_message(self, role: str, content: str):
        """Append a message (summarization is deferred to get_context)."""
        self.messages.append({'role': role, 'content': content})

    def _should_summarize(self) -> bool:
        """True when the raw messages exceed the token window."""
        return self._total_tokens() > self.window_size

    def _total_tokens(self) -> int:
        """Estimated token count of all raw messages."""
        return sum(self._estimate_tokens(m['content']) for m in self.messages)

    def get_context(self) -> str:
        """Return the current context, summarizing old history first if needed.

        NOTE(review): this getter mutates state (may call the LLM and shrink
        ``self.messages``) — a side effect worth knowing about at call sites.
        """
        if self._should_summarize():
            self._summarize_old_history()
        return self._build_context()

    def _summarize_old_history(self):
        """Summarize the oldest messages and keep a small raw overlap."""
        # Find the first index where the running total crosses the window
        # budget (minus the current message's own size).
        total = 0
        split_index = 0
        for i, msg in enumerate(self.messages):
            total += self._estimate_tokens(msg['content'])
            if total > self.window_size - self._estimate_tokens(self.messages[i]['content']):
                split_index = i
                break
        if split_index > 0:
            # Summarize everything before the split point.
            old_messages = self.messages[:split_index]
            old_summary = self.llm.summarize(
                self._format_messages(old_messages)
            )
            self.summaries.append({
                'summary': old_summary,
                'message_count': split_index
            })
            # Keep a raw overlap so the summary boundary stays coherent.
            overlap = self._get_overlap_messages(old_messages)
            self.messages = overlap + self.messages[split_index:]

    def _get_overlap_messages(self, old_messages: List[Dict]) -> List[Dict]:
        """Take the newest old messages that fit within ``overlap_tokens``."""
        overlap_size = 0
        overlap_messages = []
        for msg in reversed(old_messages):
            msg_tokens = self._estimate_tokens(msg['content'])
            if overlap_size + msg_tokens > self.overlap_tokens:
                break
            overlap_messages.insert(0, msg)
            overlap_size += msg_tokens
        return overlap_messages

    def _format_messages(self, messages: List[Dict]) -> str:
        """Render messages as 'role: content' lines."""
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

    def _build_context(self) -> str:
        """Assemble summaries followed by the raw message window."""
        parts = []
        # Historical summaries first.
        if self.summaries:
            parts.append("[历史对话摘要]")
            for i, s in enumerate(self.summaries):
                parts.append(f"阶段{i+1}: {s['summary']}")
            parts.append("")
        # Then the current raw messages.
        for msg in self.messages:
            parts.append(f"{msg['role']}: {msg['content']}")
        return "\n".join(parts)

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        # Heuristic: ~0.5 token per CJK char, ~0.25 per whitespace word.
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

3.2 渐进式摘要
class ProgressiveSummarizer:
    """Summarizes the older half of the dialogue in repeated passes."""

    def __init__(self, llm_client):
        self.llm = llm_client
        self.summaries = []     # one entry per summarization pass
        self.raw_messages = []  # messages not yet folded into a summary

    def add_message(self, role: str, content: str):
        """Append a raw message."""
        self.raw_messages.append({'role': role, 'content': content})

    def summarize_if_needed(
        self,
        threshold_tokens: int = 4000,
        min_messages: int = 5
    ) -> str:
        """Summarize when both message-count and token thresholds are met.

        Returns the new summary, or "" when no summarization was needed.
        """
        if len(self.raw_messages) < min_messages:
            return ""
        total_tokens = self._total_tokens()
        if total_tokens < threshold_tokens:
            return ""
        # Thresholds passed — run a summarization pass.
        return self._create_summary()

    def _create_summary(self) -> str:
        """Summarize the older half of raw messages and drop them."""
        # Only the first half is summarized; the recent half stays verbatim.
        to_summarize = self.raw_messages[:len(self.raw_messages)//2]
        prompt = f"""请总结以下对话的核心内容,保留关键信息和结论。
对话内容:
{self._format_dialogue(to_summarize)}
摘要要求:
1. 保留关键话题和决定
2. 保留重要的用户需求
3. 保留模型提供的解决方案
4. 删除重复和细节
5. 控制在200字以内
"""
        summary = self.llm.generate(prompt)
        # Record the summary and discard the messages it covers.
        self.summaries.append(summary)
        self.raw_messages = self.raw_messages[len(to_summarize):]
        return summary

    def get_full_context(self) -> str:
        """Assemble all summaries followed by the remaining raw messages."""
        parts = []
        # Earlier-dialogue summaries first.
        if self.summaries:
            parts.append("【早期对话摘要】")
            for i, summary in enumerate(self.summaries):
                parts.append(f"阶段{i+1}: {summary}")
            parts.append("")
        # Then the verbatim recent messages.
        for msg in self.raw_messages:
            parts.append(f"{msg['role']}: {msg['content']}")
        return "\n".join(parts)

    def _format_dialogue(self, messages: List[Dict]) -> str:
        """Render messages as 'role: content' lines."""
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

    def _total_tokens(self) -> int:
        """Estimated token count of the raw (un-summarized) messages."""
        return sum(self._estimate_tokens(m['content']) for m in self.raw_messages)

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        # Heuristic: ~0.5 token per CJK char, ~0.25 per whitespace word.
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

四、重要性加权
4.1 重要性评估
class ImportanceWeightedHistoryManager:
    """Selects context by a blend of message importance and query relevance."""

    def __init__(self, llm_client, max_tokens: int = 8000):
        self.llm = llm_client
        self.max_tokens = max_tokens
        self.messages = []

    def add_message(self, role: str, content: str, is_important: bool = None):
        """Append a message; ``is_important=True`` forces importance 1.0."""
        importance = self._assess_importance(role, content, is_important)
        self.messages.append({
            'role': role,
            'content': content,
            'importance': importance,
            # Insertion index doubles as a timestamp for later re-ordering.
            'timestamp': len(self.messages)
        })

    def _assess_importance(
        self,
        role: str,
        content: str,
        explicit_important: bool = None
    ) -> float:
        """Heuristically score a message's importance in [0.1, 1.0]."""
        score = 0.5  # base score
        # An explicit flag wins outright.
        if explicit_important:
            return 1.0
        # Role weighting (user turns count most).
        role_weights = {
            'user': 0.8,
            'assistant': 0.7,
            'system': 0.5
        }
        score *= role_weights.get(role, 0.5)
        # Content-feature bumps: imperative/priority keywords.
        important_keywords = ['重要', '必须', '关键', '不要', '记住', '忘记', '确认']
        for kw in important_keywords:
            if kw in content:
                score += 0.1
        # Length adjustment (mid-length messages rank higher).
        length = len(content)
        if 50 < length < 500:
            score += 0.1
        elif length > 1000:
            score -= 0.1
        # Code or data blocks.
        # NOTE(review): both operands of this `or` test the same '```'
        # marker — the second is redundant (perhaps '`' was intended).
        if '```' in content or '```' in content:
            score += 0.1
        return min(max(score, 0.1), 1.0)

    def get_weighted_context(self, current_query: str = "") -> str:
        """Pick the highest-scoring messages that fit the token budget.

        NOTE(review): scoring writes 'query_relevance'/'combined_score' keys
        into the stored dicts and the sort below re-orders ``self.messages``
        in place (same list object), permanently losing chronological order.
        """
        # 1. Relevance of each message to the current query.
        scored_messages = self._score_relevance(current_query)
        # 2. Blend importance (60%) with query relevance (40%).
        for msg in scored_messages:
            msg['combined_score'] = (
                0.6 * msg['importance'] +
                0.4 * msg.get('query_relevance', 0.5)
            )
        # 3. Rank by combined score.
        scored_messages.sort(key=lambda x: x['combined_score'], reverse=True)
        # 4. Greedily keep high scorers while the budget allows.
        selected = []
        total_tokens = 0
        for msg in scored_messages:
            msg_tokens = self._estimate_tokens(msg['content'])
            if total_tokens + msg_tokens <= self.max_tokens:
                selected.append(msg)
                total_tokens += msg_tokens
        # 5. Restore chronological order for presentation.
        selected.sort(key=lambda x: x['timestamp'])
        return self._format_messages(selected)

    def _score_relevance(self, query: str) -> List[Dict]:
        """Attach a bag-of-words overlap score to every stored message."""
        if not query:
            return self.messages
        # Simple keyword overlap, not semantic similarity.
        query_terms = set(query.lower().split())
        for msg in self.messages:
            content_terms = set(msg['content'].lower().split())
            overlap = len(query_terms & content_terms)
            msg['query_relevance'] = overlap / max(len(query_terms), 1)
        return self.messages

    def _format_messages(self, messages: List[Dict]) -> str:
        """Render messages as 'role: content' lines."""
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        # Heuristic: ~0.5 token per CJK char, ~0.25 per whitespace word.
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

4.2 关键词触发的重要性
class KeywordTriggeredImportanceManager:
    """Scores importance and detects intent via keyword triggers."""

    # Trigger keywords grouped by importance tier.
    IMPORTANCE_KEYWORDS = {
        'high': ['必须', '重要', '关键', '紧急', '不要忘记', '一定', '切记'],
        'medium': ['建议', '可以', '尝试', '最好', '推荐'],
        'low': ['顺便', '另外', '对了', '哦']
    }
    # Keywords signalling the kind of user intent.
    USER_INTENT_KEYWORDS = {
        'preference': ['喜欢', '不喜欢', '偏好', '倾向'],
        'constraint': ['不能', '不要', '必须', '只能'],
        'goal': ['想要', '目标', '希望', '达到'],
        'context': ['之前', '上次', '之前提到']
    }

    def __init__(self):
        self.messages = []

    def add_message(self, role: str, content: str):
        """Append a message, auto-scoring importance and tagging intent."""
        importance = self._auto_assess(content)
        intent_type = self._detect_intent(content)
        self.messages.append({
            'role': role,
            'content': content,
            'importance': importance,
            'intent_type': intent_type,
            'captured_entities': self._extract_entities(content)
        })

    def _auto_assess(self, content: str) -> float:
        """Keyword-tiered importance score.

        NOTE(review): the low tier is checked last and uses ``min``, so a
        low-tier keyword caps the score at 0.3 even when a high-tier keyword
        also matched — the check order matters here.
        """
        score = 0.5
        # High-importance keywords raise the score to at least 0.9.
        for kw in self.IMPORTANCE_KEYWORDS['high']:
            if kw in content:
                score = max(score, 0.9)
        # Medium-importance keywords raise it to at least 0.7.
        for kw in self.IMPORTANCE_KEYWORDS['medium']:
            if kw in content:
                score = max(score, 0.7)
        # Low-importance keywords cap it at 0.3.
        for kw in self.IMPORTANCE_KEYWORDS['low']:
            if kw in content:
                score = min(score, 0.3)
        return score

    def _detect_intent(self, content: str) -> str:
        """Return the first intent whose keyword list matches, else 'general'."""
        for intent, keywords in self.USER_INTENT_KEYWORDS.items():
            for kw in keywords:
                if kw in content:
                    return intent
        return 'general'

    def _extract_entities(self, content: str) -> Dict:
        """Regex-based entity extraction (dates and numbers only).

        A real system would use an NER model; 'names' and 'topics' are
        declared but never populated here.
        """
        entities = {
            'names': [],
            'dates': [],
            'numbers': [],
            'topics': []
        }
        import re
        # Dates such as 2024-01-02 or 2024年1月2日.
        dates = re.findall(r'\d{4}[-/年]\d{1,2}[-/月]\d{1,2}日?', content)
        entities['dates'].extend(dates)
        # Bare numbers, capped at the first five.
        numbers = re.findall(r'\d+', content)
        entities['numbers'].extend(numbers[:5])
        return entities

五、遗忘机制
5.1 基于时间的遗忘
import time
from datetime import datetime, timedelta
class TimeBasedForgettingManager:
    """Forgets messages as they age; access frequency slows the decay."""

    def __init__(
        self,
        max_age_hours: int = 24,
        decay_rate: float = 0.1
    ):
        self.max_age_hours = max_age_hours
        self.decay_rate = decay_rate
        self.messages = []
        self.message_id_counter = 0  # monotonically increasing message ids

    def add_message(self, role: str, content: str):
        """Append a message stamped with creation/access metadata."""
        self.messages.append({
            'id': self.message_id_counter,
            'role': role,
            'content': content,
            'created_at': datetime.now(),
            'last_accessed': datetime.now(),
            'access_count': 0
        })
        self.message_id_counter += 1

    def access_message(self, message_id: int):
        """Mark a message as accessed (refreshes its decay boost)."""
        for msg in self.messages:
            if msg['id'] == message_id:
                msg['last_accessed'] = datetime.now()
                msg['access_count'] += 1
                break

    def get_active_context(self) -> List[Dict]:
        """Return messages whose decayed weight is still above threshold.

        NOTE(review): ``self.messages.remove(msg)`` while iterating
        ``self.messages`` skips the element after each removal — iterate a
        copy (or rebuild the list) to fix.
        """
        now = datetime.now()
        active_messages = []
        for msg in self.messages:
            age = now - msg['created_at']
            # Fully expired (twice the max age): drop permanently.
            if age > timedelta(hours=self.max_age_hours * 2):
                self.messages.remove(msg)
                continue
            # Keep only messages whose decayed weight clears the threshold.
            decay_factor = self._calculate_decay(msg, now)
            if decay_factor > 0.3:  # retention threshold
                msg['decay_factor'] = decay_factor
                active_messages.append(msg)
        return active_messages

    def _calculate_decay(self, msg: Dict, now: datetime) -> float:
        """Combine time-based decay with an access-count boost (capped at 1.0)."""
        # Linear decay with elapsed hours.
        age = now - msg['created_at']
        hours_elapsed = age.total_seconds() / 3600
        time_decay = max(0, 1 - hours_elapsed * self.decay_rate / self.max_age_hours)
        # Frequently accessed messages decay more slowly (boost capped at 0.3).
        access_boost = min(msg['access_count'] * 0.1, 0.3)
        return min(time_decay + access_boost, 1.0)

5.2 基于重要性的遗忘
class ImportanceBasedForgettingManager:
    """Caps message count, evicting the least important; keeps their key points."""

    def __init__(
        self,
        max_messages: int = 50,
        min_importance_threshold: float = 0.2
    ):
        self.max_messages = max_messages
        # NOTE(review): declared but never consulted by the eviction logic.
        self.min_importance_threshold = min_importance_threshold
        self.messages = []

    def add_message(self, role: str, content: str, importance: float = 0.5):
        """Append a message, then evict low-importance ones if over capacity."""
        self.messages.append({
            'role': role,
            'content': content,
            'importance': importance,
            'key_points': self._extract_key_points(content)
        })
        # Forget low-importance messages when over the cap.
        self._forget_if_needed()

    def _extract_key_points(self, content: str) -> List[str]:
        """Grab short snippets around discourse/importance markers.

        Naive: only the first occurrence of each marker is captured.
        """
        key_phrases = ['但是', '所以', '因为', '因此', '关键', '重要']
        points = []
        for phrase in key_phrases:
            if phrase in content:
                idx = content.index(phrase)
                start = max(0, idx - 20)
                end = min(len(content), idx + 30)
                points.append(content[start:end])
        return points[:3]  # keep at most three snippets

    def _forget_if_needed(self):
        """Evict the lowest-importance messages past ``max_messages``.

        NOTE(review): the survivors are left sorted by importance, so the
        chronological order of ``self.messages`` is not preserved.
        """
        if len(self.messages) <= self.max_messages:
            return
        # Rank by importance, highest first.
        sorted_messages = sorted(
            self.messages,
            key=lambda x: x['importance'],
            reverse=True
        )
        # Keep only the top max_messages.
        self.messages = sorted_messages[:self.max_messages]
        # Salvage key points from whatever was forgotten.
        forgotten = sorted_messages[self.max_messages:]
        if forgotten:
            preserved_key_points = []
            for msg in forgotten:
                if msg['key_points']:
                    preserved_key_points.extend(msg['key_points'])
            if preserved_key_points:
                # Lazily created attribute; read via hasattr() below.
                self.preserved_context = {
                    'key_points': preserved_key_points,
                    'count': len(forgotten)
                }

    def get_context_with_forgetting(self) -> str:
        """Render salvaged key points (if any) followed by surviving messages."""
        parts = []
        # Salvaged key points from evicted messages.
        if hasattr(self, 'preserved_context') and self.preserved_context:
            points = self.preserved_context['key_points'][:5]
            if points:
                parts.append("【之前提到的关键信息】")
                for point in points:
                    parts.append(f"- {point}")
                parts.append("")
        # Then the messages still retained.
        for msg in self.messages:
            parts.append(f"{msg['role']}: {msg['content']}")
        return "\n".join(parts)

六、长期记忆与短期记忆整合
6.1 记忆层次架构
┌─────────────────────────────────────────┐
│ 长期记忆 (Long-term Memory) │
│ - 用户偏好 - 重要事实 - 历史总结 │
├─────────────────────────────────────────┤
│ 短期记忆 (Short-term Memory) │
│ - 当前对话 - 最近上下文 - 临时信息 │
├─────────────────────────────────────────┤
│ 工作记忆 (Working Memory) │
│ - 当前任务 - 活跃焦点 - 即时需求 │
└─────────────────────────────────────────┘
6.2 整合实现
class IntegratedMemoryManager:
    """Combines working, short-term, and persisted long-term memory."""

    def __init__(self, llm_client, storage_backend=None):
        self.llm = llm_client
        self.storage = storage_backend  # e.g. a database or file store
        # Short-term memory: the rolling recent conversation.
        self.short_term = []
        # Working memory: the immediate task focus.
        self.working_memory = {
            'current_task': None,
            'active_focus': None,
            'pending_items': []
        }

    def add_to_short_term(self, role: str, content: str):
        """Record a turn in short-term memory; may promote to long-term."""
        self.short_term.append({
            'role': role,
            'content': content,
            'timestamp': datetime.now()
        })
        # See whether anything should graduate to long-term memory.
        self._check_long_term_extraction()

    def _check_long_term_extraction(self):
        """Ask the LLM whether recent turns contain info worth persisting.

        Runs only once short-term memory holds at least 10 turns; afterwards
        short-term memory is trimmed to the last 3 turns.
        NOTE(review): indentation was lost in this excerpt — the trim is
        reconstructed as unconditional (outside the ``if``); confirm against
        the original source.
        """
        if len(self.short_term) < 10:
            return
        # Judge only the most recent turns.
        recent = self.short_term[-5:]
        # Let the LLM decide whether anything is worth remembering.
        prompt = f"""分析以下对话,判断是否有需要长期记住的信息。
对话:
{self._format_dialogue(recent)}
输出格式:
- 如果有重要信息,列出需要记住的要点
- 如果没有,输出"无需保留"
"""
        result = self.llm.generate(prompt)
        if "无需保留" not in result:
            # Persist the extracted points to long-term memory.
            self._save_to_long_term(result)
        # Trim short-term memory, keeping the most recent 3 turns.
        self.short_term = self.short_term[-3:]

    def _save_to_long_term(self, summary: str):
        """Persist a summary to the storage backend (no-op without one)."""
        if self.storage:
            self.storage.save('long_term_memory', {
                'summary': summary,
                'timestamp': datetime.now().isoformat()
            })

    def get_long_term_memory(self, query: str = "") -> str:
        """Load long-term memory, optionally filtered by query relevance."""
        if not self.storage:
            return ""
        memory = self.storage.load('long_term_memory')
        if not memory:
            return ""
        # With a query, return only content that looks relevant.
        if query:
            relevant = self._filter_relevant(memory, query)
            return relevant
        return memory.get('summary', '')

    def _filter_relevant(self, memory: Dict, query: str) -> str:
        """Return the stored summary only if it shares a word with the query."""
        summary = memory.get('summary', '')
        # Simple keyword overlap, not semantic matching.
        query_terms = set(query.lower().split())
        summary_terms = set(summary.lower().split())
        if query_terms & summary_terms:
            return summary
        return ""

    def update_working_memory(self, task: str = None, focus: str = None):
        """Update the current task and/or focus in working memory."""
        if task:
            self.working_memory['current_task'] = task
        if focus:
            self.working_memory['active_focus'] = focus

    def get_full_context(self, current_query: str = "") -> str:
        """Assemble working memory, long-term memory, then recent dialogue."""
        parts = []
        # 1. Working memory.
        if self.working_memory['current_task']:
            parts.append(f"【当前任务】{self.working_memory['current_task']}")
        if self.working_memory['active_focus']:
            parts.append(f"【当前焦点】{self.working_memory['active_focus']}")
        # 2. Long-term memory (query-filtered).
        long_term = self.get_long_term_memory(current_query)
        if long_term:
            parts.append(f"【长期记忆】\n{long_term}")
        # 3. Short-term memory.
        if self.short_term:
            parts.append("【近期对话】")
            for msg in self.short_term:
                parts.append(f"{msg['role']}: {msg['content']}")
        return "\n\n".join(parts)

    def _format_dialogue(self, messages: List[Dict]) -> str:
        """Render messages as 'role: content' lines."""
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

七、完整对话管理实现
class ProductionConversationManager:
    """Production-grade manager combining full history, summaries, and weighting."""

    def __init__(
        self,
        llm_client,
        max_tokens: int = 80000,
        summarize_threshold: int = 30000
    ):
        self.llm = llm_client
        self.max_tokens = max_tokens
        self.summarize_threshold = summarize_threshold
        # Core components.
        self.full_history = []     # verbatim recent history
        self.summary_history = []  # summaries of evicted history
        self.summarizer = ProgressiveSummarizer(llm_client)
        self.importance_manager = ImportanceWeightedHistoryManager(llm_client)
        # Configuration.
        # NOTE(review): stored but never read anywhere below.
        self.summary_trigger_ratio = 0.5  # trigger summarization at 50% budget

    def add_message(self, role: str, content: str, metadata: Dict = None):
        """Record a message, score its importance, and maybe summarize."""
        message = {
            'role': role,
            'content': content,
            'metadata': metadata or {},
            'timestamp': datetime.now().isoformat()
        }
        self.full_history.append(message)
        # Importance scoring (kept in a parallel manager).
        self.importance_manager.add_message(role, content)
        # Summarization check.
        self._check_summarization()

    def _check_summarization(self):
        """Summarize hard at the budget cap, progressively above the threshold."""
        total = self._calculate_total_tokens()
        if total > self.max_tokens:
            self._perform_summarization()
        elif total > self.summarize_threshold:
            # Feed the latest message to the progressive summarizer.
            self.summarizer.add_message(
                self.full_history[-1]['role'],
                self.full_history[-1]['content']
            )
            summary = self.summarizer.summarize_if_needed(
                threshold_tokens=self.summarize_threshold // 2
            )
            if summary:
                self.summary_history.append(summary)

    def _perform_summarization(self):
        """Summarize the oldest two-thirds of history, keep the newest third.

        NOTE(review): when ``len(self.full_history) < 3``, ``keep_count`` is
        0, making ``[:-0]`` empty and ``[-0:]`` the whole list — nothing gets
        summarized; guard this if tiny histories can reach the token cap.
        """
        # Keep the most recent third; summarize the first two-thirds.
        keep_count = len(self.full_history) // 3
        to_summarize = self.full_history[:-keep_count]
        prompt = f"""总结以下对话的关键内容:
{self._format_dialogue(to_summarize)}
要求:
1. 保留所有重要决定和结论
2. 保留关键的用户需求
3. 删除重复和次要细节
"""
        summary = self.llm.generate(prompt)
        self.summary_history.append(summary)
        self.full_history = self.full_history[-keep_count:]

    def get_context_for_llm(self, current_query: str = "") -> str:
        """Build the final context: summaries first, then verbatim history."""
        parts = []
        # 1. Summaries of earlier conversation.
        if self.summary_history:
            parts.append("【早期对话摘要】")
            for i, summary in enumerate(self.summary_history):
                parts.append(f"阶段{i+1}: {summary}")
            parts.append("")
        # 2. Full recent history.
        for msg in self.full_history:
            parts.append(f"{msg['role']}: {msg['content']}")
        return "\n\n".join(parts)

    def _calculate_total_tokens(self) -> int:
        """Estimated tokens across summaries plus verbatim history."""
        total = 0
        for summary in self.summary_history:
            total += self._estimate_tokens(summary)
        for msg in self.full_history:
            total += self._estimate_tokens(msg['content'])
        return total

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        # Heuristic: ~0.5 token per CJK char, ~0.25 per whitespace word.
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

    @staticmethod
    def _format_dialogue(messages: List[Dict]) -> str:
        """Render messages as 'role: content' lines."""
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

八、相关主题
九、参考文献
- Miller, A. (2023). Retrieval-Augmented Generation for Conversational AI.
- Xu, S., et al. (2023). MemoRAG: Moving towards Next-Gen RAG via Memory-Augmented Generation.
- Lewis, P., et al. (2020). Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks.