摘要

多轮对话是LLM应用的核心场景,而对话历史管理直接影响模型对上下文的理解和响应质量。本文系统讲解完整历史vs摘要历史、固定窗口摘要、重要性加权、遗忘机制,以及长期记忆与短期记忆的整合策略,帮助构建高效、连贯的多轮对话系统。

关键词速览

| 术语 | 英文 | 说明 |
| --- | --- | --- |
| 对话历史 | Conversation History | 多轮交互的记录 |
| 上下文窗口 | Context Window | 模型处理上限 |
| 历史摘要 | History Summary | 对历史的压缩表示 |
| 遗忘机制 | Forgetting Mechanism | 选择性丢弃信息 |
| 长期记忆 | Long-term Memory | 持久化存储的信息 |
| 短期记忆 | Short-term Memory | 当前会话的信息 |
| 记忆整合 | Memory Integration | 多种记忆源合并 |
| 会话管理 | Session Management | 会话状态维护 |
| Token预算 | Token Budget | 可用的token数量 |
| 重要性打分 | Importance Scoring | 信息重要程度评估 |

一、对话历史管理基础

1.1 为什么需要对话历史管理

多轮对话中,对话历史管理面临的核心挑战:

  1. 上下文窗口限制:模型有最大token限制,不能无限累积历史
  2. 注意力稀释:历史越长,单条消息的重要性越低
  3. 成本累积:每次请求都传递完整历史,成本线性增长
  4. 相关性衰减:早期对话与当前任务可能无关
  5. 信息冗余:多轮对话中可能有重复内容

1.2 管理策略对比

| 策略 | 优点 | 缺点 | 适用场景 |
| --- | --- | --- | --- |
| 完整历史 | 信息完整 | 成本高、可能超出限制 | 短对话、关键场景 |
| 固定窗口 | 简单高效 | 可能丢失重要信息 | 长对话、简单场景 |
| 摘要历史 | 节省空间 | 可能丢失细节 | 长对话、复杂场景 |
| 混合策略 | 平衡各方面 | 实现复杂 | 大多数场景 |

二、完整历史vs摘要历史

2.1 完整历史策略

class FullHistoryManager:
    """Full-history manager: stores every message verbatim and only truncates
    when building the context for one LLM call.

    Suitable for short conversations where no turn may be lost.
    """

    def __init__(self, max_context_tokens: int = 100000):
        self.max_context_tokens = max_context_tokens  # token budget per LLM call
        self.history = []  # chronological list of {'role', 'content'} dicts

    def add_message(self, role: str, content: str):
        """Append one message to the history."""
        self.history.append({
            'role': role,
            'content': content
        })

    def get_history(self) -> List[Dict]:
        """Return a shallow copy of the full history."""
        return self.history.copy()

    def get_context_for_llm(self) -> List[Dict]:
        """Build the message list for the LLM with newest-first truncation.

        Walks backwards from the most recent message and keeps adding older
        messages until the token budget would be exceeded, so the latest
        turns are always preserved.

        Returns:
            The selected messages, in chronological order.
            (Bug fix: the original annotated the return type as ``str``
            although a list of message dicts is returned.)
        """
        total_tokens = 0
        context_messages = []

        # Newest first: stop as soon as the next (older) message would
        # blow the budget.
        for msg in reversed(self.history):
            msg_tokens = self._estimate_tokens(msg['content'])
            if total_tokens + msg_tokens > self.max_context_tokens:
                break
            context_messages.insert(0, msg)
            total_tokens += msg_tokens

        return context_messages

    def _estimate_tokens(self, text: str) -> int:
        """Rough estimate: ~0.5 token per CJK char, ~0.25 per whitespace word."""
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

2.2 摘要历史策略

class SummarizedHistoryManager:
    """History manager that compresses older turns into an LLM summary.

    When the running token estimate exceeds ``summary_trigger_threshold``,
    the older half of the buffered messages is summarized and dropped,
    keeping the prompt bounded.
    """

    def __init__(
        self,
        llm_client,
        max_history_tokens: int = 8000,
        summary_trigger_threshold: int = 5000
    ):
        self.llm = llm_client                      # must expose .summarize(text) -> str
        self.max_history_tokens = max_history_tokens
        self.summary_trigger_threshold = summary_trigger_threshold
        self.current_history = []                  # not-yet-summarized messages
        self.summary = None                        # rolling summary of everything dropped
        self.summary_point = 0                     # total count of messages summarized so far

    def add_message(self, role: str, content: str):
        """Append a message and summarize older turns if over threshold."""
        self.current_history.append({
            'role': role,
            'content': content
        })

        if self._estimate_total_tokens() > self.summary_trigger_threshold:
            self._trigger_summarization()

    def _trigger_summarization(self):
        """Summarize the older half of the buffer and drop those turns.

        Bug fixes vs. the original:
        - the previous summary is folded into the new summarization input
          instead of being silently overwritten (earlier context was lost);
        - ``summary_point`` accumulates across rounds instead of being reset;
        - a round with fewer than 2 buffered messages is skipped (the
          original summarized an empty list).
        """
        to_summarize = self.current_history[:len(self.current_history) // 2]
        if not to_summarize:
            return  # nothing to compress yet

        source = self._format_for_summary(to_summarize)
        if self.summary:
            # Carry the earlier rolling summary forward so it is not lost.
            source = f"[早期对话摘要]\n{self.summary}\n\n{source}"

        self.summary = self.llm.summarize(source)
        self.current_history = self.current_history[len(to_summarize):]
        self.summary_point += len(to_summarize)

    def _format_for_summary(self, messages: List[Dict]) -> str:
        """Render messages as 'role: content' lines for the summarizer."""
        lines = []
        for msg in messages:
            lines.append(f"{msg['role']}: {msg['content']}")
        return "\n".join(lines)

    def get_context_for_llm(self) -> str:
        """Render the rolling summary (if any) followed by the live messages."""
        parts = []

        if self.summary:
            parts.append(f"[早期对话摘要]\n{self.summary}\n")

        for msg in self.current_history:
            parts.append(f"{msg['role']}: {msg['content']}")

        return "\n".join(parts)

    def _estimate_total_tokens(self) -> int:
        """Estimated token total of the summary plus buffered messages."""
        total = 0
        if self.summary:
            total += self._estimate_tokens(self.summary)
        for msg in self.current_history:
            total += self._estimate_tokens(msg['content'])
        return total

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        """~0.5 token per CJK char, ~0.25 per whitespace-separated word."""
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

三、固定窗口摘要

3.1 固定窗口策略实现

class FixedWindowHistoryManager:
    """Keeps a fixed token window of recent messages, summarizing older ones."""

    def __init__(
        self,
        llm_client,
        window_size: int = 6000,  # token budget for the live window
        overlap_tokens: int = 500  # tokens of old context carried over for continuity
    ):
        self.llm = llm_client            # must expose .summarize(text) -> str
        self.window_size = window_size
        self.overlap_tokens = overlap_tokens
        self.messages = []               # live (unsummarized) messages
        self.summaries = []              # summaries of evicted history

    def add_message(self, role: str, content: str):
        """Append one message to the live window."""
        self.messages.append({'role': role, 'content': content})

    def _should_summarize(self) -> bool:
        """True when the live window exceeds its token budget."""
        return self._total_tokens() > self.window_size

    def _total_tokens(self) -> int:
        """Estimated token total of the live window."""
        return sum(self._estimate_tokens(m['content']) for m in self.messages)

    def get_context(self) -> str:
        """Return the rendered context, summarizing old history first if needed."""
        if self._should_summarize():
            self._summarize_old_history()

        return self._build_context()

    def _summarize_old_history(self):
        """Summarize the oldest messages so the remainder fits the window.

        Bug fix: the original forward scan compared the running total
        (which already included the current message) against
        ``window_size - tokens(current)``, double-counting that message and
        splitting almost immediately once the budget was touched. We now
        keep the largest *suffix* of recent messages that fits within
        ``window_size`` and summarize everything before it.
        """
        if not self.messages:
            return

        # Walk backwards to find the longest recent suffix within budget.
        suffix_tokens = 0
        split_index = len(self.messages)
        for i in range(len(self.messages) - 1, -1, -1):
            msg_tokens = self._estimate_tokens(self.messages[i]['content'])
            if suffix_tokens + msg_tokens > self.window_size:
                break
            suffix_tokens += msg_tokens
            split_index = i

        # Always keep at least the newest message in the window.
        split_index = min(split_index, len(self.messages) - 1)

        if split_index > 0:
            old_messages = self.messages[:split_index]
            old_summary = self.llm.summarize(
                self._format_messages(old_messages)
            )

            self.summaries.append({
                'summary': old_summary,
                'message_count': split_index
            })

            # Carry over a small tail of the old messages for continuity.
            overlap = self._get_overlap_messages(old_messages)
            self.messages = overlap + self.messages[split_index:]

    def _get_overlap_messages(self, old_messages: List[Dict]) -> List[Dict]:
        """Take the newest old messages that fit within ``overlap_tokens``."""
        overlap_size = 0
        overlap_messages = []

        for msg in reversed(old_messages):
            msg_tokens = self._estimate_tokens(msg['content'])
            if overlap_size + msg_tokens > self.overlap_tokens:
                break
            overlap_messages.insert(0, msg)
            overlap_size += msg_tokens

        return overlap_messages

    def _format_messages(self, messages: List[Dict]) -> str:
        """Join messages as 'role: content' lines."""
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

    def _build_context(self) -> str:
        """Render staged summaries followed by the live messages."""
        parts = []

        if self.summaries:
            parts.append("[历史对话摘要]")
            for i, s in enumerate(self.summaries):
                parts.append(f"阶段{i+1}: {s['summary']}")
            parts.append("")

        for msg in self.messages:
            parts.append(f"{msg['role']}: {msg['content']}")

        return "\n".join(parts)

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        """~0.5 token per CJK char, ~0.25 per whitespace-separated word."""
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

3.2 渐进式摘要

class ProgressiveSummarizer:
    """Incrementally compresses a growing dialogue into staged summaries."""

    def __init__(self, llm_client):
        self.llm = llm_client       # must expose .generate(prompt) -> str
        self.summaries = []         # one entry per completed summarization stage
        self.raw_messages = []      # turns not yet folded into a summary

    def add_message(self, role: str, content: str):
        """Record one raw dialogue turn."""
        self.raw_messages.append({'role': role, 'content': content})

    def summarize_if_needed(
        self,
        threshold_tokens: int = 4000,
        min_messages: int = 5
    ) -> str:
        """Summarize when both the message count and the token load warrant it.

        Returns the new summary, or "" when no summarization was performed.
        """
        if len(self.raw_messages) < min_messages:
            return ""
        if self._total_tokens() < threshold_tokens:
            return ""
        return self._create_summary()

    def _create_summary(self) -> str:
        """Summarize the older half of the buffer and drop those turns."""
        half = len(self.raw_messages) // 2
        to_summarize = self.raw_messages[:half]

        prompt = f"""请总结以下对话的核心内容,保留关键信息和结论。
 
对话内容:
{self._format_dialogue(to_summarize)}
 
摘要要求:
1. 保留关键话题和决定
2. 保留重要的用户需求
3. 保留模型提供的解决方案
4. 删除重复和细节
5. 控制在200字以内
"""

        summary = self.llm.generate(prompt)

        # Record the stage and retain only the newer half of the buffer.
        self.summaries.append(summary)
        self.raw_messages = self.raw_messages[half:]

        return summary

    def get_full_context(self) -> str:
        """Render staged summaries followed by the remaining raw turns."""
        parts = []

        if self.summaries:
            parts.append("【早期对话摘要】")
            parts.extend(
                f"阶段{idx + 1}: {text}" for idx, text in enumerate(self.summaries)
            )
            parts.append("")

        parts.extend(f"{m['role']}: {m['content']}" for m in self.raw_messages)

        return "\n".join(parts)

    def _format_dialogue(self, messages: List[Dict]) -> str:
        """Join messages as 'role: content' lines."""
        return "\n".join(f"{m['role']}: {m['content']}" for m in messages)

    def _total_tokens(self) -> int:
        """Estimated token total across buffered raw messages."""
        return sum(self._estimate_tokens(m['content']) for m in self.raw_messages)

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        """~0.5 token per CJK char, ~0.25 per whitespace-separated word."""
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

四、重要性加权

4.1 重要性评估

class ImportanceWeightedHistoryManager:
    """Selects context by combining per-message importance with query relevance."""

    def __init__(self, llm_client, max_tokens: int = 8000):
        self.llm = llm_client            # kept for interface parity; not used by the heuristics below
        self.max_tokens = max_tokens     # token budget for the selected context
        self.messages = []               # chronological message store

    def add_message(self, role: str, content: str, is_important: bool = None):
        """Append a message, scoring its importance (or honoring an explicit flag)."""
        importance = self._assess_importance(role, content, is_important)

        self.messages.append({
            'role': role,
            'content': content,
            'importance': importance,
            'timestamp': len(self.messages)   # monotone insertion index
        })

    def _assess_importance(
        self,
        role: str,
        content: str,
        explicit_important: bool = None
    ) -> float:
        """Heuristic importance score clamped to [0.1, 1.0]."""
        score = 0.5  # base score

        # An explicit True flag short-circuits all heuristics.
        if explicit_important:
            return 1.0

        # Role weighting: user requests matter most.
        role_weights = {
            'user': 0.8,
            'assistant': 0.7,
            'system': 0.5
        }
        score *= role_weights.get(role, 0.5)

        # Trigger-keyword boosts.
        important_keywords = ['重要', '必须', '关键', '不要', '记住', '忘记', '确认']
        for kw in important_keywords:
            if kw in content:
                score += 0.1

        # Moderate-length messages tend to carry the most signal.
        length = len(content)
        if 50 < length < 500:
            score += 0.1
        elif length > 1000:
            score -= 0.1

        # Code blocks are usually worth keeping.
        # (Bug fix: the original tested the same '```' literal twice.)
        if '```' in content:
            score += 0.1

        return min(max(score, 0.1), 1.0)

    def get_weighted_context(self, current_query: str = "") -> str:
        """Pick the highest-scoring messages that fit the token budget.

        Bug fix: the original sorted ``self.messages`` in place, permanently
        destroying the chronological storage order, and wrote scoring keys
        into the stored dicts. We now score shallow copies and leave the
        store untouched.
        """
        # 1. Score relevance against the query on copies of the messages.
        scored_messages = self._score_relevance(current_query)

        # 2. Blend stored importance with query relevance.
        for msg in scored_messages:
            msg['combined_score'] = (
                0.6 * msg['importance'] +
                0.4 * msg.get('query_relevance', 0.5)
            )

        # 3. Highest combined score first.
        scored_messages.sort(key=lambda x: x['combined_score'], reverse=True)

        # 4. Greedy selection under the token budget.
        selected = []
        total_tokens = 0

        for msg in scored_messages:
            msg_tokens = self._estimate_tokens(msg['content'])
            if total_tokens + msg_tokens <= self.max_tokens:
                selected.append(msg)
                total_tokens += msg_tokens

        # 5. Restore chronological order for readability.
        selected.sort(key=lambda x: x['timestamp'])

        return self._format_messages(selected)

    def _score_relevance(self, query: str) -> List[Dict]:
        """Return scored shallow copies of the messages; the store is not mutated."""
        copies = [dict(msg) for msg in self.messages]
        if not query:
            return copies

        # Naive bag-of-words overlap as a relevance proxy.
        query_terms = set(query.lower().split())

        for msg in copies:
            content_terms = set(msg['content'].lower().split())
            overlap = len(query_terms & content_terms)
            msg['query_relevance'] = overlap / max(len(query_terms), 1)

        return copies

    def _format_messages(self, messages: List[Dict]) -> str:
        """Join messages as 'role: content' lines."""
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        """~0.5 token per CJK char, ~0.25 per whitespace-separated word."""
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

4.2 关键词触发的重要性

class KeywordTriggeredImportanceManager:
    """History manager that derives importance and intent from trigger keywords."""

    # Tiered importance triggers used by _auto_assess.
    IMPORTANCE_KEYWORDS = {
        'high': ['必须', '重要', '关键', '紧急', '不要忘记', '一定', '切记'],
        'medium': ['建议', '可以', '尝试', '最好', '推荐'],
        'low': ['顺便', '另外', '对了', '哦']
    }

    # Intent triggers used by _detect_intent (first matching group wins).
    USER_INTENT_KEYWORDS = {
        'preference': ['喜欢', '不喜欢', '偏好', '倾向'],
        'constraint': ['不能', '不要', '必须', '只能'],
        'goal': ['想要', '目标', '希望', '达到'],
        'context': ['之前', '上次', '之前提到']
    }

    def __init__(self):
        self.messages = []  # chronological store with per-message annotations

    def add_message(self, role: str, content: str):
        """Append a message annotated with importance, intent and entities."""
        importance = self._auto_assess(content)
        intent_type = self._detect_intent(content)

        self.messages.append({
            'role': role,
            'content': content,
            'importance': importance,
            'intent_type': intent_type,
            'captured_entities': self._extract_entities(content)
        })

    def _auto_assess(self, content: str) -> float:
        """Keyword-tier scoring: high raises to 0.9, medium to 0.7, low caps at 0.3."""
        score = 0.5

        for kw in self.IMPORTANCE_KEYWORDS['high']:
            if kw in content:
                score = max(score, 0.9)

        for kw in self.IMPORTANCE_KEYWORDS['medium']:
            if kw in content:
                score = max(score, 0.7)

        # A low-signal marker caps the score, even if raised above.
        for kw in self.IMPORTANCE_KEYWORDS['low']:
            if kw in content:
                score = min(score, 0.3)

        return score

    def _detect_intent(self, content: str) -> str:
        """Return the first intent group whose keyword appears, else 'general'."""
        for intent, keywords in self.USER_INTENT_KEYWORDS.items():
            for kw in keywords:
                if kw in content:
                    return intent
        return 'general'

    def _extract_entities(self, content: str) -> Dict:
        """Extract shallow entities; a production system would use an NER model.

        'names' and 'topics' are placeholders — this naive version never
        populates them.
        """
        entities = {
            'names': [],
            'dates': [],
            'numbers': [],
            'topics': []
        }

        import re
        # Dates like 2024-05-15 or 2024年5月15.
        # Bug fix: the original pattern ended with a lazy '\d{1,2}?', which
        # always matched only the first digit of a two-digit day.
        dates = re.findall(r'\d{4}[-/年]\d{1,2}[-/月]\d{1,2}', content)
        entities['dates'].extend(dates)

        # Raw integers, capped to limit noise.
        numbers = re.findall(r'\d+', content)
        entities['numbers'].extend(numbers[:5])

        return entities

五、遗忘机制

5.1 基于时间的遗忘

import time
from datetime import datetime, timedelta
from typing import Dict, List
 
class TimeBasedForgettingManager:
    """Forgetting mechanism driven by message age and access frequency."""

    def __init__(
        self,
        max_age_hours: int = 24,
        decay_rate: float = 0.1
    ):
        self.max_age_hours = max_age_hours  # nominal lifetime; hard expiry is 2x this
        self.decay_rate = decay_rate        # decay slope (scaled by max_age_hours)
        self.messages = []
        self.message_id_counter = 0         # monotonically increasing message id

    def add_message(self, role: str, content: str):
        """Append a message stamped with creation/access metadata."""
        self.messages.append({
            'id': self.message_id_counter,
            'role': role,
            'content': content,
            'created_at': datetime.now(),
            'last_accessed': datetime.now(),
            'access_count': 0
        })
        self.message_id_counter += 1

    def access_message(self, message_id: int):
        """Mark a message as accessed (boosts its retention via access_count)."""
        for msg in self.messages:
            if msg['id'] == message_id:
                msg['last_accessed'] = datetime.now()
                msg['access_count'] += 1
                break

    def get_active_context(self) -> List[Dict]:
        """Return messages still above the retention threshold; prune expired ones.

        Bug fix: the original called ``self.messages.remove(msg)`` while
        iterating ``self.messages``, which silently skips the element after
        each removal. We rebuild the survivor list in a single pass instead.
        """
        now = datetime.now()
        survivors = []
        active_messages = []

        for msg in self.messages:
            age = now - msg['created_at']

            # Hard expiry: drop anything older than twice the nominal lifetime.
            if age > timedelta(hours=self.max_age_hours * 2):
                continue
            survivors.append(msg)

            decay_factor = self._calculate_decay(msg, now)

            if decay_factor > 0.3:  # retention threshold
                msg['decay_factor'] = decay_factor
                active_messages.append(msg)

        self.messages = survivors
        return active_messages

    def _calculate_decay(self, msg: Dict, now: datetime) -> float:
        """Combine linear time decay with an access-count boost, capped at 1.0."""
        age = now - msg['created_at']
        hours_elapsed = age.total_seconds() / 3600

        # Linear decay over time, floored at 0.
        time_decay = max(0, 1 - hours_elapsed * self.decay_rate / self.max_age_hours)

        # Frequently accessed messages decay slower (max +0.3).
        access_boost = min(msg['access_count'] * 0.1, 0.3)

        return min(time_decay + access_boost, 1.0)

5.2 基于重要性的遗忘

class ImportanceBasedForgettingManager:
    """Forgets the least important messages once a size cap is exceeded."""

    def __init__(
        self,
        max_messages: int = 50,
        min_importance_threshold: float = 0.2
    ):
        self.max_messages = max_messages
        # NOTE(review): unused here, as in the original — confirm whether it
        # should also gate eviction.
        self.min_importance_threshold = min_importance_threshold
        self.messages = []
        self.preserved_context = None  # key points salvaged from forgotten messages

    def add_message(self, role: str, content: str, importance: float = 0.5):
        """Append a message and evict low-importance ones if over the cap."""
        self.messages.append({
            'role': role,
            'content': content,
            'importance': importance,
            'key_points': self._extract_key_points(content)
        })

        self._forget_if_needed()

    def _extract_key_points(self, content: str) -> List[str]:
        """Grab short windows around discourse keywords (at most 3)."""
        key_phrases = ['但是', '所以', '因为', '因此', '关键', '重要']
        points = []

        for phrase in key_phrases:
            if phrase in content:
                idx = content.index(phrase)
                start = max(0, idx - 20)
                end = min(len(content), idx + 30)
                points.append(content[start:end])

        return points[:3]

    def _forget_if_needed(self):
        """Evict the lowest-importance messages beyond the cap.

        Bug fixes vs. the original:
        - surviving messages keep their chronological order (the original
          left them permanently sorted by importance);
        - salvaged key points accumulate across eviction rounds instead of
          overwriting the previous ``preserved_context``.
        """
        if len(self.messages) <= self.max_messages:
            return

        # Identify the keepers by importance, then restore original order.
        ranked = sorted(self.messages, key=lambda m: m['importance'], reverse=True)
        keep_ids = set(id(m) for m in ranked[:self.max_messages])
        forgotten = ranked[self.max_messages:]
        self.messages = [m for m in self.messages if id(m) in keep_ids]

        # Salvage key points from what we are about to forget.
        salvaged = []
        for msg in forgotten:
            if msg['key_points']:
                salvaged.extend(msg['key_points'])

        if salvaged:
            if self.preserved_context:
                salvaged = self.preserved_context['key_points'] + salvaged
                count = self.preserved_context['count'] + len(forgotten)
            else:
                count = len(forgotten)
            self.preserved_context = {
                'key_points': salvaged,
                'count': count
            }

    def get_context_with_forgetting(self) -> str:
        """Render salvaged key points (if any) followed by surviving messages."""
        parts = []

        if self.preserved_context:
            points = self.preserved_context['key_points'][:5]
            if points:
                parts.append("【之前提到的关键信息】")
                for point in points:
                    parts.append(f"- {point}")
                parts.append("")

        for msg in self.messages:
            parts.append(f"{msg['role']}: {msg['content']}")

        return "\n".join(parts)

六、长期记忆与短期记忆整合

6.1 记忆层次架构

┌─────────────────────────────────────────┐
│          长期记忆 (Long-term Memory)       │
│  - 用户偏好    - 重要事实    - 历史总结    │
├─────────────────────────────────────────┤
│          短期记忆 (Short-term Memory)     │
│  - 当前对话    - 最近上下文  - 临时信息    │
├─────────────────────────────────────────┤
│          工作记忆 (Working Memory)        │
│  - 当前任务    - 活跃焦点    - 即时需求    │
└─────────────────────────────────────────┘

6.2 整合实现

class IntegratedMemoryManager:
    """Unified memory manager: short-term, working and long-term memory.

    The optional ``storage_backend`` must expose ``save(key, dict)`` and
    ``load(key) -> dict`` — presumably a database or file wrapper; confirm
    against the concrete backend. ``llm_client`` must expose
    ``generate(prompt) -> str``.
    """

    def __init__(self, llm_client, storage_backend=None):
        self.llm = llm_client
        self.storage = storage_backend  # e.g. database or file store; None disables long-term memory

        # Short-term memory: raw messages of the current session.
        self.short_term = []

        # Working memory: immediate task context.
        self.working_memory = {
            'current_task': None,
            'active_focus': None,
            'pending_items': []
        }

    def add_to_short_term(self, role: str, content: str):
        """Append a message to short-term memory and maybe promote to long-term."""
        self.short_term.append({
            'role': role,
            'content': content,
            'timestamp': datetime.now()
        })

        # Decide whether recent turns should be persisted long-term.
        self._check_long_term_extraction()

    def _check_long_term_extraction(self):
        """Ask the LLM whether the recent turns contain info worth persisting.

        NOTE(review): this runs on every add once 10 turns are buffered, and
        the buffer is only trimmed when something is deemed worth keeping —
        confirm that the repeated LLM calls and potential unbounded growth of
        ``short_term`` are acceptable.
        """
        if len(self.short_term) < 10:
            return

        # Only the 5 most recent turns are examined.
        recent = self.short_term[-5:]

        # The LLM either lists points to remember or answers with the
        # literal marker "无需保留" ("nothing to keep").
        prompt = f"""分析以下对话,判断是否有需要长期记住的信息。
 
对话:
{self._format_dialogue(recent)}
 
输出格式:
- 如果有重要信息,列出需要记住的要点
- 如果没有,输出"无需保留"
"""
        
        result = self.llm.generate(prompt)
        
        if "无需保留" not in result:
            # Persist the distilled points to long-term memory.
            self._save_to_long_term(result)
            # Shrink short-term memory, keeping the 3 most recent turns.
            self.short_term = self.short_term[-3:]

    def _save_to_long_term(self, summary: str):
        """Persist a summary via the storage backend (no-op without one)."""
        if self.storage:
            self.storage.save('long_term_memory', {
                'summary': summary,
                'timestamp': datetime.now().isoformat()
            })

    def get_long_term_memory(self, query: str = "") -> str:
        """Load long-term memory, optionally filtered for relevance to ``query``."""
        if not self.storage:
            return ""

        memory = self.storage.load('long_term_memory')
        if not memory:
            return ""

        # With a query, return the stored summary only if it looks relevant.
        if query:
            relevant = self._filter_relevant(memory, query)
            return relevant

        return memory.get('summary', '')

    def _filter_relevant(self, memory: Dict, query: str) -> str:
        """Return the stored summary iff it shares any word with the query."""
        summary = memory.get('summary', '')

        # Naive bag-of-words overlap as the relevance test.
        query_terms = set(query.lower().split())
        summary_terms = set(summary.lower().split())

        if query_terms & summary_terms:
            return summary

        return ""

    def update_working_memory(self, task: str = None, focus: str = None):
        """Update the current task and/or active focus in working memory."""
        if task:
            self.working_memory['current_task'] = task
        if focus:
            self.working_memory['active_focus'] = focus

    def get_full_context(self, current_query: str = "") -> str:
        """Render working memory, long-term memory and recent dialogue."""
        parts = []

        # 1. Working memory.
        if self.working_memory['current_task']:
            parts.append(f"【当前任务】{self.working_memory['current_task']}")
        if self.working_memory['active_focus']:
            parts.append(f"【当前焦点】{self.working_memory['active_focus']}")

        # 2. Long-term memory (filtered by the current query when given).
        long_term = self.get_long_term_memory(current_query)
        if long_term:
            parts.append(f"【长期记忆】\n{long_term}")

        # 3. Short-term memory (recent dialogue).
        if self.short_term:
            parts.append("【近期对话】")
            for msg in self.short_term:
                parts.append(f"{msg['role']}: {msg['content']}")

        return "\n\n".join(parts)

    def _format_dialogue(self, messages: List[Dict]) -> str:
        """Join messages as 'role: content' lines."""
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

七、完整对话管理实现

class ProductionConversationManager:
    """Production-grade conversation manager.

    Combines a verbatim recent history, progressive summaries of older
    turns and importance scoring. Summarization is triggered by token
    budgets: a soft threshold feeds the progressive summarizer, a hard
    limit forces compaction of the full history.
    """

    def __init__(
        self,
        llm_client,
        max_tokens: int = 80000,
        summarize_threshold: int = 30000
    ):
        self.llm = llm_client
        self.max_tokens = max_tokens                     # hard budget: force summarization
        self.summarize_threshold = summarize_threshold   # soft budget: progressive summarization

        # Core components.
        self.full_history = []      # verbatim recent messages
        self.summary_history = []   # summaries of compacted older turns
        self.summarizer = ProgressiveSummarizer(llm_client)
        self.importance_manager = ImportanceWeightedHistoryManager(llm_client)

        # NOTE(review): currently unused — confirm the intended trigger logic.
        self.summary_trigger_ratio = 0.5  # 50%时触发摘要

    def add_message(self, role: str, content: str, metadata: Dict = None):
        """Record a message, score its importance, and summarize if over budget."""
        message = {
            'role': role,
            'content': content,
            'metadata': metadata or {},
            'timestamp': datetime.now().isoformat()
        }

        self.full_history.append(message)

        # Importance bookkeeping for weighted retrieval.
        self.importance_manager.add_message(role, content)

        self._check_summarization()

    def _check_summarization(self):
        """Compact hard at ``max_tokens``; summarize progressively above the soft threshold."""
        total = self._calculate_total_tokens()

        if total > self.max_tokens:
            self._perform_summarization()
        elif total > self.summarize_threshold:
            # Feed the newest message to the progressive summarizer.
            # NOTE(review): only messages added *after* the threshold is
            # crossed reach the summarizer — confirm this is intended.
            self.summarizer.add_message(
                self.full_history[-1]['role'],
                self.full_history[-1]['content']
            )
            summary = self.summarizer.summarize_if_needed(
                threshold_tokens=self.summarize_threshold // 2
            )
            if summary:
                self.summary_history.append(summary)

    def _perform_summarization(self):
        """Summarize the older ~2/3 of the history and keep the recent ~1/3.

        Bug fix: with fewer than 3 messages the original computed
        ``keep_count == 0``, so ``full_history[:-0]`` summarized an empty
        dialogue while the history itself was left untouched. We now keep at
        least one message and skip the round when there is nothing to
        compress.
        """
        if len(self.full_history) < 2:
            return

        keep_count = max(1, len(self.full_history) // 3)
        to_summarize = self.full_history[:-keep_count]

        prompt = f"""总结以下对话的关键内容:
 
{self._format_dialogue(to_summarize)}
 
要求:
1. 保留所有重要决定和结论
2. 保留关键的用户需求
3. 删除重复和次要细节
"""

        summary = self.llm.generate(prompt)
        self.summary_history.append(summary)
        self.full_history = self.full_history[-keep_count:]

    def get_context_for_llm(self, current_query: str = "") -> str:
        """Render staged summaries followed by the verbatim recent history."""
        parts = []

        # 1. Summaries of older dialogue.
        if self.summary_history:
            parts.append("【早期对话摘要】")
            for i, summary in enumerate(self.summary_history):
                parts.append(f"阶段{i+1}: {summary}")
            parts.append("")

        # 2. Verbatim recent history.
        for msg in self.full_history:
            parts.append(f"{msg['role']}: {msg['content']}")

        return "\n\n".join(parts)

    def _calculate_total_tokens(self) -> int:
        """Estimated tokens across summaries and verbatim history."""
        total = 0
        for summary in self.summary_history:
            total += self._estimate_tokens(summary)
        for msg in self.full_history:
            total += self._estimate_tokens(msg['content'])
        return total

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        """~0.5 token per CJK char, ~0.25 per whitespace-separated word."""
        chinese = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        english = len(text.split()) - chinese
        return int(chinese * 0.5 + english * 0.25)

    @staticmethod
    def _format_dialogue(messages: List[Dict]) -> str:
        """Join messages as 'role: content' lines."""
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

八、相关主题

九、参考文献

  1. Miller, A. (2023). Retrieval-Augmented Generation for Conversational AI.
  2. Xu, S., et al. (2023). MemoRAG: Moving towards Next-Gen RAG via Memory-Augmented Generation.
  3. Lewis, P., et al. (2020). Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks.