Add File

2025-11-07 09:05:39 +08:00
parent c8a8866e40
commit eca5563f26
1 changed files with 306 additions and 0 deletions
--- a/src/summeryanyfile/graph/nodes.py
+++ b/src/summeryanyfile/graph/nodes.py
@@ -0,0 +1,306 @@
 """
 图节点实现 - 定义LangGraph工作流中的各个节点
 """
 import json
 from typing import Dict, Any, Literal
 import logging
 from langchain_core.runnables import RunnableConfig
 from ..core.models import PPTState
 from ..core.json_parser import JSONParser
 from ..generators.chains import ChainManager, ChainExecutor
 from ..utils.logger import LoggerMixin
 logger = logging.getLogger(__name__)
 class GraphNodes(LoggerMixin):
    """图节点集合，包含所有工作流节点的实现"""
    def __init__(self, chain_manager: ChainManager, config=None):
        self.chain_manager = chain_manager
        self.chain_executor = ChainExecutor(chain_manager)
        self.json_parser = JSONParser()
        self.config = config  # 添加配置参数
    def _get_slides_range_text(self, state: Dict[str, Any]) -> str:
        """根据状态中的页数模式生成页数约束文本"""
        page_count_mode = state.get("page_count_mode", "ai_decide")
        min_pages = state.get("min_pages")
        max_pages = state.get("max_pages")
        fixed_pages = state.get("fixed_pages")
        if page_count_mode == "fixed" and fixed_pages:
            result = f"【强制要求】必须生成恰好{fixed_pages}页的PPT，不能多也不能少"
        elif page_count_mode == "custom_range" and min_pages and max_pages:
            result = f"【强制要求】必须严格控制在{min_pages}-{max_pages}页范围内，最少{min_pages}页，最多{max_pages}页，不能超出此范围"
        else:  # ai_decide
            result = "根据内容的复杂度、深度和逻辑结构，自主决定最合适的页数，确保内容充实且逻辑清晰"
        return result
    async def analyze_structure(self, state: PPTState, config: RunnableConfig) -> Dict[str, Any]:
        """
        分析文档结构节点
        Args:
            state: 当前状态
            config: 运行配置
        Returns:
            更新的状态字段
        """
        self.logger.info("开始分析文档结构...")
        try:
            # 获取第一个文档块
            first_chunk = state["document_chunks"][0] if state["document_chunks"] else ""
            if not first_chunk.strip():
                self.logger.warning("第一个文档块为空，使用默认结构")
                structure = {
                    "title": "文档分析",
                    "type": "通用文档",
                    "sections": [],
                    "key_concepts": [],
                    "language": "中文",
                    "complexity": "中等"
                }
            else:
                # 调用结构分析链
                structure_response = await self.chain_executor.execute_with_retry(
                    "structure_analysis",
                    {
                        "content": first_chunk,
                        "project_topic": state.get("project_topic", ""),
                        "project_scenario": state.get("project_scenario", "general"),
                        "project_requirements": state.get("project_requirements", ""),
                        "target_audience": state.get("target_audience", "普通大众"),
                        "custom_audience": state.get("custom_audience", ""),
                        "ppt_style": state.get("ppt_style", "general"),
                        "custom_style_prompt": state.get("custom_style_prompt", "")
                    },
                    config
                )
                # 解析JSON响应
                structure = self.json_parser.extract_json_from_response(structure_response)
                # 验证结构
                if not isinstance(structure, dict):
                    raise ValueError("结构分析返回的不是有效的字典")
            self.logger.info(f"文档结构分析完成: {structure.get('title', '未知标题')}")
            return {
                "document_structure": structure,
                "accumulated_context": first_chunk[:500]  # 保留前500字作为上下文
            }
        except Exception as e:
            self.logger.error(f"文档结构分析失败: {e}")
            # 返回默认结构
            return {
                "document_structure": {
                    "title": "文档分析",
                    "type": "通用文档",
                    "sections": [],
                    "key_concepts": [],
                    "language": "中文",
                    "complexity": "中等"
                },
                "accumulated_context": first_chunk[:500] if state["document_chunks"] else ""
            }
    async def generate_initial_outline(self, state: PPTState, config: RunnableConfig) -> Dict[str, Any]:
        """
        生成初始PPT框架节点
        Args:
            state: 当前状态
            config: 运行配置
        Returns:
            更新的状态字段
        """
        self.logger.info("开始生成初始PPT框架...")
        try:
            # 准备输入
            structure_json = json.dumps(state["document_structure"], ensure_ascii=False)
            first_chunk = state["document_chunks"][0] if state["document_chunks"] else ""
            # 准备输入参数，包含页数范围、目标语言和项目信息
            chain_inputs = {
                "structure": structure_json,
                "content": first_chunk,
                "project_topic": state.get("project_topic", ""),
                "project_scenario": state.get("project_scenario", "general"),
                "project_requirements": state.get("project_requirements", ""),
                "target_audience": state.get("target_audience", "普通大众"),
                "custom_audience": state.get("custom_audience", ""),
                "ppt_style": state.get("ppt_style", "general"),
                "custom_style_prompt": state.get("custom_style_prompt", "")
            }
            # 添加页数范围信息
            slides_range_text = self._get_slides_range_text(state)
            chain_inputs["slides_range"] = slides_range_text
            if self.config:
                chain_inputs["target_language"] = self.config.target_language
            else:
                chain_inputs["target_language"] = "zh"  # 默认中文
            # 调用初始大纲生成链
            outline_response = await self.chain_executor.execute_with_retry(
                "initial_outline",
                chain_inputs,
                config
            )
            # 解析JSON响应
            outline = self.json_parser.extract_json_from_response(outline_response)
            # 验证和修复大纲结构
            outline = self.json_parser.validate_ppt_structure(outline)
            self.logger.info(f"初始PPT框架生成完成: {outline.get('title', '未知标题')}")
            return {
                **state,  # 保留所有原始状态
                "ppt_title": outline.get("title", "学术演示"),
                "total_pages": outline.get("total_pages", 15),
                "page_count_mode": state.get("page_count_mode", "estimated"),  # 保持原始页数模式
                "slides": outline.get("slides", []),
                "current_index": 1
            }
        except Exception as e:
            self.logger.error(f"初始PPT框架生成失败: {e}")
            # 返回默认框架
            return {
                "ppt_title": "学术演示",
                "total_pages": 15,
                "page_count_mode": "estimated",
                "slides": [
                    {
                        "page_number": 1,
                        "title": "标题页",
                        "content_points": ["演示标题", "演示者", "日期"],
                        "slide_type": "title",
                        "description": "PPT开场标题页"
                    }
                ],
                "current_index": 1
            }
    async def refine_outline(self, state: PPTState, config: RunnableConfig) -> Dict[str, Any]:
        """
        细化PPT大纲节点
        Args:
            state: 当前状态
            config: 运行配置
        Returns:
            更新的状态字段
        """
        current_index = state["current_index"]
        total_chunks = len(state["document_chunks"])
        self.logger.info(f"正在细化PPT大纲 ({current_index + 1}/{total_chunks})...")
        # 检查是否还有内容需要处理
        if current_index >= total_chunks:
            self.logger.info("所有文档块已处理完成")
            return state
        try:
            # 获取当前文档块
            current_content = state["document_chunks"][current_index]
            # 准备现有大纲
            existing_outline = {
                "title": state["ppt_title"],
                "total_pages": state["total_pages"],
                "slides": state["slides"]
            }
            existing_outline_json = json.dumps(existing_outline, ensure_ascii=False)
            # 准备输入参数，包含页数范围、目标语言和项目信息
            chain_inputs = {
                "existing_outline": existing_outline_json,
                "new_content": current_content,
                "context": state["accumulated_context"],
                "project_topic": state.get("project_topic", ""),
                "project_scenario": state.get("project_scenario", "general"),
                "project_requirements": state.get("project_requirements", ""),
                "target_audience": state.get("target_audience", "普通大众"),
                "custom_audience": state.get("custom_audience", ""),
                "ppt_style": state.get("ppt_style", "general"),
                "custom_style_prompt": state.get("custom_style_prompt", "")
            }
            # 添加页数范围信息和目标语言
            slides_range_text = self._get_slides_range_text(state)
            chain_inputs["slides_range"] = slides_range_text
            if self.config:
                chain_inputs["target_language"] = self.config.target_language
            else:
                chain_inputs["target_language"] = "zh"  # 默认中文
            # 调用细化链
            refined_response = await self.chain_executor.execute_with_retry(
                "refine_outline",
                chain_inputs,
                config
            )
            # 解析JSON响应
            refined_outline = self.json_parser.extract_json_from_response(refined_response)
            # 验证和修复结构
            refined_outline = self.json_parser.validate_ppt_structure(refined_outline)
            # 更新累积上下文
            new_context = state["accumulated_context"] + "\n" + current_content[:300]
            if len(new_context) > 2000:  # 限制上下文长度
                new_context = new_context[-2000:]
            return {
                **state,  # 保留所有原始状态
                "ppt_title": refined_outline.get("title", state["ppt_title"]),
                "total_pages": refined_outline.get("total_pages", state["total_pages"]),
                "slides": refined_outline.get("slides", state["slides"]),
                "current_index": current_index + 1,
                "accumulated_context": new_context
            }
        except Exception as e:
            self.logger.error(f"PPT大纲细化失败: {e}")
            # 继续处理下一个块
            return {
                **state,
                "current_index": current_index + 1
            }
    def should_continue_refining(self, state: PPTState) -> Literal["refine_outline", "end"]:
        """
        判断是否继续细化的条件函数
        Args:
            state: 当前状态
        Returns:
            下一个节点名称
        """
        current_index = state["current_index"]
        total_chunks = len(state["document_chunks"])
        if current_index >= total_chunks:
            self.logger.info("所有文档块已处理，完成大纲生成")
            return "end"
        else:
            self.logger.debug(f"继续处理文档块 {current_index + 1}/{total_chunks}")
            return "refine_outline"