From a11eadb53cd9c04826672c889b13649122fbf8f7 Mon Sep 17 00:00:00 2001 From: 13315423919 <13315423919@qq.com> Date: Fri, 7 Nov 2025 09:05:23 +0800 Subject: [PATCH] Add File --- src/landppt/services/enhanced_ppt_service.py | 6657 ++++++++++++++++++ 1 file changed, 6657 insertions(+) create mode 100644 src/landppt/services/enhanced_ppt_service.py diff --git a/src/landppt/services/enhanced_ppt_service.py b/src/landppt/services/enhanced_ppt_service.py new file mode 100644 index 0000000..f8c7796 --- /dev/null +++ b/src/landppt/services/enhanced_ppt_service.py @@ -0,0 +1,6657 @@ +""" +Enhanced PPT Service with real AI integration and project management +""" + +import json +import re +import logging +import uuid +import asyncio +import time +import os +import tempfile +from pathlib import Path +from typing import Dict, Any, List, Optional + +from ..api.models import ( + PPTGenerationRequest, PPTOutline, EnhancedPPTOutline, + SlideContent, PPTProject, TodoBoard +) +from ..ai import get_ai_provider, get_role_provider, AIMessage, MessageRole +from ..core.config import ai_config +from .ppt_service import PPTService +from .db_project_manager import DatabaseProjectManager +from .global_master_template_service import GlobalMasterTemplateService + +from .research.enhanced_research_service import EnhancedResearchService +from .research.enhanced_report_generator import EnhancedReportGenerator +from .prompts import prompts_manager +from .image.image_service import ImageService +from .image.adapters.ppt_prompt_adapter import PPTSlideContext +from ..utils.thread_pool import run_blocking_io, to_thread + +# Configure logger for this module +logger = logging.getLogger(__name__) + +class EnhancedPPTService(PPTService): + """Enhanced PPT service with real AI integration and project management""" + + def __init__(self, provider_name: Optional[str] = None): + super().__init__() + self.provider_name = provider_name + self.project_manager = DatabaseProjectManager() + 
def __init__(self, provider_name: Optional[str] = None):
    """Build the service and its optional collaborators.

    Args:
        provider_name: AI provider to use. ``None`` defers to
            ``ai_config.default_ai_provider`` whenever ``ai_provider`` is read.

    The file cache, research services and image service are all optional:
    a failure while setting any of them up is logged and leaves the
    corresponding attribute as ``None`` instead of aborting construction.
    """
    super().__init__()
    self.provider_name = provider_name
    self.project_manager = DatabaseProjectManager()
    self.global_template_service = GlobalMasterTemplateService(provider_name)

    # Settings snapshot used by the summeryanyfile integration; refreshed on
    # demand through update_ai_config().
    self.config = self._get_current_ai_config()

    # File cache: one sub-directory per cache kind under <project root>/temp.
    self.file_cache_manager = None
    self.cache_dirs = None
    try:
        from summeryanyfile.core.file_cache_manager import FileCacheManager

        # This module sits four levels below the project root.
        base_cache_dir = Path(__file__).parent.parent.parent.parent / "temp"
        cache_dirs = {
            'summeryanyfile': base_cache_dir / "summeryanyfile_cache",
            'style_genes': base_cache_dir / "style_genes_cache",
            'ai_responses': base_cache_dir / "ai_responses_cache",
            'templates': base_cache_dir / "templates_cache"
        }
        for cache_path in cache_dirs.values():
            cache_path.mkdir(parents=True, exist_ok=True)

        # The manager itself only serves the summeryanyfile cache; the other
        # directories are exposed through self.cache_dirs.
        self.file_cache_manager = FileCacheManager(cache_dir=str(cache_dirs['summeryanyfile']))
        self.cache_dirs = cache_dirs

        logger.info(f"文件缓存管理器已初始化,分模式缓存目录: {cache_dirs}")
    except ImportError as e:
        logger.warning(f"无法导入文件缓存管理器: {e}")
        self.file_cache_manager = None
        self.cache_dirs = None
    except OSError as e:
        # An unwritable temp directory must not take the whole service down:
        # every consumer already copes with a missing cache.
        logger.warning(f"文件缓存目录初始化失败: {e}")
        self.file_cache_manager = None
        self.cache_dirs = None

    # Research services (the working instances are stored by
    # _initialize_research_services under the enhanced_* attributes).
    self.research_service = None
    self.report_generator = None
    self._initialize_research_services()

    # Image service
    self.image_service = None
    self._initialize_image_service()


@property
def ai_provider(self):
    """Return the AI provider, resolved on every access so config changes apply immediately."""
    return get_ai_provider(self.provider_name or ai_config.default_ai_provider)
def _initialize_research_services(self):
    """Create the enhanced research service and report generator.

    On any failure both ``enhanced_research_service`` and
    ``enhanced_report_generator`` are set to ``None`` and a warning is logged.
    """
    try:
        self.enhanced_research_service = EnhancedResearchService()
        self.enhanced_report_generator = EnhancedReportGenerator()

        if self.enhanced_research_service.is_available():
            logger.info("Enhanced Research service initialized successfully")
            available_providers = self.enhanced_research_service.get_available_providers()
            logger.info(f"Available research providers: {', '.join(available_providers)}")
        else:
            logger.warning("Enhanced research service not available - check API configurations")

    except Exception as e:
        logger.warning(f"Failed to initialize enhanced research service: {e}")
        self.enhanced_research_service = None
        self.enhanced_report_generator = None


def _initialize_image_service(self):
    """Create the image service and initialise it.

    Without a running event loop the service is initialised synchronously via
    ``asyncio.run``. Inside a running loop initialisation is scheduled as a
    task whose reference is kept on ``self._image_service_init_task`` so it
    cannot be garbage-collected before it finishes.

    On any failure ``self.image_service`` is set to ``None`` and a warning is
    logged; nothing is raised.
    """
    try:
        from .image.config.image_config import get_image_config

        config_manager = get_image_config()
        image_config = config_manager.get_config()

        # Keep image caches next to the other AI response caches. setdefault
        # guards against a config that has no 'cache' section yet.
        if self.cache_dirs:
            cache_section = image_config.setdefault('cache', {})
            cache_section['base_dir'] = str(self.cache_dirs['ai_responses'] / 'images_cache')

        config_errors = config_manager.validate_config()
        if config_errors:
            logger.warning(f"Image service configuration errors: {config_errors}")

        configured_providers = config_manager.get_configured_providers()
        if configured_providers:
            logger.info(f"Configured image providers: {configured_providers}")
        else:
            logger.warning("No image providers configured. Please set API keys in environment variables.")

        self.image_service = ImageService(image_config)

        # get_running_loop() is the supported way to ask "am I inside a loop?";
        # get_event_loop() is deprecated for that purpose.
        try:
            asyncio.get_running_loop()
            loop_is_running = True
        except RuntimeError:
            loop_is_running = False

        if loop_is_running:
            self._image_service_init_task = asyncio.create_task(
                self._async_initialize_image_service()
            )
            logger.info("Image service initialization scheduled on the running event loop")
        else:
            asyncio.run(self.image_service.initialize())
            logger.info("Image service initialized successfully")

    except Exception as e:
        logger.warning(f"Failed to initialize image service: {e}")
        self.image_service = None
async def _async_initialize_image_service(self):
    """Initialise the image service from inside a running event loop.

    Does nothing when there is no image service or it is already initialised.
    Errors are logged, never raised, because this runs as a background task.
    """
    try:
        if self.image_service and not self.image_service.initialized:
            await self.image_service.initialize()
            logger.debug("Image service async initialization completed")
    except Exception as e:
        logger.error(f"Failed to async initialize image service: {e}")


def reload_research_config(self):
    """Rebuild the enhanced research services so new configuration takes effect.

    The research service exposes no reload hook, so it is re-created. This
    also runs when the service is currently ``None`` (e.g. API keys were
    missing at start-up), which is exactly when a reload is needed to recover.
    """
    try:
        self._initialize_research_services()
    except Exception as e:
        logger.warning(f"Failed to reload enhanced research service config: {e}")
        return

    if getattr(self, 'enhanced_research_service', None):
        logger.info("Enhanced research service configuration reloaded in EnhancedPPTService")
    else:
        logger.warning("Enhanced research service is still unavailable after reload")


def _get_current_ai_config(self, role: str = "default"):
    """Return the current AI settings for ``role`` as a plain dict.

    Keys: ``llm_model``, ``llm_provider``, ``temperature``, ``max_tokens``.
    ``temperature`` and ``max_tokens`` fall back to 0.7 / 2000 when
    ``ai_config`` does not define them.
    """
    role_settings = ai_config.get_model_config_for_role(role, provider_override=self.provider_name)
    return {
        "llm_model": role_settings.get("model"),
        "llm_provider": role_settings.get("provider"),
        "temperature": getattr(ai_config, 'temperature', 0.7),
        "max_tokens": getattr(ai_config, 'max_tokens', 2000)
    }


def _get_role_provider(self, role: str):
    """Return whatever ``get_role_provider`` yields for ``role``, honouring the provider override."""
    return get_role_provider(role, provider_override=self.provider_name)
async def _text_completion_for_role(self, role: str, *, prompt: str, **kwargs):
    """Run a text completion with the provider configured for ``role``.

    The role's model is used unless the caller passed ``model`` explicitly.
    """
    provider, role_settings = self._get_role_provider(role)
    role_model = role_settings.get("model")
    if role_model:
        kwargs.setdefault("model", role_model)
    return await provider.text_completion(prompt=prompt, **kwargs)


async def _chat_completion_for_role(self, role: str, *, messages: List[AIMessage], **kwargs):
    """Run a chat completion with the provider configured for ``role``.

    The role's model is used unless the caller passed ``model`` explicitly.
    """
    provider, role_settings = self._get_role_provider(role)
    role_model = role_settings.get("model")
    if role_model:
        kwargs.setdefault("model", role_model)
    return await provider.chat_completion(messages=messages, **kwargs)


def update_ai_config(self):
    """Refresh ``self.config`` from the current AI configuration and log the result."""
    refreshed = self._get_current_ai_config()
    self.config = refreshed
    logger.info(f"AI配置已更新: provider={self.config['llm_provider']}, model={self.config['llm_model']}")
def _build_summeryfile_env(self, provider: str, provider_config: Dict[str, Any]) -> Dict[str, Any]:
    """Map a provider configuration to the environment variables summeryanyfile reads.

    Args:
        provider: Provider name (``openai``, ``anthropic``, ``google``/``gemini``,
            ``ollama`` or ``302ai``); any other value yields only the generic variables.
        provider_config: Provider settings (``api_key``, ``base_url``, ``model``,
            ``max_tokens``, ``temperature``).

    Returns:
        Environment variable name -> value. Empty/missing settings are skipped,
        except that a temperature of ``0`` is a legitimate value and is kept.
    """
    provider_env_names = {
        "openai": {"api_key": "OPENAI_API_KEY", "base_url": "OPENAI_BASE_URL", "model": "OPENAI_MODEL"},
        "anthropic": {"api_key": "ANTHROPIC_API_KEY", "model": "ANTHROPIC_MODEL"},
        "google": {"api_key": "GOOGLE_API_KEY", "model": "GOOGLE_MODEL", "base_url": "GOOGLE_BASE_URL"},
        "gemini": {"api_key": "GOOGLE_API_KEY", "model": "GOOGLE_MODEL", "base_url": "GOOGLE_BASE_URL"},
        "ollama": {"base_url": "OLLAMA_BASE_URL", "model": "OLLAMA_MODEL"},
        "302ai": {"api_key": "302AI_API_KEY", "base_url": "302AI_BASE_URL", "model": "302AI_MODEL"},
    }

    env: Dict[str, Any] = {}
    if provider_config.get("max_tokens"):
        env["MAX_TOKENS"] = str(provider_config["max_tokens"])

    # `if temperature:` would silently drop a deliberate temperature of 0.
    temperature = provider_config.get("temperature")
    if temperature is not None and temperature != "":
        env["TEMPERATURE"] = str(temperature)

    for field, env_name in provider_env_names.get(provider, {}).items():
        value = provider_config.get(field)
        if value:
            env[env_name] = value
    return env


def _configure_summeryfile_api(self, generator, role: str = "default"):
    """Export the AI settings for ``role`` as environment variables for summeryanyfile.

    Args:
        generator: Accepted for interface compatibility; not used here.
        role: Task role whose provider/model configuration should be exported.

    Failures are logged as warnings and never raised.
    """
    try:
        role_settings = ai_config.get_model_config_for_role(role, provider_override=self.provider_name)
        current_provider = role_settings.get("provider")
        provider_config = ai_config.get_provider_config(current_provider).copy()
        if role_settings.get("model"):
            provider_config["model"] = role_settings["model"]

        os.environ.update(self._build_summeryfile_env(current_provider, provider_config))

        # (display label, whether the log line also reports base_url)
        log_labels = {
            "openai": ("OpenAI", True),
            "anthropic": ("Anthropic", False),
            "google": ("Google/Gemini", False),
            "gemini": ("Google/Gemini", False),
            "ollama": ("Ollama", True),
            "302ai": ("302.AI", True),
        }
        if current_provider in log_labels:
            label, with_base_url = log_labels[current_provider]
            details = f"model={provider_config.get('model')}"
            if with_base_url:
                details += f", base_url={provider_config.get('base_url')}"
            logger.info(f"已配置summeryanyfile {label} API: {details}")

        logger.info(f"已配置summeryanyfile通用参数: max_tokens={provider_config.get('max_tokens')}, temperature={provider_config.get('temperature')}")

    except Exception as e:
        logger.warning(f"配置summeryanyfile API时出现问题: {e}")


def get_cache_stats(self) -> Dict[str, Any]:
    """Return file-cache statistics, or an ``{"error": ...}`` dict when no cache manager exists."""
    if not self.file_cache_manager:
        return {"error": "缓存管理器未初始化"}
    return self.file_cache_manager.get_cache_stats()
def cleanup_cache(self):
    """Purge expired entries from the file cache, the style-gene cache and the in-memory cache."""
    # summeryanyfile cache
    if self.file_cache_manager:
        try:
            self.file_cache_manager.cleanup_expired_cache()
            logger.info("summeryanyfile缓存清理完成")
        except Exception as e:
            logger.error(f"summeryanyfile缓存清理失败: {e}")

    # Style-gene cache files on disk
    self._cleanup_style_genes_cache()

    # In-memory style-gene cache, if one has been created
    if hasattr(self, '_cached_style_genes'):
        self._cached_style_genes.clear()
        logger.info("内存中的设计基因缓存已清理")


def _read_cache_created_at(self, cache_file) -> float:
    """Return the creation timestamp of a style-gene cache file.

    Uses the ``created_at`` field stored in the JSON payload. When the file is
    unreadable or the field is missing/non-numeric, falls back to the file's
    modification time so the entry is aged from when it was written rather
    than being treated as infinitely old.
    """
    try:
        with open(cache_file, 'r', encoding='utf-8') as f:
            created_at = json.load(f).get('created_at')
    except (OSError, ValueError, AttributeError):
        created_at = None

    if isinstance(created_at, bool) or not isinstance(created_at, (int, float)):
        return cache_file.stat().st_mtime
    return float(created_at)


def _cleanup_style_genes_cache(self, max_age_days: int = 7):
    """Delete style-gene cache files older than ``max_age_days``.

    Only files matching ``*_style_genes.json`` in the ``style_genes`` cache
    directory are considered. Problems with individual files are logged and
    skipped; nothing is raised.
    """
    cache_dirs = getattr(self, 'cache_dirs', None)
    if not cache_dirs:
        return

    try:
        cache_dir = cache_dirs['style_genes']
        if not cache_dir.exists():
            return

        now = time.time()
        max_age_seconds = max_age_days * 24 * 3600
        cleaned_count = 0

        for cache_file in cache_dir.glob("*_style_genes.json"):
            try:
                if now - self._read_cache_created_at(cache_file) > max_age_seconds:
                    cache_file.unlink()
                    cleaned_count += 1
                    logger.debug(f"删除过期的设计基因缓存文件: {cache_file.name}")
            except Exception as e:
                logger.warning(f"处理缓存文件 {cache_file} 时出错: {e}")

        if cleaned_count > 0:
            logger.info(f"设计基因缓存清理完成,删除了 {cleaned_count} 个过期文件")
        else:
            logger.info("设计基因缓存清理完成,没有过期文件需要删除")

    except Exception as e:
        logger.error(f"设计基因缓存清理失败: {e}")
async def _generate_outline_from_research(self, request: PPTGenerationRequest,
                                          page_count_settings: Optional[Dict[str, Any]]) -> Optional[PPTOutline]:
    """Try to build the outline from a freshly generated research report.

    Runs enhanced research for the request, saves the report to a file and
    feeds that file to ``generate_outline_from_file``.

    Returns:
        The outline on success, or ``None`` when research is unavailable or
        any step fails (the caller then falls back to the plain AI prompt).
    """
    # The attribute exists but is None when initialisation failed, so a
    # hasattr() check alone is not enough.
    research_service = getattr(self, 'enhanced_research_service', None)
    if research_service is None or not research_service.is_available():
        logger.info("Network mode enabled but no research services available")
        return None

    logger.info(f"Starting Enhanced research for topic: {request.topic}")
    try:
        research_context = {
            'scenario': request.scenario,
            'target_audience': getattr(request, 'target_audience', '普通大众'),
            'requirements': request.requirements,
            'ppt_style': getattr(request, 'ppt_style', 'general'),
            'description': getattr(request, 'description', '')
        }

        enhanced_report = await research_service.conduct_enhanced_research(
            topic=request.topic,
            language=request.language,
            context=research_context
        )

        # Persist the report first; the file-based outline generator reads it from disk.
        report_path = None
        report_generator = getattr(self, 'enhanced_report_generator', None)
        if report_generator is not None:
            try:
                report_path = report_generator.save_report_to_file(enhanced_report)
                logger.info(f"Enhanced research report saved to: {report_path}")
            except Exception as save_error:
                logger.warning(f"Failed to save enhanced research report: {save_error}")

        if report_path and Path(report_path).exists():
            logger.info(f"Using saved research report file for outline generation: {report_path}")
            try:
                from ..api.models import FileOutlineGenerationRequest

                settings = page_count_settings or {}
                file_request = FileOutlineGenerationRequest(
                    file_path=report_path,
                    filename=Path(report_path).name,
                    topic=request.topic,
                    scenario=request.scenario,
                    requirements=request.requirements,
                    target_audience=getattr(request, 'target_audience', '普通大众'),
                    ppt_style=getattr(request, 'ppt_style', 'general'),
                    custom_style_prompt=getattr(request, 'custom_style_prompt', ''),
                    page_count_mode=settings.get('mode', 'ai_decide'),
                    min_pages=settings.get('min_pages'),
                    max_pages=settings.get('max_pages'),
                    fixed_pages=settings.get('fixed_pages'),
                    language=request.language
                )

                file_outline_result = await self.generate_outline_from_file(file_request)

                if file_outline_result.success and file_outline_result.outline:
                    outline_data = file_outline_result.outline
                    outline = PPTOutline(
                        title=outline_data.get('title', request.topic),
                        slides=outline_data.get('slides', []),
                        metadata={
                            **outline_data.get('metadata', {}),
                            'research_enhanced': True,
                            'research_file_path': report_path,
                            'generated_from_research_file': True
                        }
                    )

                    if page_count_settings:
                        outline.metadata["page_count_settings"] = page_count_settings

                    logger.info("Successfully generated outline from research report file")
                    return outline

                logger.warning("File-based outline generation failed, falling back to traditional method")

            except Exception as file_error:
                logger.warning(f"Failed to generate outline from research file, falling back to traditional method: {file_error}")

        logger.info("Enhanced research completed but file-based outline generation failed")

    except Exception as e:
        logger.error(f"Enhanced research failed: {e}")

    return None


async def generate_outline(self, request: PPTGenerationRequest, page_count_settings: Dict[str, Any] = None) -> PPTOutline:
    """Generate a PPT outline with the AI provider.

    Args:
        request: Generation request (topic, scenario, language, ...). When
            ``request.network_mode`` is set, a research-report-based outline is
            attempted first.
        page_count_settings: Optional page-count constraints; copied into the
            outline metadata under ``page_count_settings``.

    Returns:
        The generated outline.

    Raises:
        Exception: With a user-facing message when generation fails; the
            original error is chained as ``__cause__``.
    """
    try:
        if request.network_mode:
            researched_outline = await self._generate_outline_from_research(request, page_count_settings)
            if researched_outline is not None:
                return researched_outline

        # Research results are only consumed through the file-based path
        # above, so the plain prompt is built without research text.
        prompt = self._create_outline_prompt(request, "", page_count_settings)

        response = await self._text_completion_for_role(
            "outline",
            prompt=prompt,
            max_tokens=ai_config.max_tokens,
            temperature=ai_config.temperature
        )

        outline = self._parse_ai_outline(response.content, request)

        if page_count_settings:
            outline.metadata["page_count_settings"] = page_count_settings

        return outline

    except Exception as e:
        logger.error(f"Error generating AI outline: {str(e)}")
        # No default outline: surface a clear message so the user can retry.
        error_text = str(e).lower()
        if "timeout" in error_text or "request timed out" in error_text:
            raise Exception("AI服务响应超时,请检查网络连接后重新生成大纲。") from e
        elif "api" in error_text and "error" in error_text:
            raise Exception("AI服务暂时不可用,请稍后重新生成大纲。") from e
        else:
            raise Exception(f"AI生成大纲失败:{str(e)}。请重新生成大纲。") from e
def _fallback_slide_content(self, slide_request: Dict[str, Any]) -> str:
    """Return placeholder bullet text for a slide whose generation failed."""
    slide_title = slide_request.get('slide_title', slide_request.get('title', ''))
    return f"• {slide_title}的相关内容\n• 详细说明和分析\n• 实际应用案例"


async def _generate_slides_sequentially(self, slide_requests: List[Dict[str, Any]], scenario: str,
                                        topic: str, language: str) -> List[str]:
    """Generate slides one at a time, substituting placeholder text for any slide that fails."""
    results = []
    for req in slide_requests:
        try:
            content = await self.generate_slide_content(
                req.get('slide_title', req.get('title', '')),
                scenario,
                topic,
                language
            )
        except Exception as slide_error:
            logger.error(f"生成幻灯片失败: {str(slide_error)}")
            content = self._fallback_slide_content(req)
        results.append(content)
    return results


async def generate_slides_parallel(self, slide_requests: List[Dict[str, Any]], scenario: str, topic: str, language: str = "zh") -> List[str]:
    """Generate the content of several slides, concurrently when enabled.

    Args:
        slide_requests: One dict per slide; the title is read from
            ``slide_title`` (or ``title``).
        scenario: Presentation scenario.
        topic: Presentation topic.
        language: Content language.

    Returns:
        One content string per request, in request order. A slide that fails
        gets placeholder text instead of raising.
    """
    # With parallel generation disabled, go straight to the per-slide-safe
    # sequential path so a single failure never triggers a second full pass.
    if not getattr(ai_config, 'enable_parallel_generation', False):
        return await self._generate_slides_sequentially(slide_requests, scenario, topic, language)

    try:
        tasks = [
            self.generate_slide_content(
                req.get('slide_title', req.get('title', '')),
                scenario,
                topic,
                language
            )
            for req in slide_requests
        ]

        # return_exceptions=True keeps one failing slide from cancelling the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        processed_results = []
        failed_count = 0
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                failed_count += 1
                logger.error(f"生成第 {i+1} 个幻灯片时出错: {str(result)}")
                processed_results.append(self._fallback_slide_content(slide_requests[i]))
            else:
                processed_results.append(result)

        logger.info(f"并行生成完成:成功生成 {len(results) - failed_count} / {len(results)} 个幻灯片")
        return processed_results

    except Exception as e:
        logger.error(f"并行生成幻灯片失败: {str(e)}")
        # Degrade to sequential generation
        return await self._generate_slides_sequentially(slide_requests, scenario, topic, language)


async def generate_slide_content(self, slide_title: str, scenario: str, topic: str, language: str = "zh") -> str:
    """Generate the body text of one slide with the ``slide_generation`` role model.

    Falls back to the non-AI ``_generate_slide_content`` when the AI call fails.
    """
    try:
        prompt = self._create_slide_content_prompt(slide_title, scenario, topic, language)

        response = await self._text_completion_for_role(
            "slide_generation",
            prompt=prompt,
            max_tokens=ai_config.max_tokens,
            temperature=ai_config.temperature
        )

        return response.content.strip()

    except Exception as e:
        logger.error(f"Error generating slide content: {str(e)}")
        return self._generate_slide_content(topic, slide_title, scenario, language)


async def enhance_content_with_ai(self, content: str, scenario: str, language: str = "zh") -> str:
    """Ask the AI to polish ``content``; return the original text unchanged if that fails."""
    try:
        prompt = self._create_enhancement_prompt(content, scenario, language)

        response = await self._text_completion_for_role(
            "outline",
            prompt=prompt,
            max_tokens=ai_config.max_tokens,
            # Slightly lower temperature than generation, floored at 0.1.
            temperature=max(ai_config.temperature - 0.1, 0.1)
        )

        return response.content.strip()

    except Exception as e:
        logger.error(f"Error enhancing content: {str(e)}")
        return content
def _build_page_count_instruction(self, page_count_settings: Optional[Dict[str, Any]]):
    """Translate page-count settings into a prompt instruction.

    Returns:
        ``(instruction_text, expected_page_count)``. Missing or ``None``
        values fall back to 8-15 pages for ``custom_range`` and 10 pages for
        ``fixed``; any other mode lets the AI decide (expected count 12).
    """
    ai_decides = ("- 页数要求:根据内容复杂度自主决定合适的页数", 12)
    if not page_count_settings:
        return ai_decides

    mode = page_count_settings.get('mode', 'ai_decide')
    if mode == 'custom_range':
        # `or` (not a .get default) so keys present with a None value still get a number.
        min_pages = page_count_settings.get('min_pages') or 8
        max_pages = page_count_settings.get('max_pages') or 15
        return (f"- 页数要求:必须严格生成{min_pages}-{max_pages}页的PPT,确保页数在此范围内", max_pages)
    if mode == 'fixed':
        fixed_pages = page_count_settings.get('fixed_pages') or 10
        return (f"- 页数要求:必须生成恰好{fixed_pages}页的PPT", fixed_pages)
    return ai_decides


def _create_outline_prompt(self, request: PPTGenerationRequest, research_context: str = "", page_count_settings: Dict[str, Any] = None) -> str:
    """Build the outline-generation prompt from the request.

    Args:
        request: Generation request; ``language == "zh"`` selects the Chinese
            template, anything else the English one.
        research_context: Optional research text appended as background.
        page_count_settings: Optional page-count constraints.

    Returns:
        The prompt text produced by ``prompts_manager``.
    """
    scenario_descriptions = {
        "general": "通用演示",
        "tourism": "旅游观光介绍",
        "education": "儿童科普教育",
        "analysis": "深入数据分析",
        "history": "历史文化主题",
        "technology": "科技技术展示",
        "business": "方案汇报"
    }
    scenario_desc = scenario_descriptions.get(request.scenario, "通用演示")

    page_count_instruction, expected_page_count = self._build_page_count_instruction(page_count_settings)
    logger.debug(f"Page count instruction: {page_count_instruction}")

    research_section = ""
    if research_context:
        research_section = (
            f"\n\n基于深度研究的背景信息:\n{research_context}\n\n"
            "请充分利用以上研究信息来丰富PPT内容,确保信息准确、权威、具有深度。"
        )

    target_audience = getattr(request, 'target_audience', None) or '普通大众'
    ppt_style = getattr(request, 'ppt_style', None) or 'general'
    custom_style_prompt = getattr(request, 'custom_style_prompt', None)
    description = getattr(request, 'description', None)
    language = getattr(request, 'language', None)

    style_descriptions = {
        "general": "通用风格,详细专业",
        "conference": "学术会议风格,严谨正式",
        "custom": custom_style_prompt or "自定义风格"
    }
    style_desc = style_descriptions.get(ppt_style, "通用风格")

    # A custom style prompt refines any non-custom style as well.
    if custom_style_prompt and ppt_style != "custom":
        style_desc += f",{custom_style_prompt}"

    use_chinese = language == "zh"
    build_prompt = prompts_manager.get_outline_prompt_zh if use_chinese else prompts_manager.get_outline_prompt_en
    return build_prompt(
        topic=request.topic,
        scenario_desc=scenario_desc,
        target_audience=target_audience,
        style_desc=style_desc,
        requirements=request.requirements or '',
        description=description or '',
        research_section=research_section,
        page_count_instruction=page_count_instruction,
        expected_page_count=expected_page_count,
        language=language or ('zh' if use_chinese else 'en')
    )


def _create_slide_content_prompt(self, slide_title: str, scenario: str, topic: str, language: str) -> str:
    """Build the slide-content prompt (Chinese template for ``zh``, English otherwise)."""
    if language == "zh":
        return prompts_manager.get_slide_content_prompt_zh(slide_title, scenario, topic)
    return prompts_manager.get_slide_content_prompt_en(slide_title, scenario, topic)


def _create_enhancement_prompt(self, content: str, scenario: str, language: str) -> str:
    """Build the content-enhancement prompt (Chinese template for ``zh``, English otherwise)."""
    if language == "zh":
        return prompts_manager.get_enhancement_prompt_zh(content, scenario)
    return prompts_manager.get_enhancement_prompt_en(content, scenario)
def _extract_outline_json(self, ai_response: str) -> Optional[str]:
    """Return the first JSON object found in the AI response, or ``None``.

    Tries, in order: a ```json fenced block, an unlabelled fenced block, then
    a bare object with at most one level of nested braces.
    """
    extractors = (
        (r'```json\s*(\{.*?\})\s*```', 1, "从```json```代码块中提取大纲JSON"),
        (r'```\s*(\{.*?\})\s*```', 1, "从```代码块中提取大纲JSON"),
        (r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', 0, "使用正则表达式提取大纲JSON"),
    )
    for pattern, group, message in extractors:
        match = re.search(pattern, ai_response, re.DOTALL)
        if match:
            logger.info(message)
            return match.group(group)
    return None


def _extract_outline_title(self, lines: List[str], default: str) -> str:
    """Return the text after the first ``标题:`` / ``Title:`` prefix, or ``default``.

    Only the prefix is removed, so titles that themselves contain a colon are
    kept intact. Full-width colons are accepted as well.
    """
    prefixes = ('标题:', '标题:', 'Title:', 'Title:')
    for line in lines:
        for prefix in prefixes:
            if line.startswith(prefix):
                return line[len(prefix):].strip()
    return default


def _parse_text_outline_slides(self, lines: List[str]) -> List[Dict[str, Any]]:
    """Turn ``N. title - description`` lines into slide dicts; other lines are ignored."""
    slides = []
    for raw_line in lines:
        line = raw_line.strip()
        if not line or not re.match(r'^\d+\.', line):
            continue

        numbered_title, separator, description = line.partition(' - ')
        slide_title = numbered_title.split('.', 1)[1].strip()
        slide_desc = description.strip() if separator else ""

        lowered_title = slide_title.lower()
        slide_type = "content"
        if "封面" in slide_title or "title" in lowered_title:
            slide_type = "title"
        elif "目录" in slide_title or "agenda" in lowered_title:
            slide_type = "agenda"
        elif "谢谢" in slide_title or "thank" in lowered_title:
            slide_type = "thankyou"

        # Same shape as the file-based outline generator produces.
        slides.append({
            "page_number": len(slides) + 1,
            "title": slide_title,
            "content_points": [slide_desc] if slide_desc else ["内容要点"],
            "slide_type": slide_type,
            "type": slide_type,  # duplicated so both access styles work
            "description": slide_desc
        })
    return slides


def _parse_ai_outline(self, ai_response: str, request: PPTGenerationRequest) -> PPTOutline:
    """Parse the AI response into a structured outline.

    JSON (fenced or bare) containing a ``slides`` key is preferred; otherwise
    the response is parsed as a numbered text list.

    Raises:
        Exception: When no slide structure can be recognised; the underlying
            error is chained as ``__cause__``.
    """
    try:
        json_str = self._extract_outline_json(ai_response)

        if json_str:
            try:
                # Strip trailing commas, a common LLM JSON mistake.
                json_str = json_str.strip()
                json_str = re.sub(r',\s*}', '}', json_str)
                json_str = re.sub(r',\s*]', ']', json_str)

                json_data = json.loads(json_str)
                if 'slides' in json_data:
                    logger.info(f"Successfully parsed JSON outline with {len(json_data['slides'])} slides")

                    standardized_data = self._standardize_outline_format(json_data)

                    metadata = standardized_data.get("metadata", {})
                    metadata.update({
                        "scenario": request.scenario,
                        "language": request.language,
                        "total_slides": len(standardized_data.get("slides", [])),
                        "generated_with_ai": True,
                        "ai_provider": self.provider_name
                    })

                    return PPTOutline(
                        title=standardized_data.get("title", request.topic),
                        slides=standardized_data.get("slides", []),
                        metadata=metadata
                    )
            except json.JSONDecodeError as e:
                logger.warning(f"Failed to parse extracted JSON: {e}")

        # Fallback: plain-text outline
        logger.info("JSON解析失败,使用文本解析方式")
        lines = ai_response.strip().split('\n')
        title = self._extract_outline_title(lines, request.topic)
        slides = self._parse_text_outline_slides(lines)

        # No default structure: an unparseable outline is an error.
        if not slides:
            raise Exception("AI生成的大纲内容为空或无法识别有效的幻灯片结构")

        return PPTOutline(
            title=title,
            slides=slides,
            metadata={
                "scenario": request.scenario,
                "language": request.language,
                "total_slides": len(slides),
                "generated_with_ai": True,
                "ai_provider": self.provider_name
            }
        )

    except Exception as e:
        logger.error(f"Error parsing AI outline: {str(e)}")
        raise Exception(f"AI生成的大纲格式无效,无法解析:{str(e)}") from e
def _create_default_slides(self, title: str, request: PPTGenerationRequest) -> List[Dict[str, Any]]:
    """Return the four-slide default deck in the legacy format.

    Keys per slide: ``id``, ``type``, ``title``, ``subtitle``, ``content``.
    All visible text follows ``request.language`` (Chinese for ``zh``,
    English otherwise).
    """
    is_zh = request.language == "zh"
    return [
        {
            "id": 1,
            "type": "title",
            "title": title,
            "subtitle": "专业演示" if is_zh else "Professional Presentation",
            "content": ""
        },
        {
            "id": 2,
            "type": "agenda",
            "title": "目录" if is_zh else "Agenda",
            "subtitle": "",
            "content": (
                "• 主要内容概览\n• 核心要点分析\n• 总结与展望" if is_zh
                else "• Overview of the main content\n• Analysis of key points\n• Summary and outlook"
            )
        },
        {
            "id": 3,
            "type": "content",
            "title": "主要内容" if is_zh else "Main Content",
            "subtitle": "",
            "content": (
                f"• 关于{title}的核心要点\n• 详细分析和说明\n• 实际应用案例" if is_zh
                else f"• Key points about {title}\n• Detailed analysis and explanation\n• Practical use cases"
            )
        },
        {
            "id": 4,
            "type": "thankyou",
            "title": "谢谢" if is_zh else "Thank You",
            "subtitle": "感谢聆听" if is_zh else "Thank you for your attention",
            "content": ""
        }
    ]


def _create_default_slides_compatible(self, title: str, request: PPTGenerationRequest) -> List[Dict[str, Any]]:
    """Return the four-slide default deck in the file-generation format.

    Keys per slide: ``page_number``, ``title``, ``content_points``,
    ``slide_type``, ``type``, ``description``. Titles and content points
    follow ``request.language``; ``description`` is left as in the original.
    """
    is_zh = request.language == "zh"
    return [
        {
            "page_number": 1,
            "title": title,
            "content_points": ["专业演示" if is_zh else "Professional Presentation"],
            "slide_type": "title",
            "type": "title",
            "description": "PPT标题页"
        },
        {
            "page_number": 2,
            "title": "目录" if is_zh else "Agenda",
            "content_points": (
                ["主要内容概览", "核心要点分析", "总结与展望"] if is_zh
                else ["Overview of the main content", "Analysis of key points", "Summary and outlook"]
            ),
            "slide_type": "agenda",
            "type": "agenda",
            "description": "PPT目录页"
        },
        {
            "page_number": 3,
            "title": "主要内容" if is_zh else "Main Content",
            "content_points": (
                [f"关于{title}的核心要点", "详细分析和说明", "实际应用案例"] if is_zh
                else [f"Key points about {title}", "Detailed analysis and explanation", "Practical use cases"]
            ),
            "slide_type": "content",
            "type": "content",
            "description": "主要内容页"
        },
        {
            "page_number": 4,
            "title": "谢谢" if is_zh else "Thank You",
            "content_points": ["感谢聆听" if is_zh else "Thank you for your attention"],
            "slide_type": "thankyou",
            "type": "thankyou",
            "description": "PPT结束页"
        }
    ]


def _create_default_outline(self, request: PPTGenerationRequest) -> PPTOutline:
    """Return a non-AI default outline (legacy slide format) titled with the request topic."""
    slides = self._create_default_slides(request.topic, request)

    return PPTOutline(
        title=request.topic,
        slides=slides,
        metadata={
            "scenario": request.scenario,
            "language": request.language,
            "total_slides": len(slides),
            "generated_with_ai": False,
            "fallback_used": True
        }
    )
async def create_project_with_workflow(self, request: PPTGenerationRequest) -> PPTProject:
    """Create a project (with its TODO board) and start its workflow.

    Raises:
        Exception: Any error from project creation or workflow start is
            logged and re-raised.
    """
    try:
        project = await self.project_manager.create_project(request)
        await self._execute_project_workflow(project.project_id, request)
        return project

    except Exception as e:
        logger.error(f"Error creating project with workflow: {str(e)}")
        raise


async def _execute_project_workflow(self, project_id: str, request: PPTGenerationRequest):
    """Run every workflow stage of the project in order.

    Nothing runs until the project's requirements are confirmed. The
    ``requirements_confirmation`` stage is skipped. A failing stage is marked
    ``failed`` and the remaining stages still run, but the project is only
    marked ``completed`` when every stage succeeded.

    Errors outside a stage (missing project/board, status updates) are logged
    and recorded on the board's current stage; they are not raised.
    """
    try:
        project = await self.project_manager.get_project(project_id)
        if not project:
            raise ValueError("Project not found")

        if not project.confirmed_requirements:
            logger.info(f"Project {project_id} workflow waiting for requirements confirmation")
            return

        todo_board = await self.project_manager.get_todo_board(project_id)
        if not todo_board:
            raise ValueError("TODO board not found for project")

        failed_stage_ids = []
        for stage_index, stage in enumerate(todo_board.stages):
            # Requirements were confirmed before the workflow could start.
            if stage.id == "requirements_confirmation":
                continue

            logger.info(f"Starting stage {stage_index + 1}: {stage.name}")

            await self.project_manager.update_stage_status(
                project_id, stage.id, "running", 0.0
            )

            try:
                stage_result = await self._execute_complete_stage(project_id, stage.id, request)
            except Exception as e:
                logger.error(f"Error executing stage '{stage.name}': {str(e)}")
                # Record the failure but keep going with the next stage.
                await self.project_manager.update_stage_status(
                    project_id, stage.id, "failed", 0.0, {"error": str(e)}
                )
                failed_stage_ids.append(stage.id)
                continue

            # Stage results are stored as dicts; wrap bare strings.
            result_dict = {"message": stage_result} if isinstance(stage_result, str) else stage_result
            await self.project_manager.update_stage_status(
                project_id, stage.id, "completed", 100.0, result_dict
            )

            logger.info(f"Completed stage: {stage.name}")

        if failed_stage_ids:
            # A project with failed stages is not complete; leave its status as is.
            logger.warning(f"Project workflow finished with failed stages: {project_id} -> {failed_stage_ids}")
            return

        await self.project_manager.update_project_status(project_id, "completed")
        logger.info(f"Project workflow completed: {project_id}")

    except Exception as e:
        logger.error(f"Error in project workflow: {str(e)}")
        # Attribute the error to whichever stage the board considers current.
        todo_board = await self.project_manager.get_todo_board(project_id)
        if todo_board and todo_board.current_stage_index < len(todo_board.stages):
            current_stage = todo_board.stages[todo_board.current_stage_index]
            await self.project_manager.update_stage_status(
                project_id, current_stage.id, "failed", 0.0,
                {"error": str(e)}
            )
in dictionary for proper serialization + result_dict = {"message": stage_result} if isinstance(stage_result, str) else stage_result + await self.project_manager.update_stage_status( + project_id, stage.id, "completed", 100.0, result_dict + ) + + logger.info(f"Completed stage: {stage.name}") + + # Mark project as completed + await self.project_manager.update_project_status(project_id, "completed") + logger.info(f"Project workflow completed: {project_id}") + + except Exception as e: + logger.error(f"Error in project workflow: {str(e)}") + # Mark current stage as failed + todo_board = await self.project_manager.get_todo_board(project_id) + if todo_board and todo_board.current_stage_index < len(todo_board.stages): + current_stage = todo_board.stages[todo_board.current_stage_index] + await self.project_manager.update_stage_status( + project_id, current_stage.id, "failed", 0.0, + {"error": str(e)} + ) + + async def _execute_complete_stage(self, project_id: str, stage_id: str, request: PPTGenerationRequest): + """Execute a complete stage as a single task""" + try: + logger.info(f"Executing complete stage: {stage_id}") + + # Get project and confirmed requirements + project = await self.project_manager.get_project(project_id) + if not project or not project.confirmed_requirements: + raise ValueError("Project or confirmed requirements not found") + + confirmed_requirements = project.confirmed_requirements + + # Execute based on stage type + if stage_id == "outline_generation": + return await self._execute_outline_generation(project_id, confirmed_requirements, self._load_prompts_md_system_prompt()) + elif stage_id == "ppt_creation": + return await self._execute_ppt_creation(project_id, confirmed_requirements, self._load_prompts_md_system_prompt()) + else: + # Fallback for other stages + return await self._execute_general_stage(project_id, stage_id, confirmed_requirements) + + except Exception as e: + logger.error(f"Error executing complete stage '{stage_id}': {str(e)}") + 
raise + + async def _execute_general_stage(self, project_id: str, stage_id: str, confirmed_requirements: Dict[str, Any]): + """Execute a general stage task""" + try: + system_prompt = self._load_prompts_md_system_prompt() + + context = f""" +项目信息: +- 主题:{confirmed_requirements['topic']} +- 类型:{confirmed_requirements['type']} +- 其他说明:{confirmed_requirements.get('description', '无')} + +当前阶段:{stage_id} + +请根据以上信息完成当前阶段的任务。 +""" + + response = await self._text_completion_for_role("default", + prompt=context, + system_prompt=system_prompt, + max_tokens=ai_config.max_tokens, + temperature=ai_config.temperature + ) + + return {"message": response.content} + + except Exception as e: + logger.error(f"Error executing general stage '{stage_id}': {str(e)}") + raise + + async def _complete_stage(self, project_id: str, stage_id: str, + request: PPTGenerationRequest) -> Dict[str, Any]: + """Complete a stage and return its result""" + try: + if stage_id == "outline_generation": + outline = await self.generate_outline(request) + return {"outline": outline.dict()} + + elif stage_id == "theme_design": + theme_config = await self._design_theme(request.scenario, request.language) + return {"theme_config": theme_config} + + elif stage_id == "content_generation": + # Get outline from previous stage + project = await self.project_manager.get_project(project_id) + if project and project.outline: + enhanced_slides = await self._generate_enhanced_content(project.outline, request) + return {"enhanced_slides": [slide.dict() for slide in enhanced_slides]} + else: + # Fallback: generate basic outline first + outline = await self.generate_outline(request) + enhanced_slides = await self._generate_enhanced_content(outline, request) + return {"enhanced_slides": [slide.dict() for slide in enhanced_slides]} + + elif stage_id == "layout_verification": + # Get slides from previous stage + todo_board = await self.project_manager.get_todo_board(project_id) + if todo_board: + for stage in 
async def generate_outline_streaming(self, project_id: str):
    """Stream a project's outline as Server-Sent-Event style ``data:`` lines.

    Three sources are tried in order:

    1. An outline already generated from an uploaded file is replayed as is.
    2. With network mode on and a research service available, a DEEP research
       report is written to a temporary Markdown file and turned into an
       outline by ``generate_outline_from_file``.
    3. Otherwise the outline is requested from the "outline" AI role, parsed
       as JSON and validated/repaired.

    The outline is streamed one character per event (``{"content": ch}``) and
    stored on the project and in the database. The file and research paths
    end with ``{"done": true}``; failures yield ``{"error": message}``.

    Args:
        project_id: Identifier of the project to generate the outline for.

    Yields:
        str: ``"data: <json>\\n\\n"`` event strings.
    """
    # NOTE: this function relies on the module-level ``json`` / ``re``
    # imports. Importing them inside the body would make the names local, so
    # the ``except`` handler below would raise UnboundLocalError whenever the
    # failure happened before the import statement had run.
    try:
        project = await self.project_manager.get_project(project_id)
        if not project:
            raise ValueError("Project not found")

        # Check whether an outline was already generated from a file.
        file_generated_outline = None
        if project.confirmed_requirements and project.confirmed_requirements.get('file_generated_outline'):
            file_generated_outline = project.confirmed_requirements['file_generated_outline']
            logger.info(f"Project {project_id} has file-generated outline, using it")
        elif (project.outline and project.outline.get('slides')
                and (project.outline.get('metadata') or {}).get('generated_with_summeryfile')):
            file_generated_outline = project.outline
            logger.info(f"Project {project_id} already has outline generated from file, using existing outline")

        if file_generated_outline:
            # Replay the existing outline; tolerate a missing or null metadata.
            existing_outline = {
                "title": file_generated_outline.get('title', project.topic),
                "slides": file_generated_outline.get('slides', []),
                "metadata": file_generated_outline.get('metadata') or {}
            }
            existing_outline['metadata']['generated_with_summeryfile'] = True
            existing_outline['metadata']['generated_at'] = time.time()

            formatted_json = json.dumps(existing_outline, ensure_ascii=False, indent=2)

            for i, char in enumerate(formatted_json):
                yield f"data: {json.dumps({'content': char})}\n\n"
                if i % 10 == 0:
                    await asyncio.sleep(0.02)  # Faster streaming for existing content

            # Keep the structured data itself on the project (no wrapper).
            project.outline = existing_outline
            project.updated_at = time.time()

            # Persist immediately; a failed save must not break the stream.
            try:
                db_manager = DatabaseProjectManager()
                save_success = await db_manager.save_project_outline(project_id, project.outline)

                if save_success:
                    logger.info(f"✅ Successfully saved file-generated outline to database for project {project_id}")
                    # Keep the in-memory project manager in sync as well.
                    self.project_manager.projects[project_id] = project
                else:
                    logger.error(f"❌ Failed to save file-generated outline to database for project {project_id}")

            except Exception as save_error:
                logger.exception(f"❌ Exception while saving file-generated outline: {str(save_error)}")

            await self._update_outline_generation_stage(project_id, existing_outline)
            yield f"data: {json.dumps({'done': True})}\n\n"
            return

        await self.project_manager.update_project_status(project_id, "in_progress")

        # Reflect the running stage on the in-memory TODO board.
        if project.todo_board:
            for stage in project.todo_board.stages:
                if stage.id == "outline_generation":
                    stage.status = "running"
                    break

        confirmed_requirements = project.confirmed_requirements or {}

        # Run DEEP research first when network mode is enabled.
        research_context = ""
        network_mode = False
        if project.project_metadata and isinstance(project.project_metadata, dict):
            network_mode = project.project_metadata.get("network_mode", False)

        if network_mode and self.research_service and self.research_service.is_available():
            logger.info(f"🔍 Project {project_id} has network mode enabled, starting DEEP research for topic: {project.topic}")
            try:
                research_context_data = {
                    'scenario': project.scenario,
                    'target_audience': confirmed_requirements.get('target_audience', '普通大众'),
                    'requirements': project.requirements,
                    'ppt_style': confirmed_requirements.get('ppt_style', 'general'),
                    'description': confirmed_requirements.get('description', '')
                }

                research_report = await self.research_service.conduct_deep_research(
                    topic=project.topic,
                    language="zh",  # Default to Chinese for now
                    context=research_context_data
                )

                # Structured Markdown built from the research report.
                research_context = self._create_research_context(research_report)
                logger.info(f"✅ DEEP research completed successfully for project {project_id}")

                if self.report_generator:
                    try:
                        report_path = self.report_generator.save_report_to_file(research_report)
                        logger.info(f"📄 Research report saved to: {report_path}")
                    except Exception as save_error:
                        logger.warning(f"Failed to save research report: {save_error}")

                # Feed the research through the existing file-based pipeline.
                if research_context:
                    logger.info(f"🎯 Using research-based outline generation via file processing for project {project_id}")

                    # File I/O runs in the thread pool to keep the loop free.
                    temp_research_file = await run_blocking_io(
                        self._save_research_to_temp_file, research_context
                    )

                    logger.info(f"📄 Research content saved to temporary file: {temp_research_file}")
                    logger.info(f"📊 Research content stats: {len(research_context)} chars, {len(research_context.split())} words")

                    try:
                        from ..api.models import FileOutlineGenerationRequest

                        page_settings = confirmed_requirements.get('page_count_settings', {})
                        file_request = FileOutlineGenerationRequest(
                            file_path=temp_research_file,
                            filename=f"research_{project.topic}.md",
                            topic=confirmed_requirements.get('topic', project.topic),
                            scenario=confirmed_requirements.get('type', project.scenario),
                            requirements=confirmed_requirements.get('requirements', project.requirements),
                            language="zh",
                            page_count_mode=page_settings.get('mode', 'ai_decide'),
                            min_pages=page_settings.get('min_pages', 8),
                            max_pages=page_settings.get('max_pages', 15),
                            fixed_pages=page_settings.get('fixed_pages', 10),
                            ppt_style=confirmed_requirements.get('ppt_style', 'general'),
                            custom_style_prompt=confirmed_requirements.get('custom_style_prompt'),
                            target_audience=confirmed_requirements.get('target_audience', '普通大众'),
                            custom_audience=confirmed_requirements.get('custom_audience'),
                            file_processing_mode="markitdown",  # Markdown input
                            content_analysis_depth="fast"  # fast strategy suits research reports
                        )

                        logger.info("🚀 Using fast chunking strategy for research-based outline generation")
                        logger.info(f"📊 File processing config: mode={file_request.file_processing_mode}, depth={file_request.content_analysis_depth}")

                        outline_response = await self.generate_outline_from_file(file_request)

                        if outline_response.success and outline_response.outline:
                            structured_outline = outline_response.outline

                            # Tag the outline as research-enhanced.
                            if 'metadata' not in structured_outline:
                                structured_outline['metadata'] = {}
                            structured_outline['metadata']['research_enhanced'] = True
                            structured_outline['metadata']['research_duration'] = research_report.total_duration
                            structured_outline['metadata']['research_sources'] = len(research_report.sources)
                            structured_outline['metadata']['generated_from_research_file'] = True
                            structured_outline['metadata']['generated_at'] = time.time()

                            formatted_json = json.dumps(structured_outline, ensure_ascii=False, indent=2)
                            for i, char in enumerate(formatted_json):
                                yield f"data: {json.dumps({'content': char})}\n\n"
                                if i % 10 == 0:
                                    await asyncio.sleep(0.05)

                            project.outline = structured_outline
                            project.updated_at = time.time()

                            try:
                                db_manager = DatabaseProjectManager()
                                save_success = await db_manager.save_project_outline(project_id, project.outline)
                                if save_success:
                                    logger.info(f"✅ Successfully saved research-enhanced outline to database for project {project_id}")
                                    self.project_manager.projects[project_id] = project
                                else:
                                    logger.error(f"❌ Failed to save research-enhanced outline to database for project {project_id}")
                            except Exception as save_error:
                                logger.error(f"❌ Exception while saving research-enhanced outline: {str(save_error)}")

                            await self._update_outline_generation_stage(project_id, structured_outline)

                            yield f"data: {json.dumps({'done': True})}\n\n"
                            return
                        else:
                            logger.warning("Failed to generate outline from research file, falling back to normal generation")

                    finally:
                        # Always remove the temporary research file.
                        try:
                            await run_blocking_io(self._cleanup_temp_file, temp_research_file)
                            logger.info(f"Cleaned up temporary research file: {temp_research_file}")
                        except Exception as cleanup_error:
                            logger.warning(f"Failed to cleanup temporary research file: {cleanup_error}")

            except Exception as research_error:
                logger.warning(f"DEEP research failed for project {project_id}, proceeding without research context: {research_error}")
                research_context = ""
        else:
            if network_mode:
                logger.warning(f"Project {project_id} has network mode enabled but research service is not available")
            else:
                logger.info(f"Project {project_id} does not have network mode enabled")

        # Translate the page-count settings into a prompt instruction.
        page_count_settings = confirmed_requirements.get('page_count_settings', {})
        page_count_mode = page_count_settings.get('mode', 'ai_decide')

        page_count_instruction = ""
        if page_count_mode == 'custom_range':
            min_pages = page_count_settings.get('min_pages', 8)
            max_pages = page_count_settings.get('max_pages', 15)
            page_count_instruction = f"- 页数要求:必须严格生成{min_pages}-{max_pages}页的PPT,确保页数在此范围内"
        elif page_count_mode == 'fixed':
            fixed_pages = page_count_settings.get('fixed_pages', 10)
            page_count_instruction = f"- 页数要求:必须生成恰好{fixed_pages}页的PPT"
        else:
            page_count_instruction = "- 页数要求:根据内容复杂度自主决定合适的页数(建议8-15页)"

        topic = confirmed_requirements.get('topic', project.topic)
        target_audience = confirmed_requirements.get('target_audience', '普通大众')
        ppt_style = confirmed_requirements.get('ppt_style', 'general')

        # Plain concatenation avoids brace clashes an f-string would have.
        research_section = ""
        if research_context:
            research_section = """

基于深度研究的背景信息:
""" + research_context + """

请充分利用以上研究信息来丰富PPT内容,确保信息准确、权威、具有深度。"""

        prompt = prompts_manager.get_streaming_outline_prompt(
            topic=topic,
            target_audience=target_audience,
            ppt_style=ppt_style,
            page_count_instruction=page_count_instruction,
            research_section=research_section
        )

        # Ask the AI for the outline (no preliminary message is streamed).
        try:
            response = await self._text_completion_for_role("outline",
                prompt=prompt,
                max_tokens=ai_config.max_tokens,
                temperature=ai_config.temperature
            )

            content = response.content.strip()

            # Reject empty or implausibly short responses.
            if not content or len(content.strip()) < 10:
                error_message = "AI生成的内容为空或过短,请重新生成大纲。"
                yield f"data: {json.dumps({'error': error_message})}\n\n"
                return

        except Exception as ai_error:
            logger.error(f"AI provider error during outline generation: {str(ai_error)}")
            # Map the failure to a more specific user-facing message.
            if "timeout" in str(ai_error).lower() or "request timed out" in str(ai_error).lower():
                error_message = "AI服务响应超时,请检查网络连接后重新生成大纲。"
            elif "api" in str(ai_error).lower() and "error" in str(ai_error).lower():
                error_message = "AI服务暂时不可用,请稍后重新生成大纲。"
            else:
                error_message = f"AI生成大纲失败:{str(ai_error)}。请重新生成大纲。"
            yield f"data: {json.dumps({'error': error_message})}\n\n"
            return

        structured_outline = None

        # Parse as JSON, then validate and repair the structure.
        try:
            # The model may wrap the JSON in prose; take the outermost braces.
            json_match = re.search(r'\{.*\}', content, re.DOTALL)
            if json_match:
                json_str = json_match.group()
                structured_outline = json.loads(json_str)
            else:
                structured_outline = json.loads(content)

            structured_outline = await self._validate_and_repair_outline_json(structured_outline, confirmed_requirements)

            # A page-count mismatch is only logged here, not corrected.
            actual_page_count = len(structured_outline.get('slides', []))
            if page_count_mode == 'custom_range':
                min_pages = page_count_settings.get('min_pages', 8)
                max_pages = page_count_settings.get('max_pages', 15)
                if actual_page_count < min_pages or actual_page_count > max_pages:
                    logger.warning(f"Generated outline has {actual_page_count} pages, but expected {min_pages}-{max_pages} pages")
            elif page_count_mode == 'fixed':
                fixed_pages = page_count_settings.get('fixed_pages', 10)
                if actual_page_count != fixed_pages:
                    logger.warning(f"Generated outline has {actual_page_count} pages, but expected exactly {fixed_pages} pages")

            structured_outline['metadata'] = {
                'generated_with_summeryfile': False,
                'page_count_settings': page_count_settings,
                'actual_page_count': actual_page_count,
                'generated_at': time.time()
            }

            formatted_json = json.dumps(structured_outline, ensure_ascii=False, indent=2)

            # Stream character by character, pausing every 10 characters.
            for i, char in enumerate(formatted_json):
                yield f"data: {json.dumps({'content': char})}\n\n"
                if i % 10 == 0:
                    await asyncio.sleep(0.05)

            project.outline = structured_outline
            project.updated_at = time.time()

            # Persist immediately; a failed save must not break the stream.
            try:
                db_manager = DatabaseProjectManager()
                save_success = await db_manager.save_project_outline(project_id, project.outline)

                if save_success:
                    logger.info(f"✅ Successfully saved outline to database during streaming for project {project_id}")
                    self.project_manager.projects[project_id] = project
                else:
                    logger.error(f"❌ Failed to save outline to database during streaming for project {project_id}")

            except Exception as save_error:
                logger.exception(f"❌ Exception while saving outline during streaming: {str(save_error)}")

            # NOTE(review): unlike the file and research paths, this path
            # sends no {'done': True} event - confirm that the consumer
            # treats the end of the stream as completion.
            await self._update_outline_generation_stage(project_id, structured_outline)

        except Exception as parse_error:
            logger.error(f"Failed to parse AI response as JSON: {parse_error}")
            logger.error(f"AI response content: {content[:500]}...")

            # No default outline is substituted; the user must regenerate.
            error_message = "AI生成的大纲格式无效,无法解析。请重新生成大纲。"
            yield f"data: {json.dumps({'error': error_message})}\n\n"
            return

    except Exception as e:
        logger.error(f"Error in outline streaming generation: {str(e)}")
        # Distinguish provider problems (timeouts, API errors) from the rest.
        if "timeout" in str(e).lower() or "api error" in str(e).lower() or "request timed out" in str(e).lower():
            error_message = f'AI服务暂时不可用:{str(e)}。请稍后重试或检查网络连接。'
        else:
            error_message = f'生成大纲时出现错误:{str(e)}'
        yield f"data: {json.dumps({'error': error_message})}\n\n"
{str(repair_error)}") + + current_attempt += 1 + + # 如果10次修复都失败,直接输出JSON + logger.warning("AI修复达到最大尝试次数(10次),直接输出当前JSON") + return outline_data + + except Exception as e: + logger.error(f"验证和修复过程出错: {str(e)}") + # 如果验证修复过程出错,直接输出原始JSON + return outline_data + + def _validate_outline_structure(self, outline_data: Dict[str, Any], confirmed_requirements: Dict[str, Any]) -> List[str]: + """验证大纲结构,返回错误列表""" + errors = [] + + try: + # 1. 检查必需字段 + if not isinstance(outline_data, dict): + errors.append("大纲数据必须是字典格式") + return errors + + if 'slides' not in outline_data: + errors.append("缺少必需字段: slides") + return errors + + if 'title' not in outline_data: + errors.append("缺少必需字段: title") + + # 2. 检查slides字段 + slides = outline_data.get('slides', []) + if not isinstance(slides, list): + errors.append("slides字段必须是列表格式") + return errors + + if len(slides) == 0: + errors.append("slides列表不能为空") + return errors + + # 3. 检查页数要求 + page_count_settings = confirmed_requirements.get('page_count_settings', {}) + page_count_mode = page_count_settings.get('mode', 'ai_decide') + actual_page_count = len(slides) + + if page_count_mode == 'custom_range': + min_pages = page_count_settings.get('min_pages', 8) + max_pages = page_count_settings.get('max_pages', 15) + if actual_page_count < min_pages: + errors.append(f"页数不足:当前{actual_page_count}页,要求至少{min_pages}页") + elif actual_page_count > max_pages: + errors.append(f"页数过多:当前{actual_page_count}页,要求最多{max_pages}页") + elif page_count_mode == 'fixed': + fixed_pages = page_count_settings.get('fixed_pages', 10) + if actual_page_count != fixed_pages: + errors.append(f"页数不匹配:当前{actual_page_count}页,要求恰好{fixed_pages}页") + + # 4. 检查每个slide的结构 + for i, slide in enumerate(slides): + slide_errors = self._validate_slide_structure(slide, i + 1) + errors.extend(slide_errors) + + # 5. 
检查页码连续性 + page_numbers = [slide.get('page_number', 0) for slide in slides] + expected_numbers = list(range(1, len(slides) + 1)) + if page_numbers != expected_numbers: + expected_str = ', '.join(map(str, expected_numbers)) + actual_str = ', '.join(map(str, page_numbers)) + errors.append(f"页码不连续:期望[{expected_str}],实际[{actual_str}]") + + return errors + + except Exception as e: + errors.append(f"验证过程出错: {str(e)}") + return errors + + def _validate_slide_structure(self, slide: Dict[str, Any], slide_index: int) -> List[str]: + """验证单个slide的结构""" + errors = [] + + try: + if not isinstance(slide, dict): + errors.append(f"第{slide_index}页:slide必须是字典格式") + return errors + + # 检查必需字段 + required_fields = ['page_number', 'title', 'content_points', 'slide_type'] + for field in required_fields: + if field not in slide: + errors.append(f"第{slide_index}页:缺少必需字段 {field}") + + # 检查字段类型和值 + if 'page_number' in slide: + page_num = slide['page_number'] + if not isinstance(page_num, int) or page_num != slide_index: + errors.append(f"第{slide_index}页:page_number应为{slide_index},实际为{page_num}") + + if 'title' in slide: + title = slide['title'] + if not isinstance(title, str) or not title.strip(): + errors.append(f"第{slide_index}页:title必须是非空字符串") + + if 'content_points' in slide: + content_points = slide['content_points'] + if not isinstance(content_points, list): + errors.append(f"第{slide_index}页:content_points必须是列表格式") + elif len(content_points) == 0: + errors.append(f"第{slide_index}页:content_points不能为空") + else: + for j, point in enumerate(content_points): + if not isinstance(point, str) or not point.strip(): + errors.append(f"第{slide_index}页:content_points[{j}]必须是非空字符串") + + if 'slide_type' in slide: + slide_type = slide['slide_type'] + valid_types = ['title', 'content', 'agenda', 'thankyou'] + if slide_type not in valid_types: + valid_types_str = ', '.join(valid_types) + errors.append(f"第{slide_index}页:slide_type必须是{valid_types_str}中的一个,实际为{slide_type}") + + return errors + + except 
Exception as e: + errors.append(f"第{slide_index}页验证出错: {str(e)}") + return errors + + async def _repair_outline_with_ai(self, outline_data: Dict[str, Any], validation_errors: List[str], confirmed_requirements: Dict[str, Any]) -> Dict[str, Any]: + """使用AI修复大纲JSON数据""" + try: + # 构建修复提示词 + repair_prompt = self._build_repair_prompt(outline_data, validation_errors, confirmed_requirements) + + # 调用AI进行修复 + response = await self._text_completion_for_role("outline", + prompt=repair_prompt, + max_tokens=ai_config.max_tokens, + temperature=0.3 # 使用较低的温度以确保更准确的修复 + ) + + # 解析AI返回的修复结果 + repaired_content = response.content.strip() + + # 提取JSON - 改进的提取逻辑 + import re + json_str = None + + # 方法1: 尝试提取```json```代码块中的内容 + json_block_match = re.search(r'```json\s*(\{.*?\})\s*```', repaired_content, re.DOTALL) + if json_block_match: + json_str = json_block_match.group(1) + logger.info("从```json```代码块中提取JSON") + else: + # 方法2: 尝试提取```代码块中的内容(不带json标识) + code_block_match = re.search(r'```\s*(\{.*?\})\s*```', repaired_content, re.DOTALL) + if code_block_match: + json_str = code_block_match.group(1) + logger.info("从```代码块中提取JSON") + else: + # 方法3: 尝试提取完整的JSON对象(非贪婪匹配) + json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', repaired_content, re.DOTALL) + if json_match: + json_str = json_match.group() + logger.info("使用正则表达式提取JSON") + else: + # 方法4: 假设整个内容就是JSON + json_str = repaired_content + logger.info("将整个响应内容作为JSON处理") + + # 清理JSON字符串中的常见问题 + if json_str: + # 移除可能的前后空白和换行 + json_str = json_str.strip() + # 修复常见的JSON格式问题 + json_str = re.sub(r',\s*}', '}', json_str) # 移除}前的多余逗号 + json_str = re.sub(r',\s*]', ']', json_str) # 移除]前的多余逗号 + + repaired_outline = json.loads(json_str) + + logger.info("AI修复完成,返回修复后的大纲") + return repaired_outline + + except Exception as e: + logger.error(f"AI修复过程出错: {str(e)}") + # 如果AI修复失败,直接返回原始数据 + return outline_data + + def _build_repair_prompt(self, outline_data: Dict[str, Any], validation_errors: List[str], confirmed_requirements: Dict[str, Any]) -> 
async def _update_outline_generation_stage(self, project_id: str, outline_data: Dict[str, Any]):
    """Persist the outline and mark the ``outline_generation`` stage completed.

    Steps: load the project, fill ``project.outline`` from ``outline_data``
    when it is empty, save ``outline_data`` to the database and read it back
    to verify, set the project status to ``in_progress``, then mark the
    stage completed on the in-memory TODO board and via the project manager.

    Args:
        project_id: Identifier of the project being updated.
        outline_data: The generated outline; stored as the stage result.

    Returns:
        None. Any exception is logged (with traceback) and swallowed.
    """
    try:
        from .db_project_manager import DatabaseProjectManager
        outline_store = DatabaseProjectManager()

        project = await self.project_manager.get_project(project_id)
        if not project:
            logger.error(f"❌ Project not found in memory for project {project_id}")
            return

        # Only fill in the outline when the project does not have one yet.
        if not project.outline:
            logger.info(f"Project outline is None, setting outline from outline_data")
            project.outline = outline_data
            project.updated_at = time.time()

        # What gets persisted is outline_data, not project.outline.
        if await outline_store.save_project_outline(project_id, outline_data):
            logger.info(f"✅ Successfully saved outline to database for project {project_id}")

            # Read back to verify the save and refresh the in-memory copy.
            persisted = await outline_store.get_project(project_id)
            if not (persisted and persisted.outline):
                logger.error(f"❌ Verification failed: outline not found in database")
            else:
                saved_slides_count = len(persisted.outline.get('slides', []))
                logger.info(f"✅ Verified: outline saved with {saved_slides_count} slides")

                project.outline = persisted.outline
                project.updated_at = persisted.updated_at
                logger.info(f"✅ Updated memory project with database outline")
        else:
            logger.error(f"❌ Failed to save outline to database for project {project_id}")

        await self.project_manager.update_project_status(project_id, "in_progress")

        # Mirror the completion on the in-memory TODO board.
        if project.todo_board:
            outline_stage = next(
                (item for item in project.todo_board.stages if item.id == "outline_generation"),
                None,
            )
            if outline_stage is not None:
                outline_stage.status = "completed"
                outline_stage.result = {"outline_data": outline_data}

        # Record the completion through the project manager as well.
        await self.project_manager.update_stage_status(
            project_id, "outline_generation", "completed",
            progress=100.0, result={"outline_data": outline_data}
        )

    except Exception as e:
        logger.error(f"Error updating outline generation stage: {str(e)}")
        import traceback
        traceback.print_exc()
self._standardize_outline_format(json_data) + return standardized_data + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse extracted JSON: {e}") + pass + + # Fallback: parse text-based outline + lines = content.split('\n') + slides = [] + current_slide = None + slide_number = 1 + + for line in lines: + line = line.strip() + if not line: + continue + + # Check for slide titles (various formats) + if (line.startswith('#') or + line.startswith('第') and ('页' in line or '章' in line) or + line.startswith('Page') or + re.match(r'^\d+[\.\)]\s*', line) or + line.endswith(':') or line.endswith(':')): + + # Save previous slide + if current_slide: + slides.append(current_slide) + + # Clean title + title = re.sub(r'^#+\s*', '', line) # Remove markdown headers + title = re.sub(r'^第\d+[页章]\s*[::]\s*', '', title) # Remove "第X页:" + title = re.sub(r'^Page\s*\d+\s*[::]\s*', '', title, flags=re.IGNORECASE) # Remove "Page X:" + title = re.sub(r'^\d+[\.\)]\s*', '', title) # Remove "1. " or "1) " + title = title.rstrip('::') # Remove trailing colons + + # Determine slide type + slide_type = "content" + if slide_number == 1 or '标题' in title or 'Title' in title or '封面' in title: + slide_type = "title" + elif '谢谢' in title or 'Thank' in title or '结束' in title or '总结' in title: + slide_type = "thankyou" + elif '目录' in title or 'Agenda' in title or '大纲' in title: + slide_type = "agenda" + + current_slide = { + "page_number": slide_number, + "title": title or f"第{slide_number}页", + "content_points": [], + "slide_type": slide_type + } + slide_number += 1 + + elif current_slide and (line.startswith('-') or line.startswith('•') or + line.startswith('*') or re.match(r'^\d+[\.\)]\s*', line)): + # Content point + point = re.sub(r'^[-•*]\s*', '', line) + point = re.sub(r'^\d+[\.\)]\s*', '', point) + if point: + current_slide["content_points"].append(point) + + elif current_slide and line and not line.startswith('#'): + # Regular content line + 
current_slide["content_points"].append(line) + + # Add the last slide + if current_slide: + slides.append(current_slide) + + # If no slides were parsed, create a default structure + if not slides: + slides = self._create_default_slides_from_content(content, project) + + return { + "title": project.topic, + "slides": slides + } + + except Exception as e: + logger.error(f"Error parsing outline content: {str(e)}") + # Return default structure + return { + "title": project.topic, + "slides": self._create_default_slides_from_content(content, project) + } + + def _standardize_outline_format(self, outline_data: Dict[str, Any]) -> Dict[str, Any]: + """标准化大纲格式,确保slides字段的兼容性""" + try: + import re + + # 确保有基本结构 + if not isinstance(outline_data, dict): + raise ValueError("Outline data must be a dictionary") + + title = outline_data.get("title", "PPT大纲") + slides_data = outline_data.get("slides", []) + metadata = outline_data.get("metadata", {}) + + if not isinstance(slides_data, list): + raise ValueError("Slides data must be a list") + + # 标准化每个slide的格式 + standardized_slides = [] + + for i, slide in enumerate(slides_data): + if not isinstance(slide, dict): + continue + + # 提取基本信息 + page_number = slide.get("page_number", i + 1) + title_text = slide.get("title", f"第{page_number}页") + + # 处理content_points字段 + content_points = slide.get("content_points", []) + if not isinstance(content_points, list): + content_points = [] + + # 如果没有content_points,尝试从其他字段提取 + if not content_points: + # 尝试从content字段提取 + content = slide.get("content", "") + if content: + lines = content.split('\n') + for line in lines: + line = line.strip() + if line: + # 移除bullet point符号 + line = re.sub(r'^[•\-\*]\s*', '', line) + if line: + content_points.append(line) + + # 如果仍然没有,使用默认值 + if not content_points: + content_points = ["内容要点"] + + # 处理slide_type字段 + slide_type = slide.get("slide_type", slide.get("type", "content")) + + # 智能识别slide类型 + title_lower = title_text.lower() + if page_number == 1 or "标题" in 
title_lower or "title" in title_lower: + slide_type = "title" + elif "目录" in title_lower or "agenda" in title_lower or "大纲" in title_lower: + slide_type = "agenda" + elif "谢谢" in title_lower or "thank" in title_lower or "致谢" in title_lower: + slide_type = "thankyou" + elif "总结" in title_lower or "结论" in title_lower or "conclusion" in title_lower: + slide_type = "conclusion" + elif slide_type not in ["title", "content", "agenda", "thankyou", "conclusion"]: + slide_type = "content" + + # 构建标准化的slide + standardized_slide = { + "page_number": page_number, + "title": title_text, + "content_points": content_points, + "slide_type": slide_type, + "type": slide_type, # 添加type字段以兼容不同的访问方式 + "description": slide.get("description", "") + } + + # 保留chart_config如果存在 + if "chart_config" in slide and slide["chart_config"]: + standardized_slide["chart_config"] = slide["chart_config"] + + standardized_slides.append(standardized_slide) + + # 构建标准化的大纲 + standardized_outline = { + "title": title, + "slides": standardized_slides, + "metadata": metadata + } + + logger.info(f"Successfully standardized outline format: {title}, {len(standardized_slides)} slides") + return standardized_outline + + except Exception as e: + logger.error(f"Error standardizing outline format: {str(e)}") + # 返回原始数据或默认结构 + if isinstance(outline_data, dict) and "slides" in outline_data: + return outline_data + else: + return { + "title": "PPT大纲", + "slides": [ + { + "page_number": 1, + "title": "标题页", + "content_points": ["演示标题"], + "slide_type": "title", + "type": "title", + "description": "PPT标题页" + } + ], + "metadata": {} + } + + def _create_default_slides_from_content(self, content: str, project: PPTProject) -> List[Dict[str, Any]]: + """Create default slides structure from content""" + slides = [ + { + "page_number": 1, + "title": project.topic, + "content_points": ["项目介绍", "主要内容", "核心特点"], + "slide_type": "title" + }, + { + "page_number": 2, + "title": "主要内容", + "content_points": content.split('\n')[:5] if 
content else ["内容要点1", "内容要点2", "内容要点3"], + "slide_type": "content" + }, + { + "page_number": 3, + "title": "谢谢", + "content_points": ["感谢聆听"], + "slide_type": "thankyou" + } + ] + return slides + + async def update_project_outline(self, project_id: str, outline_content: str) -> bool: + """Update project outline content (expects JSON format)""" + try: + project = await self.project_manager.get_project(project_id) + if not project: + return False + + import json + + # Try to parse the content as JSON + try: + structured_outline = json.loads(outline_content) + + # Validate the JSON structure + if 'slides' not in structured_outline: + raise ValueError("Invalid JSON structure: missing 'slides'") + + # 标准化大纲格式以确保兼容性 + structured_outline = self._standardize_outline_format(structured_outline) + + # Format the JSON for consistent display + formatted_json = json.dumps(structured_outline, ensure_ascii=False, indent=2) + + except json.JSONDecodeError: + # If not valid JSON, try to parse as text and convert to JSON + structured_outline = self._parse_outline_content(outline_content, project) + formatted_json = json.dumps(structured_outline, ensure_ascii=False, indent=2) + + # Update outline in the correct field + if not project.outline: + project.outline = {} + project.outline["content"] = formatted_json # Store formatted JSON + project.outline["title"] = structured_outline.get("title", project.topic) + project.outline["slides"] = structured_outline.get("slides", []) + project.outline["updated_at"] = time.time() + + # 保存更新的大纲到数据库 + try: + from .db_project_manager import DatabaseProjectManager + db_manager = DatabaseProjectManager() + save_success = await db_manager.save_project_outline(project_id, project.outline) + + if save_success: + logger.info(f"✅ Successfully saved updated outline to database for project {project_id}") + else: + logger.error(f"❌ Failed to save updated outline to database for project {project_id}") + + except Exception as save_error: + logger.error(f"❌ 
async def confirm_project_outline(self, project_id: str) -> bool:
    """Mark the project's outline as confirmed and unlock the PPT creation stage.

    If the in-memory outline has no slides, it is first recovered from the
    ``file_generated_outline`` stored in the confirmed requirements, or else
    reloaded from the database. The confirmation flag is then saved to the
    database (best effort), the in-memory TODO board is updated, and the
    ``outline_generation`` stage is reported as completed.

    Returns:
        True when the outline was confirmed; False when the project or a usable
        outline could not be found, or when any unexpected error occurred.
    """
    try:
        project = await self.project_manager.get_project(project_id)
        if not project:
            return False

        # An outline must exist before it can be confirmed.
        if not project.outline:
            logger.error(f"No outline found for project {project_id}")
            return False

        # The outline must carry slides; otherwise try to recover them.
        if not project.outline.get('slides'):
            logger.error(f"No slides found in outline for project {project_id}")

            requirements = project.confirmed_requirements
            uploaded_outline = requirements.get('file_generated_outline') if requirements else None

            if uploaded_outline and isinstance(uploaded_outline, dict):
                # First choice: the outline generated from the uploaded file.
                if not uploaded_outline.get('slides'):
                    logger.error("file_generated_outline does not contain slides data")
                    return False

                logger.info(f"Restoring outline from file_generated_outline with {len(uploaded_outline['slides'])} slides")
                # Restore the full outline and keep the confirmation state on it.
                project.outline = uploaded_outline.copy()
                project.outline["confirmed"] = True
                project.outline["confirmed_at"] = time.time()
            else:
                # Second choice: reload the outline from the database.
                try:
                    from .db_project_manager import DatabaseProjectManager
                    reload_manager = DatabaseProjectManager()
                    stored_project = await reload_manager.get_project(project_id)
                    stored_has_slides = (
                        stored_project
                        and stored_project.outline
                        and stored_project.outline.get('slides')
                    )
                    if not stored_has_slides:
                        logger.error(f"No valid outline found in database for project {project_id}")
                        return False
                    project.outline = stored_project.outline
                    logger.info(f"Reloaded outline from database for project {project_id}")
                except Exception as reload_error:
                    logger.error(f"Failed to reload outline from database: {reload_error}")
                    return False

        # Keep the existing outline data and only add the confirmation state.
        project.outline["confirmed"] = True
        project.outline["confirmed_at"] = time.time()

        # Save the confirmation to the database; failures are logged, not fatal.
        try:
            from .db_project_manager import DatabaseProjectManager
            outline_store = DatabaseProjectManager()
            saved = await outline_store.save_project_outline(project_id, project.outline)
            if saved:
                logger.info(f"✅ Successfully saved outline confirmation to database for project {project_id}")
            else:
                logger.error(f"❌ Failed to save outline confirmation to database for project {project_id}")
        except Exception as save_error:
            logger.error(f"❌ Exception while saving outline confirmation to database: {save_error}")

        # Update TODO board - mark outline as confirmed and enable PPT creation
        board = project.todo_board
        if board:
            for stage in board.stages:
                if stage.id == "ppt_creation":
                    stage.status = "pending"  # Enable PPT creation
                    break
                if stage.id == "outline_generation":
                    stage.status = "completed"
                    if not stage.result:
                        stage.result = {}
                    stage.result["confirmed"] = True

        # Update project manager
        await self.project_manager.update_stage_status(
            project_id, "outline_generation", "completed",
            progress=100.0, result={"confirmed": True}
        )
        return True

    except Exception as e:
        logger.error(f"Error confirming project outline: {e}")
        return False
"时代变迁", "历史教育"], + "technology": ["技术分享", "产品介绍", "创新展示", "技术方案"], + "business": ["商业计划", "项目汇报", "业务介绍", "企业展示"] + } + + # Get type options based on scenario + type_options = scenario_types.get(project.scenario, scenario_types["general"]) + + # Generate suggested topic based on original topic + suggested_topic = f"{project.topic} - 专业展示" + + return { + "suggested_topic": suggested_topic, + "type_options": type_options + } + + def _get_default_todo_structure(self, confirmed_requirements: Dict[str, Any]) -> Dict[str, Any]: + """Get default TODO structure based on confirmed requirements""" + return { + "stages": [ + { + "id": "outline_generation", + "name": "生成PPT大纲", + "description": "设计PPT整体结构与框架,规划各章节内容与关键点,确定核心优势和创新点的展示方式", + "subtasks": ["生成PPT大纲"] # Single task, description is explanatory + }, + { + "id": "ppt_creation", + "name": "制作PPT", + "description": "设计PPT封面与导航页,根据大纲制作各章节内容页面,添加视觉元素和图表美化PPT", + "subtasks": ["制作PPT"] # Single task, description is explanatory + } + ] + } + + async def _update_project_todo_board(self, project_id: str, todo_data: Dict[str, Any], + confirmed_requirements: Dict[str, Any]): + """Update project TODO board with custom stages (including requirements confirmation)""" + try: + from ..api.models import TodoStage, TodoBoard + import time + + # Create complete stages including requirements confirmation + stages = [ + TodoStage( + id="requirements_confirmation", + name="需求确认", + description="AI根据用户设定的场景和上传的文件内容提供补充信息用来确认用户的任务需求", + status="completed", # This stage is completed when requirements are confirmed + progress=100.0, + subtasks=["需求确认完成"] + ) + ] + + # Add custom stages from AI generation + for stage_data in todo_data.get("stages", []): + stage = TodoStage( + id=stage_data["id"], + name=stage_data["name"], + description=stage_data["description"], + subtasks=stage_data["subtasks"], + status="pending", # Start as pending + progress=0.0 + ) + stages.append(stage) + + # Create custom TODO board + todo_board = TodoBoard( + 
async def confirm_requirements_and_update_workflow(self, project_id: str, confirmed_requirements: Dict[str, Any]) -> bool:
    """Store the confirmed requirements and switch the project to its working workflow.

    Steps, in order:
    1. Load the project and record the requirements and "in_progress" status
       on the in-memory object.
    2. If the requirements carry a ``file_generated_outline`` dict, adopt it as
       the project's outline.
    3. Write status, requirements, optional outline and the completed
       ``requirements_confirmation`` stage to the database. Any failure here is
       logged and swallowed so the workflow can continue.
    4. Ask the project manager to rebuild the TODO board from the confirmed
       requirements and return its result.

    Args:
        project_id: Identifier of the project being confirmed.
        confirmed_requirements: Requirements dict confirmed by the user; may
            contain ``file_generated_outline``.

    Returns:
        The result of ``update_todo_board_with_confirmed_requirements``, or
        False when the project does not exist or an unexpected error occurs.
    """
    try:
        project = await self.project_manager.get_project(project_id)
        if not project:
            return False

        # Store confirmed requirements
        project.confirmed_requirements = confirmed_requirements
        project.status = "in_progress"
        project.updated_at = time.time()

        # If an outline was generated from an uploaded file, put it straight
        # into the project's outline field.
        file_generated_outline = confirmed_requirements.get('file_generated_outline')
        if file_generated_outline and isinstance(file_generated_outline, dict):
            logger.info(f"Setting file-generated outline to project {project_id}")
            project.outline = file_generated_outline
            project.updated_at = time.time()

        # Save confirmed requirements to database
        try:
            from .db_project_manager import DatabaseProjectManager
            db_manager = DatabaseProjectManager()

            # Update project status
            await db_manager.update_project_status(project_id, "in_progress")
            logger.info(f"Successfully updated project status in database for project {project_id}")

            # Save confirmed requirements to database
            await db_manager.save_confirmed_requirements(project_id, confirmed_requirements)
            logger.info(f"Successfully saved confirmed requirements to database for project {project_id}")

            # If there is a file-generated outline, persist it as well.
            # NOTE(review): this check is looser than the isinstance(dict)
            # check above, so a truthy non-dict value would still be saved.
            if file_generated_outline:
                save_success = await db_manager.save_project_outline(project_id, file_generated_outline)
                if save_success:
                    logger.info(f"✅ Successfully saved file-generated outline to database for project {project_id}")
                else:
                    logger.error(f"❌ Failed to save file-generated outline to database for project {project_id}")

            # Update requirements confirmation stage to completed
            await db_manager.update_stage_status(
                project_id,
                "requirements_confirmation",
                "completed",
                100.0,
                {"confirmed_at": time.time(), "requirements": confirmed_requirements}
            )
            logger.info(f"Successfully updated requirements confirmation stage to completed for project {project_id}")

        except Exception as save_error:
            # Best effort: a database failure must not block the workflow update below.
            logger.error(f"Failed to update project status or save requirements in database: {save_error}")
            import traceback
            # NOTE(review): prints the traceback to stderr rather than through
            # the module logger.
            traceback.print_exc()

        # Update TODO board with default workflow (no AI generation needed);
        # the call must be awaited.
        success = await self.project_manager.update_todo_board_with_confirmed_requirements(
            project_id, confirmed_requirements
        )

        # No background workflow is started any more; the frontend drives
        # outline generation directly.
        return success

    except Exception as e:
        logger.error(f"Error confirming requirements: {e}")
        return False
def _get_default_ppt_system_prompt(self) -> str:
    """Return the default system prompt for PPT generation.

    Thin delegate to ``prompts_manager.get_default_ppt_system_prompt``.
    """
    default_prompt = prompts_manager.get_default_ppt_system_prompt()
    return default_prompt
confirmed_requirements.get('target_audience', '普通大众') + ppt_style = confirmed_requirements.get('ppt_style', 'general') + custom_style = confirmed_requirements.get('custom_style_prompt', '无') + description = confirmed_requirements.get('description', '无') + + # 使用新的提示词模块 + context = prompts_manager.get_outline_generation_context( + topic=topic, + target_audience=target_audience, + page_count_instruction=page_count_instruction, + ppt_style=ppt_style, + custom_style=custom_style, + description=description, + page_count_mode=page_count_mode + ) + + response = await self._text_completion_for_role("outline", + prompt=context, + system_prompt=system_prompt, + max_tokens=ai_config.max_tokens, + temperature=ai_config.temperature + ) + + # Try to parse and store the outline + import json + import re + + try: + # Extract JSON from the response content + content = response.content.strip() + + # 改进的JSON提取方法 + json_str = None + + # 方法1: 尝试提取```json```代码块中的内容 + json_block_match = re.search(r'```json\s*(\{.*?\})\s*```', content, re.DOTALL) + if json_block_match: + json_str = json_block_match.group(1) + logger.info("从```json```代码块中提取JSON") + else: + # 方法2: 尝试提取```代码块中的内容(不带json标识) + code_block_match = re.search(r'```\s*(\{.*?\})\s*```', content, re.DOTALL) + if code_block_match: + json_str = code_block_match.group(1) + logger.info("从```代码块中提取JSON") + else: + # 方法3: 尝试提取完整的JSON对象(改进的正则表达式) + json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', content, re.DOTALL) + if json_match: + json_str = json_match.group() + logger.info("使用正则表达式提取JSON") + else: + # 方法4: 假设整个内容就是JSON + json_str = content + logger.info("将整个响应内容作为JSON处理") + + # 清理JSON字符串中的常见问题 + if json_str: + # 移除可能的前后空白和换行 + json_str = json_str.strip() + # 修复常见的JSON格式问题 + json_str = re.sub(r',\s*}', '}', json_str) # 移除}前的多余逗号 + json_str = re.sub(r',\s*]', ']', json_str) # 移除]前的多余逗号 + + outline_data = json.loads(json_str) + + # 验证和修复JSON数据 + outline_data = await self._validate_and_repair_outline_json(outline_data, 
confirmed_requirements) + + # 验证页数是否符合要求 + if expected_page_count and "slides" in outline_data: + actual_page_count = len(outline_data["slides"]) + logger.info(f"Generated outline has {actual_page_count} pages") + + if expected_page_count["mode"] == "range": + min_pages = expected_page_count["min"] + max_pages = expected_page_count["max"] + + if actual_page_count < min_pages or actual_page_count > max_pages: + logger.warning(f"Generated outline has {actual_page_count} pages, but expected {min_pages}-{max_pages} pages. Adjusting...") + # 强制调整页数 + outline_data = await self._adjust_outline_page_count(outline_data, min_pages, max_pages, confirmed_requirements) + + # 验证调整后的页数 + adjusted_page_count = len(outline_data.get("slides", [])) + logger.info(f"Adjusted outline to {adjusted_page_count} pages") + + if adjusted_page_count < min_pages or adjusted_page_count > max_pages: + logger.error(f"Failed to adjust page count to required range {min_pages}-{max_pages}") + # 如果调整失败,强制设置为中间值 + target_pages = (min_pages + max_pages) // 2 + outline_data = await self._force_page_count(outline_data, target_pages, confirmed_requirements) + else: + logger.info(f"Page count {actual_page_count} is within required range {min_pages}-{max_pages}") + + # 添加页数信息到大纲元数据 + if "metadata" not in outline_data: + outline_data["metadata"] = {} + outline_data["metadata"]["page_count_settings"] = expected_page_count + outline_data["metadata"]["actual_page_count"] = len(outline_data.get("slides", [])) + + # Store outline in project (内存中) + project = await self.project_manager.get_project(project_id) + if project: + project.outline = outline_data + project.updated_at = time.time() + logger.info(f"Successfully saved outline to memory for project {project_id}") + + # Save outline to database (数据库中) - 这是关键步骤 + try: + from .db_project_manager import DatabaseProjectManager + db_manager = DatabaseProjectManager() + save_success = await db_manager.save_project_outline(project_id, outline_data) + + if 
save_success: + logger.info(f"✅ Successfully saved outline to database for project {project_id}") + + # 验证保存是否成功 + saved_project = await db_manager.get_project(project_id) + if saved_project and saved_project.outline: + saved_slides_count = len(saved_project.outline.get('slides', [])) + logger.info(f"✅ Verified: outline saved with {saved_slides_count} slides") + else: + logger.error(f"❌ Verification failed: outline not found in database") + return f"❌ 大纲保存失败:数据库验证失败" + else: + logger.error(f"❌ Failed to save outline to database for project {project_id}") + return f"❌ 大纲保存失败:数据库写入失败" + + except Exception as save_error: + logger.error(f"❌ Exception while saving outline to database: {save_error}") + import traceback + traceback.print_exc() + return f"❌ 大纲保存失败:{str(save_error)}" + + # 更新大纲生成阶段状态为完成 + try: + from .db_project_manager import DatabaseProjectManager + db_manager = DatabaseProjectManager() + + await db_manager.update_stage_status( + project_id, + "outline_generation", + "completed", + 100.0, + { + "outline_title": outline_data.get('title', '未知'), + "slides_count": len(outline_data.get('slides', [])), + "completed_at": time.time() + } + ) + logger.info(f"Successfully updated outline generation stage to completed for project {project_id}") + + except Exception as stage_error: + logger.error(f"Failed to update outline generation stage status: {stage_error}") + + final_page_count = len(outline_data.get('slides', [])) + return f"✅ PPT大纲生成完成!\n\n标题:{outline_data.get('title', '未知')}\n页数:{final_page_count}页\n已保存到数据库\n\n{response.content}" + + except Exception as e: + logger.error(f"Error parsing outline JSON: {e}") + logger.error(f"Response content: {response.content[:500]}...") + + # Try to create a basic outline structure from the response + try: + # Create a fallback outline structure + fallback_outline = { + "title": confirmed_requirements.get('topic', 'AI生成的PPT大纲'), + "slides": [ + { + "page_number": 1, + "title": confirmed_requirements.get('topic', '标题页'), + 
"content_points": ["项目介绍", "主要内容", "核心价值"], + "slide_type": "title" + }, + { + "page_number": 2, + "title": "主要内容", + "content_points": ["内容要点1", "内容要点2", "内容要点3"], + "slide_type": "content" + }, + { + "page_number": 3, + "title": "谢谢观看", + "content_points": ["感谢聆听", "欢迎提问"], + "slide_type": "thankyou" + } + ] + } + + # 验证和修复fallback大纲 + fallback_outline = await self._validate_and_repair_outline_json(fallback_outline, confirmed_requirements) + + # Store fallback outline in project + project = await self.project_manager.get_project(project_id) + if project: + project.outline = fallback_outline + project.updated_at = time.time() + logger.info(f"Saved fallback outline for project {project_id}") + + # Save to database + try: + from .db_project_manager import DatabaseProjectManager + db_manager = DatabaseProjectManager() + save_success = await db_manager.save_project_outline(project_id, fallback_outline) + + if save_success: + logger.info(f"Successfully saved fallback outline to database for project {project_id}") + else: + logger.error(f"Failed to save fallback outline to database for project {project_id}") + except Exception as save_error: + logger.error(f"Exception while saving fallback outline to database: {save_error}") + + final_page_count = len(fallback_outline.get('slides', [])) + return f"✅ PPT大纲生成完成!(使用备用方案)\n\n标题:{fallback_outline.get('title', '未知')}\n页数:{final_page_count}页\n已保存到数据库" + + except Exception as fallback_error: + logger.error(f"Error creating fallback outline: {fallback_error}") + return f"❌ 大纲生成失败:{str(e)}\n\n{response.content}" + + except Exception as e: + logger.error(f"Error in outline generation: {e}") + raise + + async def _adjust_outline_page_count(self, outline_data: Dict[str, Any], min_pages: int, max_pages: int, confirmed_requirements: Dict[str, Any]) -> Dict[str, Any]: + """Adjust outline page count to meet requirements""" + try: + current_slides = outline_data.get("slides", []) + current_count = len(current_slides) + + if current_count 
< min_pages: + # Need to add more slides + logger.info(f"Adding slides to meet minimum requirement: {current_count} -> {min_pages}") + outline_data = await self._expand_outline(outline_data, min_pages, confirmed_requirements) + elif current_count > max_pages: + # Need to reduce slides + logger.info(f"Reducing slides to meet maximum requirement: {current_count} -> {max_pages}") + outline_data = await self._condense_outline(outline_data, max_pages) + + return outline_data + + except Exception as e: + logger.error(f"Error adjusting outline page count: {e}") + return outline_data # Return original if adjustment fails + + async def _expand_outline(self, outline_data: Dict[str, Any], target_pages: int, confirmed_requirements: Dict[str, Any]) -> Dict[str, Any]: + """Expand outline to reach target page count""" + try: + slides = outline_data.get("slides", []) + current_count = len(slides) + needed_slides = target_pages - current_count + + # Generate additional slides based on content + topic = confirmed_requirements.get('topic', outline_data.get('title', '')) + focus_content = confirmed_requirements.get('focus_content', []) + + # Add content slides before the conclusion + conclusion_slide = None + if slides and slides[-1].get('slide_type') in ['thankyou', 'conclusion']: + conclusion_slide = slides.pop() + + for i in range(needed_slides): + page_number = len(slides) + 1 + if i < len(focus_content): + # Use focus content for new slides + new_slide = { + "page_number": page_number, + "title": focus_content[i], + "content_points": [f"{focus_content[i]}的详细介绍", "核心要点", "实际应用"], + "slide_type": "content", + "description": f"详细介绍{focus_content[i]}相关内容" + } + else: + # Generate generic content slides + new_slide = { + "page_number": page_number, + "title": f"{topic} - 补充内容 {i+1}", + "content_points": ["补充要点1", "补充要点2", "补充要点3"], + "slide_type": "content", + "description": f"关于{topic}的补充内容" + } + slides.append(new_slide) + + # Re-add conclusion slide if it existed + if 
conclusion_slide: + conclusion_slide["page_number"] = len(slides) + 1 + slides.append(conclusion_slide) + + # Update page numbers + for i, slide in enumerate(slides): + slide["page_number"] = i + 1 + + outline_data["slides"] = slides + return outline_data + + except Exception as e: + logger.error(f"Error expanding outline: {e}") + return outline_data + + async def _condense_outline(self, outline_data: Dict[str, Any], target_pages: int) -> Dict[str, Any]: + """Condense outline to reach target page count""" + try: + slides = outline_data.get("slides", []) + current_count = len(slides) + + if current_count <= target_pages: + return outline_data + + # Keep title and conclusion slides, condense content slides + title_slides = [s for s in slides if s.get('slide_type') in ['title', 'cover']] + conclusion_slides = [s for s in slides if s.get('slide_type') in ['thankyou', 'conclusion']] + content_slides = [s for s in slides if s.get('slide_type') not in ['title', 'cover', 'thankyou', 'conclusion']] + + # Calculate how many content slides we can keep + reserved_slots = len(title_slides) + len(conclusion_slides) + available_content_slots = target_pages - reserved_slots + + if available_content_slots > 0 and len(content_slides) > available_content_slots: + # Keep the most important content slides + content_slides = content_slides[:available_content_slots] + + # Rebuild slides list + new_slides = title_slides + content_slides + conclusion_slides + + # Update page numbers + for i, slide in enumerate(new_slides): + slide["page_number"] = i + 1 + + outline_data["slides"] = new_slides + return outline_data + + except Exception as e: + logger.error(f"Error condensing outline: {e}") + return outline_data + + async def _force_page_count(self, outline_data: Dict[str, Any], target_pages: int, confirmed_requirements: Dict[str, Any]) -> Dict[str, Any]: + """Force outline to exact page count""" + try: + slides = outline_data.get("slides", []) + current_count = len(slides) + + 
logger.info(f"Forcing page count from {current_count} to {target_pages}") + + if current_count == target_pages: + return outline_data + + # Keep title and conclusion slides + title_slides = [s for s in slides if s.get('slide_type') in ['title', 'cover']] + conclusion_slides = [s for s in slides if s.get('slide_type') in ['thankyou', 'conclusion']] + content_slides = [s for s in slides if s.get('slide_type') not in ['title', 'cover', 'thankyou', 'conclusion']] + + # Calculate content slots needed + reserved_slots = len(title_slides) + len(conclusion_slides) + content_slots_needed = target_pages - reserved_slots + + if content_slots_needed <= 0: + # Only keep title slide if no room for content + new_slides = title_slides[:1] if title_slides else [] + else: + if len(content_slides) > content_slots_needed: + # Reduce content slides + content_slides = content_slides[:content_slots_needed] + elif len(content_slides) < content_slots_needed: + # Add more content slides + topic = confirmed_requirements.get('topic', outline_data.get('title', '')) + focus_content = confirmed_requirements.get('focus_content', []) + + for i in range(content_slots_needed - len(content_slides)): + page_number = len(content_slides) + i + 1 + if i < len(focus_content): + new_slide = { + "page_number": page_number, + "title": focus_content[i], + "content_points": [f"{focus_content[i]}的详细介绍", "核心要点", "实际应用"], + "slide_type": "content", + "description": f"详细介绍{focus_content[i]}相关内容" + } + else: + new_slide = { + "page_number": page_number, + "title": f"{topic} - 内容 {i+1}", + "content_points": ["要点1", "要点2", "要点3"], + "slide_type": "content", + "description": f"关于{topic}的内容" + } + content_slides.append(new_slide) + + # Rebuild slides list + new_slides = title_slides + content_slides + conclusion_slides + + # Update page numbers + for i, slide in enumerate(new_slides): + slide["page_number"] = i + 1 + + outline_data["slides"] = new_slides + logger.info(f"Successfully forced page count to 
{len(new_slides)} pages") + return outline_data + + except Exception as e: + logger.error(f"Error forcing page count: {e}") + return outline_data + + async def _execute_ppt_creation(self, project_id: str, confirmed_requirements: Dict[str, Any], system_prompt: str) -> str: + """Execute PPT creation by generating HTML pages individually with streaming""" + try: + project = await self.project_manager.get_project(project_id) + if not project or not project.outline: + return "❌ 错误:未找到PPT大纲,请先完成大纲生成步骤" + + outline = project.outline + slides = outline.get('slides', []) + + if not slides: + return "❌ 错误:大纲中没有幻灯片信息" + + # 验证大纲页数与需求一致性 + if project.confirmed_requirements: + page_count_settings = project.confirmed_requirements.get('page_count_settings', {}) + if page_count_settings.get('mode') == 'custom_range': + min_pages = page_count_settings.get('min_pages', 8) + max_pages = page_count_settings.get('max_pages', 15) + actual_pages = len(slides) + + if actual_pages < min_pages or actual_pages > max_pages: + logger.warning(f"Outline has {actual_pages} pages, but requirements specify {min_pages}-{max_pages} pages") + return f"⚠️ 错误:大纲有{actual_pages}页,但需求要求{min_pages}-{max_pages}页。请重新生成大纲以符合页数要求。" + + # Initialize slides data - 确保与大纲页数完全一致 + project.slides_data = [] + project.updated_at = time.time() + + # 确保confirmed_requirements包含项目ID,用于模板选择 + if confirmed_requirements: + confirmed_requirements['project_id'] = project_id + + # 验证slides数据结构 + if not slides or len(slides) == 0: + return "❌ 错误:大纲中没有有效的幻灯片数据" + + logger.info(f"Starting PPT generation for {len(slides)} slides based on outline") + + # 确保每个slide都有必要的字段 + for i, slide in enumerate(slides): + if not slide.get('title'): + slide['title'] = f"幻灯片 {i+1}" + if not slide.get('page_number'): + slide['page_number'] = i + 1 + + return f"🚀 开始PPT制作...\n\n将严格按照大纲为 {len(slides)} 页幻灯片逐页生成HTML内容\n大纲页数:{len(slides)}页\n请在编辑器中查看实时生成过程" + + except Exception as e: + logger.error(f"Error in PPT creation: {e}") + raise + + async def 
generate_slides_streaming(self, project_id: str): + """Generate slides with streaming output for real-time display""" + try: + import json + import time + + project = await self.project_manager.get_project(project_id) + if not project: + error_data = {'error': '项目未找到'} + yield f"data: {json.dumps(error_data)}\n\n" + return + + # 检查并确保大纲数据正确 + outline = None + slides = [] + + # 首先尝试从项目中获取大纲 + if project.outline and isinstance(project.outline, dict): + outline = project.outline + slides = outline.get('slides', []) + logger.info(f"Found outline in project with {len(slides)} slides") + + # 如果没有slides或slides为空,尝试从数据库重新加载 + if not slides: + logger.info(f"No slides found in project outline, attempting to reload from database") + logger.error(f"DEBUG: Full outline structure for project {project_id}:") + logger.error(f"Outline type: {type(project.outline)}") + if project.outline: + logger.error(f"Outline keys: {list(project.outline.keys()) if isinstance(project.outline, dict) else 'Not a dict'}") + if isinstance(project.outline, dict) and 'slides' in project.outline: + logger.error(f"Slides type: {type(project.outline['slides'])}, content: {project.outline['slides']}") + + try: + from .db_project_manager import DatabaseProjectManager + db_manager = DatabaseProjectManager() + + # 重新从数据库获取项目数据 + fresh_project = await db_manager.get_project(project_id) + if fresh_project and fresh_project.outline: + outline = fresh_project.outline + slides = outline.get('slides', []) + logger.info(f"Reloaded outline from database with {len(slides)} slides") + + # 更新内存中的项目数据 + project.outline = outline + else: + logger.error(f"Failed to reload project from database or outline is None") + if fresh_project: + logger.error(f"Fresh project outline type: {type(fresh_project.outline)}") + + except Exception as db_error: + logger.error(f"Failed to reload outline from database: {db_error}") + import traceback + logger.error(f"Database reload traceback: {traceback.format_exc()}") + + # 
如果仍然没有slides,检查是否有大纲内容需要解析 + if not slides and outline and 'content' in outline: + logger.info(f"Found outline content, attempting to parse slides") + try: + # 尝试解析大纲内容 + parsed_outline = self._parse_outline_content(outline['content'], project) + slides = parsed_outline.get('slides', []) + logger.info(f"Parsed {len(slides)} slides from outline content") + + # 更新大纲数据 + outline['slides'] = slides + project.outline = outline + + except Exception as parse_error: + logger.error(f"Failed to parse outline content: {parse_error}") + + # 特殊处理:如果outline直接包含slides数组但为空,尝试从content字段解析 + if not slides and outline and isinstance(outline, dict): + # 检查是否有content字段包含JSON格式的大纲 + content_field = outline.get('content', '') + if content_field and isinstance(content_field, str): + logger.info(f"Attempting to parse slides from content field") + try: + import json + # 尝试解析content字段中的JSON + content_data = json.loads(content_field) + if isinstance(content_data, dict) and 'slides' in content_data: + slides = content_data['slides'] + logger.info(f"Successfully parsed {len(slides)} slides from content JSON") + + # 更新outline中的slides + outline['slides'] = slides + project.outline = outline + except json.JSONDecodeError as json_error: + logger.error(f"Failed to parse content as JSON: {json_error}") + except Exception as content_error: + logger.error(f"Failed to extract slides from content: {content_error}") + + # 最后尝试:如果outline本身就是完整的大纲数据(包含title和slides) + if not slides and outline and isinstance(outline, dict): + # 检查outline是否直接包含slides数组 + direct_slides = outline.get('slides', []) + if direct_slides and isinstance(direct_slides, list): + slides = direct_slides + logger.info(f"Found {len(slides)} slides directly in outline") + # 或者检查是否有嵌套的大纲结构 + elif 'outline' in outline and isinstance(outline['outline'], dict): + nested_slides = outline['outline'].get('slides', []) + if nested_slides and isinstance(nested_slides, list): + slides = nested_slides + logger.info(f"Found {len(slides)} slides in 
nested outline structure") + + # 额外调试:打印outline结构以便诊断 + if not slides: + logger.error(f"DEBUG: Full outline structure for project {project_id}:") + logger.error(f"Outline type: {type(outline)}") + if outline: + logger.error(f"Outline keys: {list(outline.keys()) if isinstance(outline, dict) else 'Not a dict'}") + if isinstance(outline, dict): + for key, value in outline.items(): + logger.error(f" {key}: {type(value)} - {len(value) if isinstance(value, (list, dict, str)) else value}") + if key == 'slides' and isinstance(value, list): + logger.error(f" Slides count: {len(value)}") + if value: + logger.error(f" First slide: {value[0] if len(value) > 0 else 'None'}") + elif key == 'content' and isinstance(value, str): + logger.error(f" Content preview: {value[:200]}...") + + # 尝试直接从outline中提取slides,不管结构如何 + if isinstance(outline, dict): + # 递归搜索slides字段 + def find_slides_recursive(obj, path=""): + if isinstance(obj, dict): + for k, v in obj.items(): + current_path = f"{path}.{k}" if path else k + if k == 'slides' and isinstance(v, list) and v: + logger.info(f"Found slides at path: {current_path} with {len(v)} items") + return v + elif isinstance(v, (dict, list)): + result = find_slides_recursive(v, current_path) + if result: + return result + elif isinstance(obj, list): + for i, item in enumerate(obj): + current_path = f"{path}[{i}]" if path else f"[{i}]" + if isinstance(item, (dict, list)): + result = find_slides_recursive(item, current_path) + if result: + return result + return None + + found_slides = find_slides_recursive(outline) + if found_slides: + slides = found_slides + logger.info(f"Successfully found {len(slides)} slides through recursive search") + + # 最后的fallback:如果仍然没有slides,返回错误而不是生成默认大纲 + if not slides: + error_message = "❌ 错误:未找到PPT大纲数据,请先完成大纲生成步骤" + logger.error(f"No slides found for project {project_id}") + logger.error(f"Project outline structure: {type(project.outline)}") + if project.outline: + logger.error(f"Outline keys: 
{list(project.outline.keys()) if isinstance(project.outline, dict) else 'Not a dict'}") + if isinstance(project.outline, dict) and 'slides' in project.outline: + logger.error(f"Slides type: {type(project.outline['slides'])}, length: {len(project.outline['slides']) if isinstance(project.outline['slides'], list) else 'Not a list'}") + error_data = {'error': error_message} + yield f"data: {json.dumps(error_data)}\n\n" + return + + # 如果没有确认需求,使用默认需求配置 + if not project.confirmed_requirements: + logger.info(f"Project {project_id} has no confirmed requirements, using default configuration") + confirmed_requirements = { + "topic": project.topic, + "target_audience": "普通大众", + "focus_content": ["核心概念", "主要特点"], + "tech_highlights": ["技术要点", "实践应用"], + "page_count_settings": {"mode": "ai_decide"}, + "ppt_style": "general", + "description": f"基于主题 '{project.topic}' 的PPT演示" + } + else: + confirmed_requirements = project.confirmed_requirements + + # 确保我们有有效的大纲和slides数据 + if not outline: + outline = project.outline + + if not slides: + slides = outline.get('slides', []) if outline else [] + + # 最终检查:如果仍然没有slides,返回错误 + if not slides: + error_message = "❌ 错误:大纲中没有幻灯片信息,请检查大纲生成是否完成" + logger.error(f"No slides found after all attempts for project {project_id}") + error_data = {'error': error_message} + yield f"data: {json.dumps(error_data)}\n\n" + return + + logger.info(f"Starting PPT generation for project {project_id} with {len(slides)} slides") + + # Load system prompt + system_prompt = self._load_prompts_md_system_prompt() + + # Initialize slides data if not exists + if not project.slides_data: + project.slides_data = [] + + # 检查是否启用并行生成 + parallel_enabled = ai_config.enable_parallel_generation + parallel_count = ai_config.parallel_slides_count if parallel_enabled else 1 + + if parallel_enabled: + logger.info(f"🚀 并行生成已启用,每批生成 {parallel_count} 页") + else: + logger.info(f"📝 使用顺序生成模式") + + # 批量生成幻灯片(支持并行和顺序两种模式) + i = 0 + while i < len(slides): + # 确定本批次要生成的幻灯片 + batch_end = min(i 
+ parallel_count, len(slides)) + batch_slides = slides[i:batch_end] + + # 收集本批次需要生成的幻灯片 + slides_to_generate = [] + slides_to_skip = [] + + for idx in range(i, batch_end): + slide = slides[idx] + + # 检查是否已存在 + existing_slide = None + if project.slides_data and idx < len(project.slides_data): + existing_slide = project.slides_data[idx] + + if existing_slide and existing_slide.get('html_content'): + # 幻灯片已存在,跳过 + if existing_slide.get('is_user_edited', False): + skip_message = f'第{idx+1}页已被用户编辑,跳过重新生成' + else: + skip_message = f'第{idx+1}页已存在,跳过生成' + + skip_data = { + 'type': 'slide_skipped', + 'current': idx + 1, + 'total': len(slides), + 'message': skip_message, + 'slide_data': existing_slide + } + yield f"data: {json.dumps(skip_data)}\n\n" + slides_to_skip.append(idx) + else: + # 需要生成 + slides_to_generate.append((idx, slide)) + + # 如果有需要生成的幻灯片 + if slides_to_generate: + if parallel_enabled and len(slides_to_generate) > 1: + # 并行生成 + logger.info(f"📦 并行生成 {len(slides_to_generate)} 页") + + # 发送进度消息 + for idx, slide in slides_to_generate: + progress_data = { + 'type': 'progress', + 'current': idx + 1, + 'total': len(slides), + 'message': f'正在生成第{idx+1}-{idx+1+len(slides_to_generate)}页...' + } + yield f"data: {json.dumps(progress_data)}\n\n" + + # 创建并行任务 + tasks = [] + for idx, slide in slides_to_generate: + task = self._generate_single_slide_html_with_prompts( + slide, confirmed_requirements, system_prompt, + idx + 1, len(slides), slides, project.slides_data, project_id + ) + tasks.append(task) + + # 并行执行 + results = await asyncio.gather(*tasks, return_exceptions=True) + + # 处理结果 + for (idx, slide), result in zip(slides_to_generate, results): + try: + if isinstance(result, Exception): + raise result + + html_content = result + logger.info(f"✅ 并行生成第{idx+1}页成功") + except Exception as e: + logger.error(f"❌ 并行生成第{idx+1}页失败: {e}") + html_content = f"