From 7b3b5b83b5cf711b39b6878400c49d193c919f47 Mon Sep 17 00:00:00 2001 From: 13315423919 <13315423919@qq.com> Date: Fri, 7 Nov 2025 09:05:20 +0800 Subject: [PATCH] Add File --- .../services/speech_script_exporter.py | 302 ++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 src/landppt/services/speech_script_exporter.py diff --git a/src/landppt/services/speech_script_exporter.py b/src/landppt/services/speech_script_exporter.py new file mode 100644 index 0000000..d7006af --- /dev/null +++ b/src/landppt/services/speech_script_exporter.py @@ -0,0 +1,302 @@ +""" +Speech Script Export Service +Handles exporting speech scripts to various document formats (DOCX, Markdown) +""" + +import logging +import tempfile +import os +from typing import List, Dict, Any, Optional +from pathlib import Path +import asyncio +from concurrent.futures import ThreadPoolExecutor + +from .speech_script_service import SlideScriptData + +logger = logging.getLogger(__name__) + +# Check for optional dependencies +try: + from docx import Document + from docx.shared import Inches + from docx.enum.text import WD_ALIGN_PARAGRAPH + DOCX_AVAILABLE = True +except ImportError: + DOCX_AVAILABLE = False + logger.warning("python-docx not available. DOCX export will be disabled.") + + +class SpeechScriptExporter: + """Service for exporting speech scripts to various document formats""" + + def __init__(self): + self.executor = ThreadPoolExecutor(max_workers=2) + + async def export_to_docx( + self, + scripts: List[SlideScriptData], + project_title: str, + metadata: Optional[Dict[str, Any]] = None + ) -> bytes: + """Export speech scripts to DOCX format""" + if not DOCX_AVAILABLE: + raise ValueError("DOCX export not available. Please install python-docx: pip install python-docx") + + # Run the synchronous DOCX generation in thread pool + loop = asyncio.get_event_loop() + docx_content = await loop.run_in_executor( + self.executor, + self._generate_docx_sync, + scripts, + project_title, + metadata + ) + + return docx_content + + async def export_to_markdown( + self, + scripts: List[SlideScriptData], + project_title: str, + metadata: Optional[Dict[str, Any]] = None + ) -> str: + """Export speech scripts to Markdown format""" + + # Run the markdown generation in thread pool + loop = asyncio.get_event_loop() + markdown_content = await loop.run_in_executor( + self.executor, + self._generate_markdown_sync, + scripts, + project_title, + metadata + ) + + return markdown_content + + def _generate_docx_sync( + self, + scripts: List[SlideScriptData], + project_title: str, + metadata: Optional[Dict[str, Any]] = None + ) -> bytes: + """Generate DOCX document synchronously""" + try: + # Create new document + doc = Document() + + # Set document language and encoding + doc.core_properties.language = 'zh-CN' + + # Add title + title = doc.add_heading(f'{project_title} - 演讲稿', 0) + title.alignment = WD_ALIGN_PARAGRAPH.CENTER + + # Set font for the title to support Chinese characters + for run in title.runs: + run.font.name = 'Microsoft YaHei' + run._element.rPr.rFonts.set('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}eastAsia', 'Microsoft YaHei') + + # Add metadata if provided + if metadata: + doc.add_paragraph() + meta_para = doc.add_paragraph() + meta_run = meta_para.add_run('生成信息:') + meta_run.bold = True + self._set_chinese_font(meta_run) + + if 'generation_time' in metadata: + import time + gen_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(metadata['generation_time'])) + time_para = doc.add_paragraph(f'生成时间:{gen_time}') + self._set_paragraph_chinese_font(time_para) + + if 'total_estimated_duration' in metadata: + duration_para = doc.add_paragraph(f'预计总时长:{metadata["total_estimated_duration"]}') + self._set_paragraph_chinese_font(duration_para) + + if 'customization' in metadata: + custom = metadata['customization'] + if 'tone' in custom: + tone_para = doc.add_paragraph(f'语调风格:{custom["tone"]}') + self._set_paragraph_chinese_font(tone_para) + if 'target_audience' in custom: + audience_para = doc.add_paragraph(f'目标受众:{custom["target_audience"]}') + self._set_paragraph_chinese_font(audience_para) + + doc.add_page_break() + + # Add scripts for each slide + for i, script in enumerate(scripts): + # Add slide header + if script.slide_index == -1: + # Opening remarks + slide_header = doc.add_heading('开场白', level=1) + elif script.slide_index >= len(scripts) - 1 and script.slide_title == "结束语": + # Closing remarks + slide_header = doc.add_heading('结束语', level=1) + else: + # Regular slide + slide_header = doc.add_heading(f'第{script.slide_index + 1}页:{script.slide_title}', level=1) + + # Set Chinese font for header + self._set_paragraph_chinese_font(slide_header) + + # Add duration if available + if script.estimated_duration: + duration_para = doc.add_paragraph() + duration_run = duration_para.add_run(f'预计时长:{script.estimated_duration}') + duration_run.italic = True + self._set_chinese_font(duration_run) + + # Add script content + doc.add_paragraph() + script_para = doc.add_paragraph(script.script_content) + script_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY + self._set_paragraph_chinese_font(script_para) + + # Add speaker notes if available + if script.speaker_notes: + doc.add_paragraph() + notes_para = doc.add_paragraph() + notes_title_run = notes_para.add_run('演讲提示:') + notes_title_run.bold = True + self._set_chinese_font(notes_title_run) + + notes_content_run = notes_para.add_run(script.speaker_notes) + notes_content_run.italic = True + self._set_chinese_font(notes_content_run) + + # Add page break except for the last script + if i < len(scripts) - 1: + doc.add_page_break() + + # Save to temporary file and read content + with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as temp_file: + doc.save(temp_file.name) + temp_file_path = temp_file.name + + try: + with open(temp_file_path, 'rb') as f: + content = f.read() + return content + finally: + # Clean up temporary file + try: + os.unlink(temp_file_path) + except: + pass + + except Exception as e: + logger.error(f"Error generating DOCX: {e}") + raise + + def _set_chinese_font(self, run): + """Set Chinese font for a run to prevent encoding issues""" + try: + run.font.name = 'Microsoft YaHei' + # Set East Asian font for Chinese characters + run._element.rPr.rFonts.set( + '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}eastAsia', + 'Microsoft YaHei' + ) + except Exception as e: + logger.warning(f"Failed to set Chinese font: {e}") + + def _set_paragraph_chinese_font(self, paragraph): + """Set Chinese font for all runs in a paragraph""" + try: + for run in paragraph.runs: + self._set_chinese_font(run) + except Exception as e: + logger.warning(f"Failed to set paragraph Chinese font: {e}") + + def _generate_markdown_sync( + self, + scripts: List[SlideScriptData], + project_title: str, + metadata: Optional[Dict[str, Any]] = None + ) -> str: + """Generate Markdown document synchronously""" + try: + lines = [] + + # Add title + lines.append(f'# {project_title} - 演讲稿\n') + + # Add metadata if provided + if metadata: + lines.append('## 生成信息\n') + + if 'generation_time' in metadata: + import time + gen_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(metadata['generation_time'])) + lines.append(f'- **生成时间**:{gen_time}') + + if 'total_estimated_duration' in metadata: + lines.append(f'- **预计总时长**:{metadata["total_estimated_duration"]}') + + if 'customization' in metadata: + custom = metadata['customization'] + if 'tone' in custom: + lines.append(f'- **语调风格**:{custom["tone"]}') + if 'target_audience' in custom: + lines.append(f'- **目标受众**:{custom["target_audience"]}') + + lines.append('') + + lines.append('---\n') + + # Add scripts for each slide + for script in scripts: + # Add slide header + if script.slide_index == -1: + # Opening remarks + lines.append('## 开场白\n') + elif script.slide_index >= len(scripts) - 1 and script.slide_title == "结束语": + # Closing remarks + lines.append('## 结束语\n') + else: + # Regular slide + lines.append(f'## 第{script.slide_index + 1}页:{script.slide_title}\n') + + # Add duration if available + if script.estimated_duration: + lines.append(f'**预计时长**:{script.estimated_duration}\n') + + # Add script content + lines.append(script.script_content) + lines.append('') + + # Add speaker notes if available + if script.speaker_notes: + lines.append('> **演讲提示**:' + script.speaker_notes) + lines.append('') + + lines.append('---\n') + + return '\n'.join(lines) + + except Exception as e: + logger.error(f"Error generating Markdown: {e}") + raise + + def is_docx_available(self) -> bool: + """Check if DOCX export is available""" + return DOCX_AVAILABLE + + async def cleanup(self): + """Cleanup resources""" + if hasattr(self, 'executor'): + self.executor.shutdown(wait=True) + + +# Global instance +_speech_script_exporter = None + +def get_speech_script_exporter() -> SpeechScriptExporter: + """Get global speech script exporter instance""" + global _speech_script_exporter + if _speech_script_exporter is None: + _speech_script_exporter = SpeechScriptExporter() + return _speech_script_exporter