Add File
This commit is contained in:
302
src/landppt/services/speech_script_exporter.py
Normal file
302
src/landppt/services/speech_script_exporter.py
Normal file
@@ -0,0 +1,302 @@
|
||||
"""
|
||||
Speech Script Export Service
|
||||
Handles exporting speech scripts to various document formats (DOCX, Markdown)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import tempfile
|
||||
import os
|
||||
from typing import List, Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from .speech_script_service import SlideScriptData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Check for optional dependencies
|
||||
try:
|
||||
from docx import Document
|
||||
from docx.shared import Inches
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||
DOCX_AVAILABLE = True
|
||||
except ImportError:
|
||||
DOCX_AVAILABLE = False
|
||||
logger.warning("python-docx not available. DOCX export will be disabled.")
|
||||
|
||||
|
||||
class SpeechScriptExporter:
|
||||
"""Service for exporting speech scripts to various document formats"""
|
||||
|
||||
def __init__(self):
|
||||
self.executor = ThreadPoolExecutor(max_workers=2)
|
||||
|
||||
async def export_to_docx(
|
||||
self,
|
||||
scripts: List[SlideScriptData],
|
||||
project_title: str,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
) -> bytes:
|
||||
"""Export speech scripts to DOCX format"""
|
||||
if not DOCX_AVAILABLE:
|
||||
raise ValueError("DOCX export not available. Please install python-docx: pip install python-docx")
|
||||
|
||||
# Run the synchronous DOCX generation in thread pool
|
||||
loop = asyncio.get_event_loop()
|
||||
docx_content = await loop.run_in_executor(
|
||||
self.executor,
|
||||
self._generate_docx_sync,
|
||||
scripts,
|
||||
project_title,
|
||||
metadata
|
||||
)
|
||||
|
||||
return docx_content
|
||||
|
||||
async def export_to_markdown(
|
||||
self,
|
||||
scripts: List[SlideScriptData],
|
||||
project_title: str,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
) -> str:
|
||||
"""Export speech scripts to Markdown format"""
|
||||
|
||||
# Run the markdown generation in thread pool
|
||||
loop = asyncio.get_event_loop()
|
||||
markdown_content = await loop.run_in_executor(
|
||||
self.executor,
|
||||
self._generate_markdown_sync,
|
||||
scripts,
|
||||
project_title,
|
||||
metadata
|
||||
)
|
||||
|
||||
return markdown_content
|
||||
|
||||
def _generate_docx_sync(
|
||||
self,
|
||||
scripts: List[SlideScriptData],
|
||||
project_title: str,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
) -> bytes:
|
||||
"""Generate DOCX document synchronously"""
|
||||
try:
|
||||
# Create new document
|
||||
doc = Document()
|
||||
|
||||
# Set document language and encoding
|
||||
doc.core_properties.language = 'zh-CN'
|
||||
|
||||
# Add title
|
||||
title = doc.add_heading(f'{project_title} - 演讲稿', 0)
|
||||
title.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
|
||||
# Set font for the title to support Chinese characters
|
||||
for run in title.runs:
|
||||
run.font.name = 'Microsoft YaHei'
|
||||
run._element.rPr.rFonts.set('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}eastAsia', 'Microsoft YaHei')
|
||||
|
||||
# Add metadata if provided
|
||||
if metadata:
|
||||
doc.add_paragraph()
|
||||
meta_para = doc.add_paragraph()
|
||||
meta_run = meta_para.add_run('生成信息:')
|
||||
meta_run.bold = True
|
||||
self._set_chinese_font(meta_run)
|
||||
|
||||
if 'generation_time' in metadata:
|
||||
import time
|
||||
gen_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(metadata['generation_time']))
|
||||
time_para = doc.add_paragraph(f'生成时间:{gen_time}')
|
||||
self._set_paragraph_chinese_font(time_para)
|
||||
|
||||
if 'total_estimated_duration' in metadata:
|
||||
duration_para = doc.add_paragraph(f'预计总时长:{metadata["total_estimated_duration"]}')
|
||||
self._set_paragraph_chinese_font(duration_para)
|
||||
|
||||
if 'customization' in metadata:
|
||||
custom = metadata['customization']
|
||||
if 'tone' in custom:
|
||||
tone_para = doc.add_paragraph(f'语调风格:{custom["tone"]}')
|
||||
self._set_paragraph_chinese_font(tone_para)
|
||||
if 'target_audience' in custom:
|
||||
audience_para = doc.add_paragraph(f'目标受众:{custom["target_audience"]}')
|
||||
self._set_paragraph_chinese_font(audience_para)
|
||||
|
||||
doc.add_page_break()
|
||||
|
||||
# Add scripts for each slide
|
||||
for i, script in enumerate(scripts):
|
||||
# Add slide header
|
||||
if script.slide_index == -1:
|
||||
# Opening remarks
|
||||
slide_header = doc.add_heading('开场白', level=1)
|
||||
elif script.slide_index >= len(scripts) - 1 and script.slide_title == "结束语":
|
||||
# Closing remarks
|
||||
slide_header = doc.add_heading('结束语', level=1)
|
||||
else:
|
||||
# Regular slide
|
||||
slide_header = doc.add_heading(f'第{script.slide_index + 1}页:{script.slide_title}', level=1)
|
||||
|
||||
# Set Chinese font for header
|
||||
self._set_paragraph_chinese_font(slide_header)
|
||||
|
||||
# Add duration if available
|
||||
if script.estimated_duration:
|
||||
duration_para = doc.add_paragraph()
|
||||
duration_run = duration_para.add_run(f'预计时长:{script.estimated_duration}')
|
||||
duration_run.italic = True
|
||||
self._set_chinese_font(duration_run)
|
||||
|
||||
# Add script content
|
||||
doc.add_paragraph()
|
||||
script_para = doc.add_paragraph(script.script_content)
|
||||
script_para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
|
||||
self._set_paragraph_chinese_font(script_para)
|
||||
|
||||
# Add speaker notes if available
|
||||
if script.speaker_notes:
|
||||
doc.add_paragraph()
|
||||
notes_para = doc.add_paragraph()
|
||||
notes_title_run = notes_para.add_run('演讲提示:')
|
||||
notes_title_run.bold = True
|
||||
self._set_chinese_font(notes_title_run)
|
||||
|
||||
notes_content_run = notes_para.add_run(script.speaker_notes)
|
||||
notes_content_run.italic = True
|
||||
self._set_chinese_font(notes_content_run)
|
||||
|
||||
# Add page break except for the last script
|
||||
if i < len(scripts) - 1:
|
||||
doc.add_page_break()
|
||||
|
||||
# Save to temporary file and read content
|
||||
with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as temp_file:
|
||||
doc.save(temp_file.name)
|
||||
temp_file_path = temp_file.name
|
||||
|
||||
try:
|
||||
with open(temp_file_path, 'rb') as f:
|
||||
content = f.read()
|
||||
return content
|
||||
finally:
|
||||
# Clean up temporary file
|
||||
try:
|
||||
os.unlink(temp_file_path)
|
||||
except:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating DOCX: {e}")
|
||||
raise
|
||||
|
||||
def _set_chinese_font(self, run):
|
||||
"""Set Chinese font for a run to prevent encoding issues"""
|
||||
try:
|
||||
run.font.name = 'Microsoft YaHei'
|
||||
# Set East Asian font for Chinese characters
|
||||
run._element.rPr.rFonts.set(
|
||||
'{http://schemas.openxmlformats.org/wordprocessingml/2006/main}eastAsia',
|
||||
'Microsoft YaHei'
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to set Chinese font: {e}")
|
||||
|
||||
def _set_paragraph_chinese_font(self, paragraph):
|
||||
"""Set Chinese font for all runs in a paragraph"""
|
||||
try:
|
||||
for run in paragraph.runs:
|
||||
self._set_chinese_font(run)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to set paragraph Chinese font: {e}")
|
||||
|
||||
def _generate_markdown_sync(
|
||||
self,
|
||||
scripts: List[SlideScriptData],
|
||||
project_title: str,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
) -> str:
|
||||
"""Generate Markdown document synchronously"""
|
||||
try:
|
||||
lines = []
|
||||
|
||||
# Add title
|
||||
lines.append(f'# {project_title} - 演讲稿\n')
|
||||
|
||||
# Add metadata if provided
|
||||
if metadata:
|
||||
lines.append('## 生成信息\n')
|
||||
|
||||
if 'generation_time' in metadata:
|
||||
import time
|
||||
gen_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(metadata['generation_time']))
|
||||
lines.append(f'- **生成时间**:{gen_time}')
|
||||
|
||||
if 'total_estimated_duration' in metadata:
|
||||
lines.append(f'- **预计总时长**:{metadata["total_estimated_duration"]}')
|
||||
|
||||
if 'customization' in metadata:
|
||||
custom = metadata['customization']
|
||||
if 'tone' in custom:
|
||||
lines.append(f'- **语调风格**:{custom["tone"]}')
|
||||
if 'target_audience' in custom:
|
||||
lines.append(f'- **目标受众**:{custom["target_audience"]}')
|
||||
|
||||
lines.append('')
|
||||
|
||||
lines.append('---\n')
|
||||
|
||||
# Add scripts for each slide
|
||||
for script in scripts:
|
||||
# Add slide header
|
||||
if script.slide_index == -1:
|
||||
# Opening remarks
|
||||
lines.append('## 开场白\n')
|
||||
elif script.slide_index >= len(scripts) - 1 and script.slide_title == "结束语":
|
||||
# Closing remarks
|
||||
lines.append('## 结束语\n')
|
||||
else:
|
||||
# Regular slide
|
||||
lines.append(f'## 第{script.slide_index + 1}页:{script.slide_title}\n')
|
||||
|
||||
# Add duration if available
|
||||
if script.estimated_duration:
|
||||
lines.append(f'**预计时长**:{script.estimated_duration}\n')
|
||||
|
||||
# Add script content
|
||||
lines.append(script.script_content)
|
||||
lines.append('')
|
||||
|
||||
# Add speaker notes if available
|
||||
if script.speaker_notes:
|
||||
lines.append('> **演讲提示**:' + script.speaker_notes)
|
||||
lines.append('')
|
||||
|
||||
lines.append('---\n')
|
||||
|
||||
return '\n'.join(lines)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating Markdown: {e}")
|
||||
raise
|
||||
|
||||
def is_docx_available(self) -> bool:
|
||||
"""Check if DOCX export is available"""
|
||||
return DOCX_AVAILABLE
|
||||
|
||||
async def cleanup(self):
|
||||
"""Cleanup resources"""
|
||||
if hasattr(self, 'executor'):
|
||||
self.executor.shutdown(wait=True)
|
||||
|
||||
|
||||
# Global instance
|
||||
_speech_script_exporter = None
|
||||
|
||||
def get_speech_script_exporter() -> SpeechScriptExporter:
|
||||
"""Get global speech script exporter instance"""
|
||||
global _speech_script_exporter
|
||||
if _speech_script_exporter is None:
|
||||
_speech_script_exporter = SpeechScriptExporter()
|
||||
return _speech_script_exporter
|
||||
Reference in New Issue
Block a user