Files
LandPPT/src/summeryanyfile/main.py
2025-11-07 09:05:41 +08:00

346 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
主入口模块 - 命令行界面和主要功能
"""
import asyncio
import json
import sys
from pathlib import Path
from typing import Optional
import click
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn
from rich.panel import Panel
from rich.table import Table
from .config.settings import load_settings, create_default_config, create_env_template
from .core.models import ProcessingConfig, ChunkStrategy
from .generators.ppt_generator import PPTOutlineGenerator
from .utils.logger import setup_logging, get_logger
from .utils.file_handler import FileHandler
from .utils.validators import validate_file_path, validate_url, validate_config
console = Console()
logger = get_logger(__name__)
@click.group()
@click.option('--config', '-c', help='配置文件路径')
@click.option('--log-level', default='INFO', help='日志级别')
@click.option('--debug', is_flag=True, help='启用调试模式')
@click.pass_context
def cli(ctx, config, log_level, debug):
"""通用文本转PPT大纲生成器"""
ctx.ensure_object(dict)
# 设置日志
if debug:
log_level = 'DEBUG'
setup_logging(level=log_level, rich_logging=True)
# 加载设置
settings = load_settings(config_file=config, debug_mode=debug)
ctx.obj['settings'] = settings
if debug:
console.print(f"[dim]配置加载完成: {settings.llm_provider}/{settings.llm_model}[/dim]")
@cli.command()
@click.argument('input_path')
@click.option('--output', '-o', help='输出文件路径')
@click.option('--encoding', help='文件编码')
@click.option('--max-slides', type=int, help='最大幻灯片数量')
@click.option('--min-slides', type=int, help='最小幻灯片数量')
@click.option('--chunk-size', type=int, help='文档块大小')
@click.option('--chunk-strategy', type=click.Choice(['paragraph', 'semantic', 'recursive', 'hybrid', 'fast']), help='分块策略')
@click.option('--model', help='LLM模型名称')
@click.option('--provider', type=click.Choice(['openai', 'anthropic', 'azure']), help='LLM提供商')
@click.option('--temperature', type=float, help='温度参数 (0.0-2.0)')
@click.option('--max-tokens', type=int, help='最大token数量')
@click.option('--base-url', help='自定义OpenAI API端点URL')
@click.option('--save-markdown', is_flag=True, help='保存转换后的Markdown文件到temp目录')
@click.option('--temp-dir', help='自定义temp目录路径')
@click.option('--no-magic-pdf', is_flag=True, help='禁用Magic-PDF强制使用MarkItDown处理PDF')
@click.option('--no-progress', is_flag=True, help='禁用进度条')
@click.pass_context
def generate(ctx, input_path, output, encoding, max_slides, min_slides, chunk_size, chunk_strategy,
model, provider, temperature, max_tokens, base_url, save_markdown, temp_dir, no_magic_pdf, no_progress):
"""生成PPT大纲"""
settings = ctx.obj['settings']
# 更新设置
if max_slides:
settings.max_slides = max_slides
if min_slides:
settings.min_slides = min_slides
if chunk_size:
settings.chunk_size = chunk_size
if chunk_strategy:
settings.chunk_strategy = chunk_strategy
if model:
settings.llm_model = model
if provider:
settings.llm_provider = provider
if temperature is not None:
settings.temperature = temperature
if max_tokens:
settings.max_tokens = max_tokens
if base_url:
settings.openai_base_url = base_url
# 验证配置
config_errors = validate_config(settings.__dict__)
if config_errors:
console.print("[red]配置错误:[/red]")
for error in config_errors:
console.print(f"{error}")
sys.exit(1)
# 运行生成
asyncio.run(_run_generation(
input_path, output, encoding, settings, not no_progress, save_markdown, temp_dir, not no_magic_pdf
))
async def _run_generation(
input_path: str,
output_path: Optional[str],
encoding: Optional[str],
settings,
show_progress: bool,
save_markdown: bool = False,
temp_dir: Optional[str] = None,
use_magic_pdf: bool = True
):
"""运行PPT生成"""
try:
# 验证输入
if validate_url(input_path):
console.print(f"[blue]正在处理URL:[/blue] {input_path}")
file_handler = FileHandler()
local_path = file_handler.handle_input(input_path)[0]
is_temp = True
elif validate_file_path(input_path):
console.print(f"[blue]正在处理文件:[/blue] {input_path}")
local_path = input_path
is_temp = False
else:
console.print(f"[red]无效的输入路径:[/red] {input_path}")
sys.exit(1)
# 创建生成器
config = settings.to_processing_config()
generator = PPTOutlineGenerator(
config,
save_markdown=save_markdown,
temp_dir=temp_dir,
use_magic_pdf=use_magic_pdf,
cache_dir=None # 使用默认缓存目录
)
# 显示配置信息
if save_markdown:
markdown_dir = temp_dir or generator.document_processor.temp_dir
console.print(f"[yellow]Markdown文件将保存到:[/yellow] {markdown_dir}")
# 显示PDF处理方式
if use_magic_pdf:
console.print("[green]PDF转换:[/green] 使用Magic-PDF (本地高质量)")
else:
console.print("[yellow]PDF转换:[/yellow] 使用MarkItDown (标准质量)")
# 进度回调
progress_task = None
if show_progress:
progress = Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
TimeElapsedColumn(),
console=console
)
progress.start()
progress_task = progress.add_task("初始化...", total=100)
def progress_callback(step_name: str, percent: float):
progress.update(progress_task, description=step_name, completed=percent)
else:
def progress_callback(step_name: str, percent: float):
console.print(f"[dim]{step_name} ({percent:.1f}%)[/dim]")
try:
# 生成PPT大纲
outline = await generator.generate_from_file(
local_path,
encoding=encoding,
progress_callback=progress_callback
)
if show_progress:
progress.stop()
# 输出结果
result_json = outline.to_dict()
if output_path:
# 保存到文件
output_file = Path(output_path)
output_file.parent.mkdir(parents=True, exist_ok=True)
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(result_json, f, ensure_ascii=False, indent=2)
console.print(f"[green]PPT大纲已保存到:[/green] {output_file}")
else:
# 输出到控制台
console.print(json.dumps(result_json, ensure_ascii=False, indent=2))
# 显示摘要
_show_generation_summary(outline)
finally:
if show_progress and progress_task:
progress.stop()
# 清理临时文件
if is_temp:
file_handler.cleanup_temp_file(local_path)
except Exception as e:
console.print(f"[red]生成失败:[/red] {e}")
logger.error(f"生成失败: {e}", exc_info=True)
sys.exit(1)
def _show_generation_summary(outline):
"""显示生成摘要"""
table = Table(title="PPT大纲摘要")
table.add_column("属性", style="cyan")
table.add_column("", style="green")
table.add_row("标题", outline.title)
table.add_row("总页数", str(outline.total_pages))
table.add_row("状态", outline.page_count_mode)
# 统计幻灯片类型
type_counts = {}
for slide in outline.slides:
slide_type = slide.slide_type
type_counts[slide_type] = type_counts.get(slide_type, 0) + 1
type_summary = ", ".join([f"{k}: {v}" for k, v in type_counts.items()])
table.add_row("幻灯片类型分布", type_summary)
console.print(table)
@cli.command()
@click.option('--provider', type=click.Choice(['openai', 'anthropic', 'azure']), help='LLM提供商')
def validate_setup(provider):
"""验证设置和API连接"""
console.print("[blue]正在验证设置...[/blue]")
settings = load_settings()
# 验证配置
config_errors = validate_config(settings.__dict__)
if config_errors:
console.print("[red]配置错误:[/red]")
for error in config_errors:
console.print(f"{error}")
return
# 验证LLM连接
from .core.llm_manager import LLMManager
llm_manager = LLMManager()
test_provider = provider or settings.llm_provider
if llm_manager.validate_configuration(test_provider, **settings.get_llm_kwargs()):
console.print(f"[green]✓[/green] {test_provider} 配置有效")
else:
console.print(f"[red]✗[/red] {test_provider} 配置无效")
return
# 测试LLM连接
try:
llm = llm_manager.get_llm(
model=settings.llm_model,
provider=test_provider,
**settings.get_llm_kwargs()
)
console.print(f"[green]✓[/green] LLM连接成功: {settings.llm_model}")
except Exception as e:
console.print(f"[red]✗[/red] LLM连接失败: {e}")
@cli.command()
def init_config():
"""初始化配置文件"""
console.print("[blue]正在创建配置文件...[/blue]")
create_default_config()
create_env_template()
console.print("[green]配置文件创建完成![/green]")
console.print("请编辑 .env 文件并填入您的API密钥")
@cli.command()
@click.argument('input_path')
def analyze(input_path):
"""分析文档结构"""
console.print(f"[blue]正在分析文档:[/blue] {input_path}")
try:
from .core.document_processor import DocumentProcessor
processor = DocumentProcessor()
doc_info = processor.load_document(input_path)
# 显示文档信息
info_table = Table(title="文档信息")
info_table.add_column("属性", style="cyan")
info_table.add_column("", style="green")
info_table.add_row("标题", doc_info.title)
info_table.add_row("文件类型", doc_info.file_type)
info_table.add_row("编码", doc_info.encoding)
info_table.add_row("大小", f"{doc_info.size:,} 字节")
info_table.add_row("内容长度", f"{len(doc_info.content):,} 字符")
console.print(info_table)
# 显示内容预览
preview = doc_info.content[:500] + "..." if len(doc_info.content) > 500 else doc_info.content
console.print(Panel(preview, title="内容预览"))
except Exception as e:
console.print(f"[red]分析失败:[/red] {e}")
@cli.command()
def list_models():
"""列出支持的模型"""
from .core.llm_manager import LLMManager
llm_manager = LLMManager()
for provider in llm_manager.SUPPORTED_PROVIDERS:
models = llm_manager.list_available_models(provider)
table = Table(title=f"{provider.upper()} 模型")
table.add_column("模型名称", style="cyan")
for model in models:
table.add_row(model)
console.print(table)
if __name__ == '__main__':
cli()