Add File

2025-11-07 09:05:15 +08:00
parent e0a0851067
commit 49894e4913
1 changed files with 351 additions and 0 deletions
--- a/src/landppt/services/image/providers/unsplash_provider.py
+++ b/src/landppt/services/image/providers/unsplash_provider.py
@@ -0,0 +1,351 @@
+"""
+Unsplash图片搜索提供者
+"""
+
+import asyncio
+import logging
+import time
+from typing import List, Optional, Dict, Any
+from pathlib import Path
+import aiohttp
+import hashlib
+
+from ..models import (
+    ImageInfo, ImageSearchRequest, ImageSearchResult, ImageOperationResult,
+    ImageSourceType, ImageProvider, ImageFormat, ImageMetadata, ImageTag, ImageLicense
+)
+from .base import ImageSearchProvider
+
+logger = logging.getLogger(__name__)
+
+
+class UnsplashSearchProvider(ImageSearchProvider):
+    """Unsplash图片搜索提供者"""
+    
+    def __init__(self, config: Dict[str, Any]):
+        self.api_key = config.get('api_key', '')
+        self.api_base = config.get('api_base', 'https://api.unsplash.com')
+        self.per_page = config.get('per_page', 20)
+        self.rate_limit_requests = config.get('rate_limit_requests', 50)
+        self.rate_limit_window = config.get('rate_limit_window', 3600)  # 1小时
+        self.timeout = config.get('timeout', 30)
+
+        # 请求限制跟踪
+        self._request_times = []
+
+        # 设置enabled状态基于API密钥
+        config_with_enabled = config.copy()
+        config_with_enabled['enabled'] = bool(self.api_key)
+
+        super().__init__(ImageProvider.UNSPLASH, config_with_enabled)
+    
+    async def search(self, request: ImageSearchRequest) -> ImageSearchResult:
+        """搜索图片"""
+        start_time = time.time()
+        
+        if not self.enabled:
+            return ImageSearchResult(
+                images=[], total_count=0, page=request.page,
+                per_page=request.per_page, has_next=False, has_prev=False,
+                search_time=0.0, provider=self.provider,
+                error="Unsplash API key not configured"
+            )
+        
+        try:
+            # 检查请求限制
+            if not self._check_rate_limit():
+                return ImageSearchResult(
+                    images=[], total_count=0, page=request.page,
+                    per_page=request.per_page, has_next=False, has_prev=False,
+                    search_time=time.time() - start_time, provider=self.provider,
+                    error="Rate limit exceeded"
+                )
+            
+            # 构建搜索URL
+            url = f"{self.api_base}/search/photos"
+            params = {
+                'client_id': self.api_key,
+                'query': request.query,
+                'page': request.page,
+                'per_page': min(request.per_page, self.per_page),
+                'order_by': 'relevant'
+            }
+            
+            # 添加语言参数
+            if hasattr(request, 'language') and request.language:
+                # 将中文语言代码转换为Unsplash支持的格式
+                lang_map = {
+                    'zh': 'en',  # Unsplash主要支持英文，中文查询会自动处理
+                    'zh-cn': 'en',
+                    'zh-tw': 'en'
+                }
+                params['lang'] = lang_map.get(request.language.lower(), request.language.lower())
+            
+            # 发送请求
+            logger.debug(f"Unsplash search: {url} with params: {params}")
+            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.timeout)) as session:
+                async with session.get(url, params=params) as response:
+                    if response.status == 200:
+                        data = await response.json()
+                        logger.debug(f"Unsplash API returned {len(data.get('results', []))} results")
+                        images = await self._parse_search_results(data)
+                        logger.debug(f"Successfully parsed {len(images)} images")
+
+                        total_count = data.get('total', 0)
+                        total_pages = data.get('total_pages', 0)
+                        current_page = request.page
+                        
+                        return ImageSearchResult(
+                            images=images,
+                            total_count=total_count,
+                            page=current_page,
+                            per_page=request.per_page,
+                            has_next=current_page < total_pages,
+                            has_prev=current_page > 1,
+                            search_time=time.time() - start_time,
+                            provider=self.provider
+                        )
+                    else:
+                        error_msg = f"Unsplash API error: {response.status}"
+                        if response.status == 401:
+                            error_msg = "Invalid Unsplash API key"
+                        elif response.status == 403:
+                            error_msg = "Unsplash API rate limit exceeded"
+                        
+                        logger.error(f"Unsplash search failed: {error_msg}")
+                        return ImageSearchResult(
+                            images=[], total_count=0, page=request.page,
+                            per_page=request.per_page, has_next=False, has_prev=False,
+                            search_time=time.time() - start_time, provider=self.provider,
+                            error=error_msg
+                        )
+                        
+        except Exception as e:
+            logger.error(f"Unsplash search failed: {e}")
+            return ImageSearchResult(
+                images=[], total_count=0, page=request.page,
+                per_page=request.per_page, has_next=False, has_prev=False,
+                search_time=time.time() - start_time, provider=self.provider,
+                error=str(e)
+            )
+    
+    async def _parse_search_results(self, data: Dict[str, Any]) -> List[ImageInfo]:
+        """解析搜索结果"""
+        images = []
+        results = data.get('results', [])
+        
+        for item in results:
+            try:
+                image_info = await self._create_image_info_from_unsplash(item)
+                if image_info:
+                    images.append(image_info)
+            except Exception as e:
+                logger.warning(f"Failed to parse Unsplash image: {e}")
+                continue
+        
+        return images
+    
+    async def _create_image_info_from_unsplash(self, item: Dict[str, Any]) -> Optional[ImageInfo]:
+        """从Unsplash数据创建ImageInfo"""
+        try:
+            # 基本信息
+            image_id = item.get('id', '')
+            if not image_id:
+                return None
+            
+            # 生成唯一的内部ID
+            internal_id = hashlib.md5(f"unsplash_{image_id}".encode()).hexdigest()
+            
+            # 图片URLs
+            urls = item.get('urls', {})
+            original_url = urls.get('raw', urls.get('full', urls.get('regular', '')))
+
+            # 如果仍然没有URL，记录错误并跳过
+            if not original_url:
+                logger.warning(f"Unsplash image {image_id} has no valid URL: {urls}")
+                return None
+
+            # 图片尺寸
+            width = item.get('width', 0)
+            height = item.get('height', 0)
+            
+            # 估算文件大小（基于尺寸的粗略估算）
+            estimated_size = int(width * height * 0.3)  # 假设每像素0.3字节
+            
+            # 创建元数据
+            metadata = ImageMetadata(
+                width=width,
+                height=height,
+                format=ImageFormat.JPEG,  # Unsplash主要提供JPEG格式
+                file_size=estimated_size,
+                color_mode='RGB'
+            )
+            
+            # 标签
+            tags = []
+            if 'tags' in item:
+                for tag_item in item['tags']:
+                    if isinstance(tag_item, dict) and 'title' in tag_item:
+                        tags.append(ImageTag(name=tag_item['title'], category='unsplash'))
+                    elif isinstance(tag_item, str):
+                        tags.append(ImageTag(name=tag_item, category='unsplash'))
+            
+            # 用户信息
+            user = item.get('user', {})
+            author = user.get('name', '')
+            author_url = user.get('links', {}).get('html', '')
+            
+            # 创建ImageInfo
+            image_info = ImageInfo(
+                image_id=internal_id,
+                source_type=ImageSourceType.WEB_SEARCH,
+                provider=ImageProvider.UNSPLASH,
+                original_url=original_url,
+                local_path='',  # 将在下载时设置
+                filename=self._generate_meaningful_filename(item, image_id),
+                title=item.get('alt_description', item.get('description', f'Unsplash Image {image_id}')),
+                description=item.get('description', ''),
+                alt_text=item.get('alt_description', ''),
+                metadata=metadata,
+                tags=tags,
+                license=ImageLicense.UNSPLASH_LICENSE,
+                author=author,
+                author_url=author_url,
+                source_url=item.get('links', {}).get('html', ''),
+                created_at=time.time()
+            )
+            
+            return image_info
+            
+        except Exception as e:
+            logger.error(f"Failed to create ImageInfo from Unsplash data: {e}")
+            return None
+    
+    async def get_image_details(self, image_id: str) -> Optional[ImageInfo]:
+        """获取图片详细信息"""
+        # 从image_id中提取Unsplash ID
+        if image_id.startswith('unsplash_'):
+            unsplash_id = image_id[9:]  # 移除'unsplash_'前缀
+        else:
+            unsplash_id = image_id
+        
+        try:
+            url = f"{self.api_base}/photos/{unsplash_id}"
+            params = {'client_id': self.api_key}
+            
+            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.timeout)) as session:
+                async with session.get(url, params=params) as response:
+                    if response.status == 200:
+                        data = await response.json()
+                        return await self._create_image_info_from_unsplash(data)
+                    else:
+                        logger.error(f"Failed to get Unsplash image details: {response.status}")
+                        return None
+                        
+        except Exception as e:
+            logger.error(f"Failed to get Unsplash image details: {e}")
+            return None
+    
+    async def download_image(self, image_info: ImageInfo, save_path: Path) -> ImageOperationResult:
+        """下载图片到本地"""
+        try:
+            if not image_info.original_url:
+                return ImageOperationResult(
+                    success=False,
+                    message="No download URL available",
+                    error_code="no_url"
+                )
+            
+            # 创建保存目录
+            save_path.parent.mkdir(parents=True, exist_ok=True)
+            
+            # 下载图片
+            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=60)) as session:
+                async with session.get(image_info.original_url) as response:
+                    if response.status == 200:
+                        with open(save_path, 'wb') as f:
+                            async for chunk in response.content.iter_chunked(8192):
+                                f.write(chunk)
+                        
+                        # 更新本地路径
+                        image_info.local_path = str(save_path)
+                        
+                        return ImageOperationResult(
+                            success=True,
+                            message="Image downloaded successfully",
+                            image_info=image_info
+                        )
+                    else:
+                        return ImageOperationResult(
+                            success=False,
+                            message=f"Download failed: HTTP {response.status}",
+                            error_code="download_failed"
+                        )
+                        
+        except Exception as e:
+            logger.error(f"Failed to download Unsplash image: {e}")
+            return ImageOperationResult(
+                success=False,
+                message=f"Download failed: {str(e)}",
+                error_code="download_error"
+            )
+    
+    def _check_rate_limit(self) -> bool:
+        """检查请求限制"""
+        current_time = time.time()
+        
+        # 清理过期的请求记录
+        self._request_times = [
+            t for t in self._request_times 
+            if current_time - t < self.rate_limit_window
+        ]
+        
+        # 检查是否超过限制
+        if len(self._request_times) >= self.rate_limit_requests:
+            return False
+        
+        # 记录当前请求时间
+        self._request_times.append(current_time)
+        return True
+
+    def _generate_meaningful_filename(self, item: Dict[str, Any], image_id: str) -> str:
+        """生成有意义的文件名"""
+        try:
+            # 获取描述或alt描述作为文件名基础
+            alt_description = item.get('alt_description', '')
+            description = item.get('description', '')
+
+            # 优先使用alt_description，因为它通常更简洁
+            base_text = alt_description or description
+
+            if base_text:
+                # 清理文本，只保留字母数字和空格
+                clean_text = ''.join(c for c in base_text if c.isalnum() or c in ' -_')
+                clean_text = clean_text.strip().replace(' ', '_')
+
+                # 取前几个单词，限制长度
+                words = clean_text.split('_')[:4]  # 最多4个单词
+                if words and all(word for word in words):
+                    base_name = '_'.join(words)
+
+                    # 限制文件名长度
+                    max_length = 50
+                    if len(base_name) > max_length:
+                        base_name = base_name[:max_length].rstrip('_')
+
+                    return f"unsplash_{base_name}_{image_id}.jpg"
+
+            # 如果没有有效描述，检查用户名
+            user_name = item.get('user', {}).get('username', '')
+            if user_name:
+                clean_user = ''.join(c for c in user_name if c.isalnum() or c in '_')
+                if clean_user:
+                    return f"unsplash_by_{clean_user}_{image_id}.jpg"
+
+            # 默认命名
+            return f"unsplash_photo_{image_id}.jpg"
+
+        except Exception as e:
+            logger.warning(f"Failed to generate meaningful filename: {e}")
+            # 回退到简单命名
+            return f"unsplash_{image_id}.jpg"