Add File
This commit is contained in:
297
src/landppt/services/image/providers/searxng_image_provider.py
Normal file
297
src/landppt/services/image/providers/searxng_image_provider.py
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
"""
|
||||||
|
SearXNG图片搜索提供者
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import aiohttp
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
from pathlib import Path
|
||||||
|
import hashlib
|
||||||
|
import re
|
||||||
|
|
||||||
|
from ..models import (
|
||||||
|
ImageInfo, ImageSearchRequest, ImageSearchResult, ImageOperationResult,
|
||||||
|
ImageProvider, ImageSourceType, ImageMetadata, ImageLicense
|
||||||
|
)
|
||||||
|
from .base import ImageSearchProvider
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SearXNGSearchProvider(ImageSearchProvider):
|
||||||
|
"""SearXNG图片搜索提供者"""
|
||||||
|
|
||||||
|
def __init__(self, config: Dict[str, Any]):
|
||||||
|
self.host = config.get('host', '')
|
||||||
|
self.per_page = config.get('per_page', 20)
|
||||||
|
self.rate_limit_requests = config.get('rate_limit_requests', 60)
|
||||||
|
self.rate_limit_window = config.get('rate_limit_window', 60) # 1分钟
|
||||||
|
self.timeout = config.get('timeout', 30)
|
||||||
|
self.language = config.get('language', 'auto')
|
||||||
|
self.theme = config.get('theme', 'simple')
|
||||||
|
|
||||||
|
# 请求限制跟踪
|
||||||
|
self._request_times = []
|
||||||
|
|
||||||
|
# 设置enabled状态基于host配置
|
||||||
|
config_with_enabled = config.copy()
|
||||||
|
config_with_enabled['enabled'] = bool(self.host and self.host.strip())
|
||||||
|
|
||||||
|
super().__init__(ImageProvider.SEARXNG, config_with_enabled)
|
||||||
|
|
||||||
|
async def search(self, request: ImageSearchRequest) -> ImageSearchResult:
|
||||||
|
"""搜索图片"""
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
if not self.enabled:
|
||||||
|
return ImageSearchResult(
|
||||||
|
images=[], total_count=0, page=request.page,
|
||||||
|
per_page=request.per_page, has_next=False, has_prev=False,
|
||||||
|
search_time=0.0, provider=self.provider,
|
||||||
|
error="SearXNG host not configured"
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 检查请求限制
|
||||||
|
if not self._check_rate_limit():
|
||||||
|
return ImageSearchResult(
|
||||||
|
images=[], total_count=0, page=request.page,
|
||||||
|
per_page=request.per_page, has_next=False, has_prev=False,
|
||||||
|
search_time=time.time() - start_time, provider=self.provider,
|
||||||
|
error="Rate limit exceeded"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 构建搜索URL
|
||||||
|
search_url = f"{self.host.rstrip('/')}/search"
|
||||||
|
|
||||||
|
# 构建查询参数
|
||||||
|
params = {
|
||||||
|
'q': request.query,
|
||||||
|
'categories': 'images',
|
||||||
|
'language': self.language,
|
||||||
|
'theme': self.theme,
|
||||||
|
'format': 'json'
|
||||||
|
}
|
||||||
|
|
||||||
|
# 记录请求时间
|
||||||
|
self._request_times.append(time.time())
|
||||||
|
|
||||||
|
# 发送请求
|
||||||
|
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.timeout)) as session:
|
||||||
|
async with session.get(search_url, params=params) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
error_msg = f"SearXNG API returned status {response.status}"
|
||||||
|
logger.error(error_msg)
|
||||||
|
return ImageSearchResult(
|
||||||
|
images=[], total_count=0, page=request.page,
|
||||||
|
per_page=request.per_page, has_next=False, has_prev=False,
|
||||||
|
search_time=time.time() - start_time, provider=self.provider,
|
||||||
|
error=error_msg
|
||||||
|
)
|
||||||
|
|
||||||
|
data = await response.json()
|
||||||
|
|
||||||
|
# 解析搜索结果
|
||||||
|
images = []
|
||||||
|
results = data.get('results', [])
|
||||||
|
|
||||||
|
# 计算分页
|
||||||
|
start_index = (request.page - 1) * request.per_page
|
||||||
|
end_index = start_index + request.per_page
|
||||||
|
page_results = results[start_index:end_index]
|
||||||
|
|
||||||
|
for result in page_results:
|
||||||
|
try:
|
||||||
|
image_info = await self._parse_search_result(result)
|
||||||
|
if image_info:
|
||||||
|
images.append(image_info)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to parse SearXNG result: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 计算分页信息
|
||||||
|
total_count = len(results)
|
||||||
|
has_next = end_index < total_count
|
||||||
|
has_prev = request.page > 1
|
||||||
|
|
||||||
|
return ImageSearchResult(
|
||||||
|
images=images,
|
||||||
|
total_count=total_count,
|
||||||
|
page=request.page,
|
||||||
|
per_page=request.per_page,
|
||||||
|
has_next=has_next,
|
||||||
|
has_prev=has_prev,
|
||||||
|
search_time=time.time() - start_time,
|
||||||
|
provider=self.provider
|
||||||
|
)
|
||||||
|
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
error_msg = "SearXNG search request timed out"
|
||||||
|
logger.error(error_msg)
|
||||||
|
return ImageSearchResult(
|
||||||
|
images=[], total_count=0, page=request.page,
|
||||||
|
per_page=request.per_page, has_next=False, has_prev=False,
|
||||||
|
search_time=time.time() - start_time, provider=self.provider,
|
||||||
|
error=error_msg
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"SearXNG search failed: {str(e)}"
|
||||||
|
logger.error(error_msg)
|
||||||
|
return ImageSearchResult(
|
||||||
|
images=[], total_count=0, page=request.page,
|
||||||
|
per_page=request.per_page, has_next=False, has_prev=False,
|
||||||
|
search_time=time.time() - start_time, provider=self.provider,
|
||||||
|
error=error_msg
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _parse_search_result(self, result: Dict[str, Any]) -> Optional[ImageInfo]:
|
||||||
|
"""解析搜索结果为ImageInfo对象"""
|
||||||
|
try:
|
||||||
|
# 获取图片URL
|
||||||
|
img_src = result.get('img_src')
|
||||||
|
if not img_src:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 生成唯一ID
|
||||||
|
image_id = hashlib.md5(img_src.encode()).hexdigest()
|
||||||
|
|
||||||
|
# 解析分辨率
|
||||||
|
resolution = result.get('resolution', '')
|
||||||
|
width, height = self._parse_resolution(resolution)
|
||||||
|
|
||||||
|
# 解析文件大小
|
||||||
|
filesize_str = result.get('filesize', '')
|
||||||
|
file_size = self._parse_filesize(filesize_str)
|
||||||
|
|
||||||
|
# 解析图片格式,确保是有效的格式
|
||||||
|
img_format = result.get('img_format', 'jpg').lower()
|
||||||
|
valid_formats = ['jpeg', 'jpg', 'png', 'gif', 'webp', 'bmp', 'tiff']
|
||||||
|
if img_format not in valid_formats:
|
||||||
|
img_format = 'jpg' # 默认为jpg
|
||||||
|
|
||||||
|
# 创建元数据
|
||||||
|
metadata = ImageMetadata(
|
||||||
|
width=width,
|
||||||
|
height=height,
|
||||||
|
format=img_format,
|
||||||
|
file_size=file_size,
|
||||||
|
color_mode='RGB',
|
||||||
|
has_transparency=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# 创建ImageInfo对象
|
||||||
|
image_info = ImageInfo(
|
||||||
|
image_id=image_id,
|
||||||
|
source_type=ImageSourceType.WEB_SEARCH,
|
||||||
|
provider=self.provider,
|
||||||
|
original_url=img_src,
|
||||||
|
local_path='', # 将在下载时设置
|
||||||
|
filename=f"searxng_{image_id}.{img_format}",
|
||||||
|
title=result.get('title', ''),
|
||||||
|
description=result.get('content', ''),
|
||||||
|
metadata=metadata,
|
||||||
|
license=ImageLicense.UNKNOWN,
|
||||||
|
author=result.get('source', ''),
|
||||||
|
source_url=result.get('url', img_src)
|
||||||
|
)
|
||||||
|
|
||||||
|
return image_info
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to parse SearXNG result: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _parse_resolution(self, resolution: str) -> tuple:
|
||||||
|
"""解析分辨率字符串"""
|
||||||
|
if not resolution:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
# 匹配 "1280x720" 或 "1280×720" 格式
|
||||||
|
match = re.match(r'(\d+)[x×](\d+)', resolution)
|
||||||
|
if match:
|
||||||
|
return int(match.group(1)), int(match.group(2))
|
||||||
|
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
def _parse_filesize(self, filesize_str: str) -> int:
|
||||||
|
"""解析文件大小字符串"""
|
||||||
|
if not filesize_str:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# 匹配 "113.58 KB" 格式
|
||||||
|
match = re.match(r'([\d.]+)\s*(KB|MB|GB)', filesize_str.upper())
|
||||||
|
if match:
|
||||||
|
size = float(match.group(1))
|
||||||
|
unit = match.group(2)
|
||||||
|
|
||||||
|
if unit == 'KB':
|
||||||
|
return int(size * 1024)
|
||||||
|
elif unit == 'MB':
|
||||||
|
return int(size * 1024 * 1024)
|
||||||
|
elif unit == 'GB':
|
||||||
|
return int(size * 1024 * 1024 * 1024)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def _check_rate_limit(self) -> bool:
|
||||||
|
"""检查请求速率限制"""
|
||||||
|
current_time = time.time()
|
||||||
|
|
||||||
|
# 清理过期的请求记录
|
||||||
|
self._request_times = [
|
||||||
|
req_time for req_time in self._request_times
|
||||||
|
if current_time - req_time < self.rate_limit_window
|
||||||
|
]
|
||||||
|
|
||||||
|
# 检查是否超过限制
|
||||||
|
return len(self._request_times) < self.rate_limit_requests
|
||||||
|
|
||||||
|
async def get_image_details(self, image_id: str) -> Optional[ImageInfo]:
|
||||||
|
"""获取图片详细信息"""
|
||||||
|
# SearXNG不支持通过ID获取详细信息
|
||||||
|
logger.warning("SearXNG does not support getting image details by ID")
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def download_image(self, image_info: ImageInfo, save_path: Path) -> ImageOperationResult:
|
||||||
|
"""下载图片到本地"""
|
||||||
|
try:
|
||||||
|
if not image_info.original_url:
|
||||||
|
return ImageOperationResult(
|
||||||
|
success=False,
|
||||||
|
message="No original URL available for download"
|
||||||
|
)
|
||||||
|
|
||||||
|
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.timeout)) as session:
|
||||||
|
async with session.get(image_info.original_url) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
return ImageOperationResult(
|
||||||
|
success=False,
|
||||||
|
message=f"Failed to download image: HTTP {response.status}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 确保目录存在
|
||||||
|
save_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# 保存文件
|
||||||
|
with open(save_path, 'wb') as f:
|
||||||
|
async for chunk in response.content.iter_chunked(8192):
|
||||||
|
f.write(chunk)
|
||||||
|
|
||||||
|
# 更新本地路径
|
||||||
|
image_info.local_path = str(save_path)
|
||||||
|
|
||||||
|
return ImageOperationResult(
|
||||||
|
success=True,
|
||||||
|
message="Image downloaded successfully",
|
||||||
|
image_info=image_info
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"Failed to download image from SearXNG: {str(e)}"
|
||||||
|
logger.error(error_msg)
|
||||||
|
return ImageOperationResult(
|
||||||
|
success=False,
|
||||||
|
message=error_msg
|
||||||
|
)
|
||||||
Reference in New Issue
Block a user