feat: 更新视频分析功能,支持提示词模板,提升复刻项目的灵活性和性能

This commit is contained in:
lk-eternal 2025-08-31 21:13:36 +08:00
parent c40c2b1039
commit c796ada84f
9 changed files with 27 additions and 44 deletions

1
.gitignore vendored
View File

@ -52,3 +52,4 @@ Thumbs.db
# Temporary files # Temporary files
*.tmp *.tmp
*.temp *.temp
.qiniu_pythonsdk_hostscache.json

View File

@ -43,5 +43,5 @@ EXPOSE 8000
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/docs || exit 1 CMD curl -f http://localhost:8000/docs || exit 1
# 启动命令 # 启动命令 - 使用多worker提升并发性能
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--app-dir", "src"] CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--app-dir", "src", "--workers", "4", "--loop", "uvloop"]

View File

@ -90,8 +90,8 @@ class AIServiceImpl(AIService):
async def generate_video(self, frame_image_bytes: bytes, shot_prompt: str): async def generate_video(self, frame_image_bytes: bytes, shot_prompt: str):
return await self.gemini_client.generate_video(frame_image_bytes, shot_prompt) return await self.gemini_client.generate_video(frame_image_bytes, shot_prompt)
async def analyze_video(self, video_url: str): async def analyze_video(self, video_url: str, prompt_template: str):
return await self.gemini_client.analyze_video(video_url) return await self.gemini_client.analyze_video(video_url, prompt_template)
class StorageServiceImpl(StorageService): class StorageServiceImpl(StorageService):

View File

@ -434,8 +434,14 @@ async def replicate_from_video(
): ):
"""一键复刻从视频URL生成项目、素材和分镜""" """一键复刻从视频URL生成项目、素材和分镜"""
try: try:
# 视频分析提示词模板不需要特定的占位符变量,因为视频内容直接传给AI模型
# 这里可以添加其他模板格式验证逻辑,如果需要的话
# 调用业务逻辑 # 调用业务逻辑
result = await project_use_cases.replicate_from_video(request.video_url) result = await project_use_cases.replicate_from_video(
request.video_url,
request.prompt_template
)
project = result["project"] project = result["project"]
assets = result["assets"] assets = result["assets"]

View File

@ -157,6 +157,7 @@ class ComposeVideoResponse(BaseModel):
class VideoReplicateRequest(BaseModel): class VideoReplicateRequest(BaseModel):
"""一键复刻请求模式""" """一键复刻请求模式"""
video_url: str = Field(..., description="要复刻的视频URL") video_url: str = Field(..., description="要复刻的视频URL")
prompt_template: str = Field(..., description="视频分析提示词模板")
# 更新引用 # 更新引用

View File

@ -590,12 +590,13 @@ class ProjectUseCases:
logger.error(f"生成视频失败: {e}") logger.error(f"生成视频失败: {e}")
raise ValueError(f"生成视频失败: {e}") raise ValueError(f"生成视频失败: {e}")
async def replicate_from_video(self, video_url: str) -> Dict[str, Any]: async def replicate_from_video(self, video_url: str, prompt_template: str) -> Dict[str, Any]:
""" """
一键复刻从视频URL生成项目、素材和分镜 一键复刻从视频URL生成项目、素材和分镜
Args: Args:
video_url: 要复刻的视频URL video_url: 要复刻的视频URL
prompt_template: 视频分析提示词模板
Returns: Returns:
包含project、assets、storyboards的字典 包含project、assets、storyboards的字典
@ -605,7 +606,7 @@ class ProjectUseCases:
# 1. 使用Gemini分析视频内容 # 1. 使用Gemini分析视频内容
logger.info("正在分析视频内容...") logger.info("正在分析视频内容...")
analysis_result = await self.ai_service.analyze_video(video_url) analysis_result = await self.ai_service.analyze_video(video_url, prompt_template)
if not analysis_result: if not analysis_result:
raise ValueError("视频分析失败") raise ValueError("视频分析失败")

View File

@ -110,12 +110,13 @@ class AIService(ABC):
pass pass
@abstractmethod @abstractmethod
async def analyze_video(self, video_url: str) -> Optional[Dict[str, Any]]: async def analyze_video(self, video_url: str, prompt_template: str) -> Optional[Dict[str, Any]]:
""" """
分析视频内容提取关键素材帧和分镜关键帧 分析视频内容提取关键素材帧和分镜关键帧
Args: Args:
video_url: 视频URL video_url: 视频URL
prompt_template: 视频分析提示词模板
Returns: Returns:
分析结果字典包含key_assets_frames和key_storyboard_frames 分析结果字典包含key_assets_frames和key_storyboard_frames

View File

@ -11,6 +11,7 @@ from google.genai import types
from ..config import settings from ..config import settings
from ..utils import safe_json_loads from ..utils import safe_json_loads
from .key_pool_manager import key_pool_manager from .key_pool_manager import key_pool_manager
from ..services.template_service import TemplateService
from loguru import logger from loguru import logger
import ssl import ssl
import urllib3 import urllib3
@ -217,7 +218,7 @@ class GeminiClient:
# 调用Veo-3.0 API生成视频 # 调用Veo-3.0 API生成视频
operation = self._current_client.models.generate_videos( operation = self._current_client.models.generate_videos(
model="veo-3.0-fast-generate-preview", #veo-3.0-generate-preview model="veo-3.0-generate-preview", #veo-3.0-fast-generate-preview
prompt=video_prompt, prompt=video_prompt,
image=image_input image=image_input
) )
@ -252,12 +253,13 @@ class GeminiClient:
logger.error(f"Veo-3.0视频生成失败: {e}") logger.error(f"Veo-3.0视频生成失败: {e}")
raise e raise e
async def analyze_video(self, video_url: str) -> Optional[Dict[str, Any]]: async def analyze_video(self, video_url: str, prompt_template: str) -> Optional[Dict[str, Any]]:
""" """
分析视频内容提取关键素材帧和分镜关键帧 分析视频内容提取关键素材帧和分镜关键帧
Args: Args:
video_url: 视频URL video_url: 视频URL
prompt_template: 视频分析提示词模板
Returns: Returns:
分析结果字典包含key_assets_frames和key_storyboard_frames 分析结果字典包含key_assets_frames和key_storyboard_frames
@ -308,37 +310,8 @@ class GeminiClient:
return myfile return myfile
# 构建分析提示词 # 直接使用提示词模板作为分析提示词
analysis_prompt = """ analysis_prompt = prompt_template
请仔细分析这个视频并返回以下JSON格式的结果
{
"title": "为这个视频生成一个简洁有吸引力的标题不超过20个字符",
"script": "根据视频内容生成的完整剧本,包含对话、动作、场景描述等",
"key_assets_frames": [
{
"timestamp": "HH:MM:SS",
"name": "素材名称",
"description": "素材描述",
"tags": ["标签1", "标签2"]
}
],
"key_storyboard_frames": [
{
"timestamp": "HH:MM:SS",
"frame_prompt": "该帧画面描述",
"shot_prompt": "该关键帧到下一关键帧之间的剧情描述"
}
]
}
要求
1. title: 根据视频主题和内容生成简洁有吸引力的标题要能概括视频核心内容不超过20个字符
2. script: 根据视频内容生成完整的剧本包含场景描述角色对话动作指导等要生动详细
3. key_assets_frames: 提取3-5个关键素材帧包含视觉元素如角色场景道具动物等
4. key_storyboard_frames: 提取分镜关键帧约每8秒一帧
5. timestamp格式必须是HH:MM:SS
6. 确保返回的是有效的JSON格式不要包含其他文字
"""
myfile = self._execute_with_retry(_upload_and_analyze) myfile = self._execute_with_retry(_upload_and_analyze)

View File

@ -21,7 +21,7 @@ class OpenRouterClient:
async def generate_text( async def generate_text(
self, self,
prompt: str, prompt: str,
model: str = "anthropic/claude-3.5-sonnet" model: str = "google/gemini-2.5-pro"
) -> Optional[str]: ) -> Optional[str]:
""" """
生成文本内容 生成文本内容
@ -66,7 +66,7 @@ class OpenRouterClient:
self, self,
prompt_template: str, prompt_template: str,
script_or_idea: str, script_or_idea: str,
model: str = "anthropic/claude-3.5-sonnet" model: str = "google/gemini-2.5-pro"
) -> Optional[Dict[str, Any]]: ) -> Optional[Dict[str, Any]]:
""" """
生成完整剧本和素材信息 生成完整剧本和素材信息
@ -107,7 +107,7 @@ class OpenRouterClient:
text: str, text: str,
source_lang: str = "zh", source_lang: str = "zh",
target_lang: str = "en", target_lang: str = "en",
model: str = "anthropic/claude-3.5-sonnet" model: str = "google/gemini-2.5-pro"
) -> Optional[str]: ) -> Optional[str]:
""" """
翻译文本 翻译文本