2025-08-31 18:38:41 +08:00

61 lines
1.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
import json
from typing import Optional, Union, Any
from loguru import logger
def clean_json_markdown(text: str) -> str:
    """Strip a markdown code fence from *text* and return the inner content.

    The first complete fenced block (three backticks, optionally tagged
    ``json`` in any letter case, up to the next three backticks) wins and its
    whitespace-stripped content is returned. If there is no complete block but
    the text *starts* with an opening fence (for example output that was cut
    off before the closing fence), that opening fence is removed. Otherwise
    the text is returned with surrounding whitespace stripped.

    Args:
        text: Text that may wrap a JSON payload in a markdown code fence.

    Returns:
        The extracted/stripped string. Falsy input (``""`` or ``None``) is
        returned unchanged.
    """
    if not text:
        return text

    # Complete fenced block. IGNORECASE already covers ``JSON``/``Json``, so a
    # single ``json`` alternative is sufficient.
    fenced = re.search(
        r'```(?:json)?\s*(.*?)\s*```', text, re.DOTALL | re.IGNORECASE
    )
    if fenced:
        return fenced.group(1).strip()

    stripped = text.strip()

    # Unterminated fence: only handled when the text begins with the fence, so
    # backticks that merely appear inside an unfenced payload are left alone.
    opening = re.match(r'```(?:json)?\s*', stripped, re.IGNORECASE)
    if opening:
        return stripped[opening.end():].strip()

    # No markdown fence at all: use the text as-is (minus outer whitespace).
    return stripped
def safe_json_loads(text: str) -> Optional[Union[dict, list, Any]]:
    """Parse JSON from *text* without raising, tolerating markdown fences.

    The raw text is parsed first. Only when that fails with a decode error is
    the text passed through ``clean_json_markdown`` (defined in this module)
    and parsed again. Parsing the raw text first keeps valid JSON whose string
    values themselves contain triple backticks from being truncated by the
    fence-stripping step.

    Args:
        text: String expected to contain a JSON document, optionally wrapped
            in a markdown code fence.

    Returns:
        The decoded Python object, or ``None`` when *text* is falsy or cannot
        be parsed. Failures are reported through ``logger.error``.
    """
    if not text:
        return None

    try:
        try:
            # Fast path: the text is already valid JSON.
            return json.loads(text)
        except json.JSONDecodeError:
            # Not plain JSON; fall through to the markdown cleanup below.
            pass

        cleaned_text = clean_json_markdown(text)
        return json.loads(cleaned_text)
    except json.JSONDecodeError as e:
        # Log only a prefix of the input so huge payloads do not flood the log.
        logger.error(f"JSON解析失败: {e}, 原文本: {text[:200]}...")
        return None
    except Exception as e:
        # Deliberate best-effort boundary: e.g. a non-string argument.
        logger.error(f"JSON处理异常: {e}")
        return None