mirror of
https://github.com/Wcowin/Mkdocs-Wcowin.git
synced 2025-07-20 08:56:35 +00:00
Moved the resume PDF from docs/about to docs/assets and updated the iframe src in resume.md accordingly. Changed the default for AI_SUMMARY_LOCAL_ENABLED to 'false' in ai_summary.py. Updated .ai_cache with new summary and service config timestamp.
1068 lines
45 KiB
Python
1068 lines
45 KiB
Python
from dotenv import load_dotenv
|
||
load_dotenv() # 自动加载 .env 文件
|
||
|
||
import re
|
||
import json
|
||
import hashlib
|
||
import requests
|
||
from pathlib import Path
|
||
from datetime import datetime
|
||
import os
|
||
import shutil
|
||
|
||
class AISummaryGenerator:
|
||
def __init__(self):
|
||
# 🗂️ 统一缓存路径策略 - 本地和CI环境都使用项目根目录
|
||
# 这样避免了CI构建时被清理,也简化了路径管理
|
||
self.cache_dir = Path(".ai_cache")
|
||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||
|
||
# 🚀 CI 环境配置 - 默认只在 CI 环境中启用
|
||
# AI摘要环境配置
|
||
self.ci_config = {
|
||
# CI部署环境开关 (不用管,只在ci.yml中设置有效)
|
||
'enabled_in_ci': os.getenv('AI_SUMMARY_CI_ENABLED', 'true').lower() == 'true',
|
||
|
||
# 本地部署环境开关 (true=本地开发时启用AI摘要)
|
||
'enabled_in_local': os.getenv('AI_SUMMARY_LOCAL_ENABLED', 'false').lower() == 'true',
|
||
|
||
# CI部署仅缓存模式(不用管,只在ci.yml中设置有效)
|
||
'ci_only_cache': os.getenv('AI_SUMMARY_CI_ONLY_CACHE', 'false').lower() == 'true',
|
||
|
||
# 本地部署缓存功能开关 (true=启用缓存避免重复生成, false=总是生成新摘要)
|
||
'cache_enabled': os.getenv('AI_SUMMARY_CACHE_ENABLED', 'true').lower() == 'true',
|
||
|
||
# CI部署备用摘要开关 (不用管,只在ci.yml中设置有效)
|
||
'ci_fallback_enabled': os.getenv('AI_SUMMARY_CI_FALLBACK', 'true').lower() == 'true',
|
||
}
|
||
|
||
# 🔄 自动缓存迁移逻辑(一次性迁移旧缓存) - 移到ci_config初始化之后
|
||
self._auto_migrate_cache()
|
||
|
||
# 添加服务配置文件,用于跟踪当前使用的服务
|
||
self.service_config_file = self.cache_dir / "service_config.json"
|
||
|
||
# 🤖 多AI服务配置
|
||
self.ai_services = {
|
||
'glm': {
|
||
'url': 'https://open.bigmodel.cn/api/paas/v4/chat/completions',
|
||
'model': 'glm-4-flash', # 或 'glm-4-plus', 'glm-4-air'
|
||
'api_key': os.getenv('GLM_API_KEY', ),
|
||
'max_tokens': 300,
|
||
'temperature': 0.3
|
||
},
|
||
'openai': {
|
||
'url': 'https://api.chatanywhere.tech/v1/chat/completions',
|
||
'model': 'gpt-3.5-turbo', # 或 'gpt-4', 'gpt-4-turbo'
|
||
'api_key': os.getenv('OPENAI_API_KEY', ),
|
||
'max_tokens': 150,
|
||
'temperature': 0.3
|
||
},
|
||
# 'claude': {
|
||
# 'url': 'https://api.anthropic.com/v1/messages',
|
||
# 'model': 'claude-3-haiku-20240307',
|
||
# 'api_key': os.getenv('ANTHROPIC_API_KEY', 'your-claude-api-key'),
|
||
# 'max_tokens': 150,
|
||
# 'temperature': 0.3
|
||
# },
|
||
'gemini': {
|
||
'url': 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent',
|
||
'model': 'gemini-pro',
|
||
'api_key': os.getenv('GOOGLE_API_KEY', 'AIzaSyDwWgffCCyVFZVsRasX3B3arWFaCT1PzNI'),
|
||
'max_tokens': 150,
|
||
'temperature': 0.3
|
||
}
|
||
}
|
||
|
||
# 默认使用的AI服务
|
||
self.default_service = 'glm'
|
||
|
||
# 服务优先级(按顺序尝试)
|
||
self.service_fallback_order = ['glm', 'openai', 'claude', 'gemini']
|
||
|
||
# 📂 可自定义的文件夹配置
|
||
self.enabled_folders = [
|
||
'blog/', # blog文件夹
|
||
'develop/', # develop文件夹
|
||
# 'posts/', # posts文件夹
|
||
# 'trip/', # trip文件夹
|
||
# 'about/', # about文件夹
|
||
]
|
||
|
||
# 📋 排除的文件和文件夹
|
||
self.exclude_patterns = [
|
||
'waline.md', 'link.md', '404.md', 'tag.md', 'tags.md',
|
||
'/about/', '/search/', '/sitemap', '/admin/',
|
||
'index.md', # 根目录index.md
|
||
]
|
||
|
||
# 📋 排除的特定文件
|
||
self.exclude_files = [
|
||
'blog/index.md',
|
||
'blog/indexblog.md',
|
||
'docs/index.md',
|
||
'develop/index.md',
|
||
]
|
||
|
||
# 🌍 语言配置/Language Configuration
|
||
self.summary_language = 'zh' # 默认中文,可选 'zh'、'en'、'both'
|
||
|
||
# 在初始化时就进行环境检查
|
||
self._check_environment()
|
||
|
||
# 检查服务变更并处理缓存
|
||
self._check_service_change()
|
||
|
||
def _check_environment(self):
|
||
"""初始化时检查环境"""
|
||
is_ci = self.is_ci_environment()
|
||
|
||
if is_ci:
|
||
ci_name = self._get_ci_name()
|
||
if self.ci_config['enabled_in_ci']:
|
||
print(f"🚀 检测到 CI 环境 ({ci_name}),AI 摘要功能已启用")
|
||
self._should_run = True
|
||
else:
|
||
print(f"🚫 检测到 CI 环境 ({ci_name}),AI 摘要功能已禁用")
|
||
self._should_run = False
|
||
else:
|
||
# 本地环境检查
|
||
if self.ci_config['enabled_in_local']:
|
||
print("💻 本地环境检测到,AI 摘要功能已启用")
|
||
self._should_run = True
|
||
else:
|
||
print("🚫 本地环境检测到,AI 摘要功能已禁用(仅在 CI 环境中启用)")
|
||
self._should_run = False
|
||
|
||
def _check_service_change(self):
|
||
"""检查AI服务是否发生变更,如有变更则自动清理缓存"""
|
||
# 如果禁用了缓存功能,跳过服务变更检查
|
||
if not self.ci_config['cache_enabled']:
|
||
return
|
||
|
||
current_config = {
|
||
'default_service': self.default_service,
|
||
'available_services': list(self.ai_services.keys()),
|
||
'summary_language': self.summary_language,
|
||
'check_time': datetime.now().isoformat()
|
||
}
|
||
|
||
if self.service_config_file.exists():
|
||
try:
|
||
with open(self.service_config_file, 'r', encoding='utf-8') as f:
|
||
previous_config = json.load(f)
|
||
|
||
# 检查默认服务或语言是否变更
|
||
if (previous_config.get('default_service') != current_config['default_service'] or
|
||
previous_config.get('summary_language') != current_config['summary_language']):
|
||
old_service = previous_config.get('default_service', 'unknown')
|
||
new_service = current_config['default_service']
|
||
old_lang = previous_config.get('summary_language', 'zh')
|
||
new_lang = current_config['summary_language']
|
||
|
||
if old_service != new_service:
|
||
print(f"🔄 检测到AI服务变更: {old_service} → {new_service}")
|
||
if old_lang != new_lang:
|
||
print(f"🌍 检测到语言变更: {old_lang} → {new_lang}")
|
||
|
||
print("🧹 自动清理AI摘要缓存...")
|
||
|
||
try:
|
||
# 删除整个缓存目录
|
||
if self.cache_dir.exists():
|
||
shutil.rmtree(self.cache_dir)
|
||
print(f"✅ 已删除缓存文件夹: {self.cache_dir}")
|
||
|
||
# 重新创建缓存目录
|
||
self.cache_dir.mkdir(exist_ok=True)
|
||
print("📁 已重新创建缓存目录")
|
||
|
||
except Exception as e:
|
||
print(f"❌ 清理缓存失败: {e}")
|
||
# 如果删除失败,尝试清理单个文件
|
||
try:
|
||
self._clear_cache_files()
|
||
except:
|
||
print("⚠️ 缓存清理失败,新摘要可能会混用旧配置的缓存")
|
||
|
||
except Exception as e:
|
||
print(f"读取服务配置失败: {e}")
|
||
|
||
# 保存当前配置
|
||
try:
|
||
with open(self.service_config_file, 'w', encoding='utf-8') as f:
|
||
json.dump(current_config, f, ensure_ascii=False, indent=2)
|
||
except Exception as e:
|
||
print(f"保存服务配置失败: {e}")
|
||
|
||
def _clear_cache_files(self):
|
||
"""清理缓存文件(备用方法)"""
|
||
cleared_count = 0
|
||
try:
|
||
for cache_file in self.cache_dir.glob("*.json"):
|
||
if cache_file.name != "service_config.json":
|
||
cache_file.unlink()
|
||
cleared_count += 1
|
||
print(f"✅ 已清理 {cleared_count} 个缓存文件")
|
||
except Exception as e:
|
||
print(f"❌ 单文件清理失败: {e}")
|
||
|
||
def configure_ai_service(self, service_name, config=None):
|
||
"""
|
||
配置AI服务
|
||
|
||
Args:
|
||
service_name: 服务名称 ('glm', 'openai', 'azure_openai', 'claude', 'gemini')
|
||
config: 服务配置字典
|
||
"""
|
||
old_service = self.default_service
|
||
|
||
if config:
|
||
self.ai_services[service_name] = config
|
||
self.default_service = service_name
|
||
|
||
# 如果服务发生变更,自动清理缓存
|
||
if old_service != service_name:
|
||
print(f"🔄 AI服务已切换: {old_service} → {service_name}")
|
||
print("🧹 自动清理所有AI摘要缓存...")
|
||
|
||
try:
|
||
if self.cache_dir.exists():
|
||
shutil.rmtree(self.cache_dir)
|
||
print(f"✅ 已删除缓存文件夹: {self.cache_dir}")
|
||
|
||
# 重新创建缓存目录
|
||
self.cache_dir.mkdir(exist_ok=True)
|
||
print("📁 已重新创建缓存目录")
|
||
|
||
except Exception as e:
|
||
print(f"❌ 清理缓存失败: {e}")
|
||
# 如果删除失败,尝试清理单个文件
|
||
try:
|
||
self._clear_cache_files()
|
||
except:
|
||
print("⚠️ 缓存清理失败,新摘要可能会混用旧服务的缓存")
|
||
|
||
# 更新服务配置记录
|
||
self._check_service_change()
|
||
|
||
def configure_language(self, language='zh'):
|
||
"""
|
||
配置摘要语言
|
||
|
||
Args:
|
||
language: 语言设置 ('zh': 中文, 'en': 英文, 'both': 双语)
|
||
"""
|
||
old_language = self.summary_language
|
||
self.summary_language = language
|
||
|
||
if old_language != language:
|
||
print(f"🌍 摘要语言已切换: {old_language} → {language}")
|
||
print("🧹 自动清理摘要缓存以应用新语言设置...")
|
||
|
||
try:
|
||
if self.cache_dir.exists():
|
||
shutil.rmtree(self.cache_dir)
|
||
print(f"✅ 已删除缓存文件夹: {self.cache_dir}")
|
||
|
||
# 重新创建缓存目录
|
||
self.cache_dir.mkdir(exist_ok=True)
|
||
print("📁 已重新创建缓存目录")
|
||
|
||
except Exception as e:
|
||
print(f"❌ 清理缓存失败: {e}")
|
||
|
||
# 更新服务配置记录
|
||
self._check_service_change()
|
||
|
||
def configure_folders(self, folders=None, exclude_patterns=None, exclude_files=None):
|
||
"""配置启用AI摘要的文件夹"""
|
||
if folders is not None:
|
||
self.enabled_folders = folders
|
||
if exclude_patterns is not None:
|
||
self.exclude_patterns = exclude_patterns
|
||
if exclude_files is not None:
|
||
self.exclude_files = exclude_files
|
||
|
||
def get_content_hash(self, content):
|
||
"""生成内容hash用于缓存(包含语言设置)"""
|
||
content_with_lang = f"{content}_{self.summary_language}"
|
||
return hashlib.md5(content_with_lang.encode('utf-8')).hexdigest()
|
||
|
||
def get_cached_summary(self, content_hash):
|
||
"""获取缓存的摘要"""
|
||
# 如果禁用了缓存功能,直接返回None
|
||
if not self.ci_config['cache_enabled']:
|
||
return None
|
||
|
||
cache_file = self.cache_dir / f"{content_hash}.json"
|
||
if cache_file.exists():
|
||
try:
|
||
with open(cache_file, 'r', encoding='utf-8') as f:
|
||
cache_data = json.load(f)
|
||
# 检查缓存是否过期(7天)
|
||
cache_time = datetime.fromisoformat(cache_data.get('timestamp', '1970-01-01'))
|
||
if (datetime.now() - cache_time).days < 7:
|
||
return cache_data
|
||
except:
|
||
pass
|
||
return None
|
||
|
||
def save_summary_cache(self, content_hash, summary_data):
|
||
"""保存摘要到缓存"""
|
||
# 如果禁用了缓存功能,不保存缓存
|
||
if not self.ci_config['cache_enabled']:
|
||
return
|
||
|
||
cache_file = self.cache_dir / f"{content_hash}.json"
|
||
try:
|
||
summary_data['timestamp'] = datetime.now().isoformat()
|
||
summary_data['language'] = self.summary_language
|
||
with open(cache_file, 'w', encoding='utf-8') as f:
|
||
json.dump(summary_data, f, ensure_ascii=False, indent=2)
|
||
except Exception as e:
|
||
print(f"保存摘要缓存失败: {e}")
|
||
|
||
def clean_content_for_ai(self, markdown):
|
||
"""清理内容,提取主要文本用于AI处理"""
|
||
content = markdown
|
||
|
||
# 移除YAML front matter
|
||
content = re.sub(r'^---.*?---\s*', '', content, flags=re.DOTALL)
|
||
|
||
# 移除已存在的阅读信息块和AI摘要块
|
||
content = re.sub(r'!!! info "📖 阅读信息".*?(?=\n\n|\n#|\Z)', '', content, flags=re.DOTALL)
|
||
content = re.sub(r'!!! info "🤖 AI智能摘要".*?(?=\n\n|\n#|\Z)', '', content, flags=re.DOTALL)
|
||
content = re.sub(r'!!! tip "📝 自动摘要".*?(?=\n\n|\n#|\Z)', '', content, flags=re.DOTALL)
|
||
|
||
# 移除HTML标签
|
||
content = re.sub(r'<[^>]+>', '', content)
|
||
|
||
# 移除图片,保留alt文本作为内容提示
|
||
content = re.sub(r'!\[([^\]]*)\]\([^)]+\)', r'[图片:\1]', content)
|
||
|
||
# 移除链接,保留文本
|
||
content = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', content)
|
||
|
||
# 移除代码块,但保留关键信息
|
||
content = re.sub(r'```(\w+)?\n(.*?)\n```', r'[代码示例]', content, flags=re.DOTALL)
|
||
|
||
# 移除行内代码
|
||
content = re.sub(r'`[^`]+`', '[代码]', content)
|
||
|
||
# 移除表格格式但保留内容
|
||
content = re.sub(r'\|[^\n]+\|', '', content)
|
||
content = re.sub(r'^[-|:\s]+$', '', content, flags=re.MULTILINE)
|
||
|
||
# 清理格式符号
|
||
content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content) # 粗体
|
||
content = re.sub(r'\*([^*]+)\*', r'\1', content) # 斜体
|
||
content = re.sub(r'^#+\s*', '', content, flags=re.MULTILINE) # 标题符号
|
||
|
||
# 移除多余的空行和空格
|
||
content = re.sub(r'\n\s*\n', '\n\n', content)
|
||
content = re.sub(r'^[ \t]+', '', content, flags=re.MULTILINE)
|
||
content = content.strip()
|
||
|
||
return content
|
||
|
||
def build_headers(self, service_config):
|
||
"""构建请求头"""
|
||
headers = {
|
||
'Content-Type': 'application/json'
|
||
}
|
||
|
||
# 根据服务类型添加认证头
|
||
if 'azure_openai' in service_config.get('url', ''):
|
||
headers['api-key'] = service_config['api_key']
|
||
elif 'anthropic.com' in service_config.get('url', ''):
|
||
headers['x-api-key'] = service_config['api_key']
|
||
headers['anthropic-version'] = '2023-06-01'
|
||
elif 'googleapis.com' in service_config.get('url', ''):
|
||
# Google API使用URL参数
|
||
pass
|
||
else:
|
||
# OpenAI和GLM使用Bearer token
|
||
headers['Authorization'] = f"Bearer {service_config['api_key']}"
|
||
|
||
# 添加额外的头部
|
||
if 'headers_extra' in service_config:
|
||
headers.update(service_config['headers_extra'])
|
||
|
||
return headers
|
||
|
||
def build_payload(self, service_name, service_config, content, page_title):
|
||
"""构建请求载荷"""
|
||
# 根据语言设置生成不同的prompt
|
||
if self.summary_language == 'en':
|
||
prompt = f"""Please generate a high-quality summary for the following technical article with these requirements:
|
||
|
||
1. **Length Control**: Strictly limit to 80-120 words
|
||
2. **Content Requirements**:
|
||
- Accurately summarize the core themes and key points of the article
|
||
- Highlight technical features, application scenarios, or problems solved
|
||
- Use professional but understandable language
|
||
- Avoid repeating the article title content
|
||
3. **Format Requirements**:
|
||
- Return summary content directly without any prefix or suffix
|
||
- Use concise declarative sentences
|
||
- Technical terms are appropriate
|
||
|
||
Article Title: {page_title}
|
||
|
||
Article Content:
|
||
{content[:2500]}
|
||
|
||
Please generate summary:"""
|
||
|
||
elif self.summary_language == 'both':
|
||
prompt = f"""Please generate a bilingual summary (Chinese and English) for the following technical article with these requirements:
|
||
|
||
1. **Length Control**:
|
||
- Chinese: 80-120 characters
|
||
- English: 80-120 words
|
||
2. **Content Requirements**:
|
||
- Accurately summarize the core themes and key points
|
||
- Highlight technical features, application scenarios, or problems solved
|
||
- Use professional but understandable language
|
||
- Avoid repeating the article title content
|
||
3. **Format Requirements**:
|
||
- First provide Chinese summary
|
||
- Then provide English summary
|
||
- Separate with a blank line
|
||
- No prefixes or additional formatting
|
||
|
||
Article Title: {page_title}
|
||
|
||
Article Content:
|
||
{content[:2500]}
|
||
|
||
Please generate bilingual summary:"""
|
||
|
||
else: # 默认中文
|
||
prompt = f"""请为以下技术文章生成一个高质量的摘要,要求:
|
||
|
||
1. **长度控制**:严格控制在80-120字以内
|
||
2. **内容要求**:
|
||
- 准确概括文章的核心主题和关键要点
|
||
- 突出技术特点、应用场景或解决的问题
|
||
- 使用专业但易懂的语言
|
||
- 避免重复文章标题的内容
|
||
3. **格式要求**:
|
||
- 直接返回摘要内容,无需任何前缀或后缀
|
||
- 使用简洁的陈述句
|
||
- 可以适当使用技术术语
|
||
|
||
文章标题:{page_title}
|
||
|
||
文章内容:
|
||
{content[:2500]}
|
||
|
||
请生成摘要:"""
|
||
|
||
if service_name == 'claude':
|
||
# Claude API格式
|
||
return {
|
||
"model": service_config['model'],
|
||
"max_tokens": service_config['max_tokens'],
|
||
"temperature": service_config['temperature'],
|
||
"messages": [
|
||
{
|
||
"role": "user",
|
||
"content": prompt
|
||
}
|
||
]
|
||
}
|
||
elif service_name == 'gemini':
|
||
# Gemini API格式
|
||
return {
|
||
"contents": [
|
||
{
|
||
"parts": [
|
||
{
|
||
"text": prompt
|
||
}
|
||
]
|
||
}
|
||
],
|
||
"generationConfig": {
|
||
"temperature": service_config['temperature'],
|
||
"maxOutputTokens": service_config['max_tokens']
|
||
}
|
||
}
|
||
else:
|
||
# OpenAI格式 (OpenAI, GLM, Azure OpenAI)
|
||
system_content = {
|
||
'zh': "你是一个专业的技术文档摘要专家,擅长提取文章核心要点并生成简洁准确的中文摘要。",
|
||
'en': "You are a professional technical documentation summary expert, skilled at extracting core points from articles and generating concise and accurate English summaries.",
|
||
'both': "You are a professional technical documentation summary expert, skilled at extracting core points from articles and generating concise and accurate bilingual summaries in both Chinese and English."
|
||
}
|
||
|
||
return {
|
||
"model": service_config['model'],
|
||
"messages": [
|
||
{
|
||
"role": "system",
|
||
"content": system_content.get(self.summary_language, system_content['zh'])
|
||
},
|
||
{
|
||
"role": "user",
|
||
"content": prompt
|
||
}
|
||
],
|
||
"max_tokens": service_config['max_tokens'] * (2 if self.summary_language == 'both' else 1),
|
||
"temperature": service_config['temperature'],
|
||
"top_p": 0.9
|
||
}
|
||
|
||
def extract_response_content(self, service_name, response_data):
|
||
"""从响应中提取内容"""
|
||
try:
|
||
if service_name == 'claude':
|
||
return response_data['content'][0]['text']
|
||
elif service_name == 'gemini':
|
||
return response_data['candidates'][0]['content']['parts'][0]['text']
|
||
else:
|
||
# OpenAI格式
|
||
return response_data['choices'][0]['message']['content']
|
||
except (KeyError, IndexError) as e:
|
||
print(f"解析{service_name}响应失败: {e}")
|
||
return None
|
||
|
||
def generate_ai_summary_with_service(self, content, page_title, service_name):
|
||
"""使用指定服务生成摘要"""
|
||
if service_name not in self.ai_services:
|
||
print(f"不支持的AI服务: {service_name}")
|
||
return None
|
||
|
||
service_config = self.ai_services[service_name]
|
||
|
||
# 检查API密钥
|
||
if not service_config['api_key'] or service_config['api_key'].startswith('your-'):
|
||
print(f"{service_name} API密钥未配置")
|
||
return None
|
||
|
||
try:
|
||
headers = self.build_headers(service_config)
|
||
payload = self.build_payload(service_name, service_config, content, page_title)
|
||
|
||
# 对于Google API,添加API密钥到URL
|
||
url = service_config['url']
|
||
if service_name == 'gemini':
|
||
url = f"{url}?key={service_config['api_key']}"
|
||
|
||
response = requests.post(
|
||
url,
|
||
headers=headers,
|
||
json=payload,
|
||
timeout=30
|
||
)
|
||
|
||
if response.status_code == 200:
|
||
result = response.json()
|
||
summary = self.extract_response_content(service_name, result)
|
||
|
||
if summary:
|
||
# 清理可能的格式问题
|
||
summary = re.sub(r'^["""''`]+|["""''`]+$', '', summary.strip())
|
||
summary = re.sub(r'^\s*摘要[::]\s*', '', summary)
|
||
summary = re.sub(r'^\s*总结[::]\s*', '', summary)
|
||
summary = re.sub(r'^\s*Summary[::]\s*', '', summary)
|
||
summary = re.sub(r'^\s*Abstract[::]\s*', '', summary)
|
||
return summary
|
||
|
||
else:
|
||
print(f"{service_name} API请求失败: {response.status_code} - {response.text}")
|
||
return None
|
||
|
||
except requests.exceptions.RequestException as e:
|
||
print(f"{service_name} API请求异常: {e}")
|
||
return None
|
||
except Exception as e:
|
||
print(f"{service_name} 摘要生成异常: {e}")
|
||
return None
|
||
|
||
def generate_ai_summary(self, content, page_title=""):
|
||
"""生成AI摘要(支持CI环境策略)"""
|
||
is_ci = self.is_ci_environment()
|
||
|
||
# 如果在 CI 环境中且配置为只使用缓存
|
||
if is_ci and self.ci_config['ci_only_cache']:
|
||
print(f"📦 CI 环 environment仅使用缓存模式")
|
||
return None, 'ci_cache_only'
|
||
|
||
# 按优先级尝试不同服务
|
||
services_to_try = [self.default_service] + [s for s in self.service_fallback_order if s != self.default_service]
|
||
|
||
for service_name in services_to_try:
|
||
if service_name in self.ai_services:
|
||
lang_desc = {'zh': '中文', 'en': '英文', 'both': '双语'}
|
||
env_desc = '(CI)' if is_ci else '(本地)'
|
||
print(f"🔄 尝试使用 {service_name} 生成{lang_desc.get(self.summary_language, '中文')}摘要 {env_desc}...")
|
||
summary = self.generate_ai_summary_with_service(content, page_title, service_name)
|
||
if summary:
|
||
return summary, service_name
|
||
|
||
print("⚠️ 所有AI服务均不可用")
|
||
return None, None
|
||
|
||
def generate_fallback_summary(self, content, page_title=""):
|
||
"""生成备用摘要(考虑CI环境配置)"""
|
||
is_ci = self.is_ci_environment()
|
||
|
||
# 如果在 CI 环境中且禁用了备用摘要
|
||
if is_ci and not self.ci_config['ci_fallback_enabled']:
|
||
print(f"🚫 CI 环境禁用备用摘要")
|
||
return None
|
||
|
||
# 移除格式符号
|
||
clean_text = re.sub(r'^#+\s*', '', content, flags=re.MULTILINE)
|
||
clean_text = re.sub(r'\*\*([^*]+)\*\*', r'\1', clean_text)
|
||
clean_text = re.sub(r'\*([^*]+)\*', r'\1', clean_text)
|
||
|
||
# 分割成句子
|
||
sentences = re.split(r'[\u3002\uff01\uff1f.!?]', clean_text)
|
||
sentences = [s.strip() for s in sentences if len(s.strip()) > 15]
|
||
|
||
# 优先选择包含关键词的句子
|
||
key_indicators = [
|
||
'介绍', '讲解', '说明', '分析', '探讨', '研究', '实现', '应用',
|
||
'方法', '技术', '算法', '原理', '概念', '特点', '优势', '解决',
|
||
'教程', '指南', '配置', '安装', '部署', '开发', '设计', '构建'
|
||
]
|
||
|
||
priority_sentences = []
|
||
normal_sentences = []
|
||
|
||
for sentence in sentences[:10]: # 处理前10句
|
||
if any(keyword in sentence for keyword in key_indicators):
|
||
priority_sentences.append(sentence)
|
||
else:
|
||
normal_sentences.append(sentence)
|
||
|
||
# 组合摘要
|
||
selected_sentences = []
|
||
total_length = 0
|
||
|
||
# 优先使用关键句子
|
||
for sentence in priority_sentences:
|
||
if total_length + len(sentence) > 100:
|
||
break
|
||
selected_sentences.append(sentence)
|
||
total_length += len(sentence)
|
||
|
||
# 如果还有空间,添加普通句子
|
||
if total_length < 80:
|
||
for sentence in normal_sentences:
|
||
if total_length + len(sentence) > 100:
|
||
break
|
||
selected_sentences.append(sentence)
|
||
total_length += len(sentence)
|
||
|
||
if selected_sentences:
|
||
summary = '.'.join(selected_sentences) + '.'
|
||
# 简化冗长的摘要
|
||
if len(summary) > 120:
|
||
summary = selected_sentences[0] + '.'
|
||
|
||
# 根据语言设置生成不同的备用摘要
|
||
if self.summary_language == 'en':
|
||
return self._generate_english_fallback(page_title)
|
||
elif self.summary_language == 'both':
|
||
zh_summary = summary
|
||
en_summary = self._generate_english_fallback(page_title)
|
||
return f"{zh_summary}\n\n{en_summary}"
|
||
else:
|
||
return summary
|
||
else:
|
||
# 根据标题和语言生成通用摘要
|
||
if self.summary_language == 'en':
|
||
return self._generate_english_fallback(page_title)
|
||
elif self.summary_language == 'both':
|
||
zh_summary = self._generate_chinese_fallback(page_title)
|
||
en_summary = self._generate_english_fallback(page_title)
|
||
return f"{zh_summary}\n\n{en_summary}"
|
||
else:
|
||
return self._generate_chinese_fallback(page_title)
|
||
|
||
def _generate_chinese_fallback(self, page_title=""):
|
||
"""生成中文备用摘要"""
|
||
if page_title:
|
||
# 根据标题生成通用摘要
|
||
if any(keyword in page_title for keyword in ['教程', '指南', '配置', '安装']):
|
||
return f"本文介绍了{page_title}的相关内容,包括具体的操作步骤和注意事项,为读者提供实用的技术指导。"
|
||
elif any(keyword in page_title for keyword in ['分析', '研究', '探讨', '原理']):
|
||
return f"本文深入分析了{page_title}的核心概念和技术原理,为读者提供详细的理论解析和实践见解。"
|
||
elif any(keyword in page_title for keyword in ['开发', '构建', '实现', '设计']):
|
||
return f"本文详细讲解了{page_title}的开发过程和实现方法,分享了实际的开发经验和技术方案。"
|
||
else:
|
||
return f"本文围绕{page_title}展开讨论,介绍了相关的技术概念、应用场景和实践方法。"
|
||
else:
|
||
return "本文介绍了相关的技术概念和实践方法,为读者提供有价值的参考信息。"
|
||
|
||
def _generate_english_fallback(self, page_title=""):
|
||
"""生成英文备用摘要"""
|
||
if page_title:
|
||
# 根据标题生成通用摘要
|
||
if any(keyword in page_title.lower() for keyword in ['tutorial', 'guide', 'setup', 'install', 'config']):
|
||
return f"This article provides a comprehensive guide on {page_title}, including step-by-step instructions and important considerations for practical implementation."
|
||
elif any(keyword in page_title.lower() for keyword in ['analysis', 'research', 'study', 'principle']):
|
||
return f"This article presents an in-depth analysis of {page_title}, exploring core concepts and technical principles with detailed theoretical insights."
|
||
elif any(keyword in page_title.lower() for keyword in ['develop', 'build', 'implement', 'design']):
|
||
return f"This article explains the development process and implementation methods for {page_title}, sharing practical development experience and technical solutions."
|
||
else:
|
||
return f"This article discusses {page_title}, covering relevant technical concepts, application scenarios, and practical methods."
|
||
else:
|
||
return "This article introduces relevant technical concepts and practical methods, providing valuable reference information for readers."
|
||
|
||
def is_ci_environment(self):
|
||
"""检测是否在 CI 环境中运行"""
|
||
# 常见的 CI 环境变量
|
||
ci_indicators = [
|
||
'CI', 'CONTINUOUS_INTEGRATION', # 通用 CI 标识
|
||
'GITHUB_ACTIONS', # GitHub Actions
|
||
'GITLAB_CI', # GitLab CI
|
||
'JENKINS_URL', # Jenkins
|
||
'TRAVIS', # Travis CI
|
||
'CIRCLECI', # CircleCI
|
||
'AZURE_HTTP_USER_AGENT', # Azure DevOps
|
||
'TEAMCITY_VERSION', # TeamCity
|
||
'BUILDKITE', # Buildkite
|
||
'CODEBUILD_BUILD_ID', # AWS CodeBuild
|
||
'NETLIFY', # Netlify
|
||
'VERCEL', # Vercel
|
||
'CF_PAGES', # Cloudflare Pages
|
||
]
|
||
|
||
for indicator in ci_indicators:
|
||
if os.getenv(indicator):
|
||
return True
|
||
|
||
return False
|
||
|
||
def should_run_in_current_environment(self):
|
||
"""判断是否应该在当前环境中运行 AI 摘要"""
|
||
return self._should_run
|
||
|
||
def _get_ci_name(self):
|
||
"""获取 CI 环境名称"""
|
||
if os.getenv('GITHUB_ACTIONS'):
|
||
return 'GitHub Actions'
|
||
elif os.getenv('GITLAB_CI'):
|
||
return 'GitLab CI'
|
||
elif os.getenv('JENKINS_URL'):
|
||
return 'Jenkins'
|
||
elif os.getenv('TRAVIS'):
|
||
return 'Travis CI'
|
||
elif os.getenv('CIRCLECI'):
|
||
return 'CircleCI'
|
||
elif os.getenv('AZURE_HTTP_USER_AGENT'):
|
||
return 'Azure DevOps'
|
||
elif os.getenv('NETLIFY'):
|
||
return 'Netlify'
|
||
elif os.getenv('VERCEL'):
|
||
return 'Vercel'
|
||
elif os.getenv('CF_PAGES'):
|
||
return 'Cloudflare Pages'
|
||
elif os.getenv('CODEBUILD_BUILD_ID'):
|
||
return 'AWS CodeBuild'
|
||
else:
|
||
return 'Unknown CI'
|
||
|
||
def _auto_migrate_cache(self):
|
||
"""自动迁移缓存文件(仅在需要时执行一次)"""
|
||
# 如果禁用了缓存功能,跳过缓存迁移
|
||
if not self.ci_config.get('cache_enabled', True):
|
||
return
|
||
|
||
old_cache_dir = Path("site/.ai_cache")
|
||
new_cache_dir = Path(".ai_cache")
|
||
|
||
# 检查是否需要迁移
|
||
if old_cache_dir.exists() and not new_cache_dir.exists():
|
||
print("🔄 检测到旧缓存目录,开始自动迁移...")
|
||
|
||
try:
|
||
# 创建新目录
|
||
new_cache_dir.mkdir(exist_ok=True)
|
||
|
||
# 复制文件
|
||
cache_files = list(old_cache_dir.glob("*.json"))
|
||
copied_count = 0
|
||
|
||
for cache_file in cache_files:
|
||
target_file = new_cache_dir / cache_file.name
|
||
try:
|
||
shutil.copy2(cache_file, target_file)
|
||
copied_count += 1
|
||
except Exception as e:
|
||
print(f"⚠️ 复制缓存文件失败 {cache_file.name}: {e}")
|
||
|
||
if copied_count > 0:
|
||
print(f"✅ 自动迁移完成!共迁移 {copied_count} 个缓存文件")
|
||
print("💡 提示:请将 .ai_cache 目录提交到 Git 仓库")
|
||
else:
|
||
print("ℹ️ 没有缓存文件需要迁移")
|
||
|
||
except Exception as e:
|
||
print(f"❌ 自动迁移失败: {e}")
|
||
|
||
elif new_cache_dir.exists():
|
||
# 新缓存目录已存在,检查是否有文件
|
||
cache_files = list(new_cache_dir.glob("*.json"))
|
||
if cache_files:
|
||
is_ci = self.is_ci_environment()
|
||
env_desc = '(CI)' if is_ci else '(本地)'
|
||
print(f"📦 发现根目录缓存 {env_desc},共 {len(cache_files)} 个缓存文件")
|
||
|
||
def process_page(self, markdown, page, config):
|
||
"""处理页面,生成AI摘要(支持CI环境检测)"""
|
||
# 检查是否应该在当前环境运行
|
||
if not self.should_run_in_current_environment():
|
||
return markdown
|
||
|
||
if not self.should_generate_summary(page, markdown):
|
||
return markdown
|
||
|
||
clean_content = self.clean_content_for_ai(markdown)
|
||
|
||
# 内容长度检查
|
||
if len(clean_content) < 100:
|
||
print(f"📄 内容太短,跳过摘要生成: {page.file.src_path}")
|
||
return markdown
|
||
|
||
content_hash = self.get_content_hash(clean_content)
|
||
page_title = getattr(page, 'title', '')
|
||
is_ci = self.is_ci_environment()
|
||
|
||
# 检查缓存
|
||
cached_summary = self.get_cached_summary(content_hash)
|
||
if cached_summary:
|
||
summary = cached_summary.get('summary', '')
|
||
ai_service = cached_summary.get('service', 'cached')
|
||
env_desc = '(CI)' if is_ci else '(本地)'
|
||
print(f"✅ 使用缓存摘要 {env_desc}: {page.file.src_path}")
|
||
else:
|
||
# 如果在 CI 环境中且配置为只使用缓存,直接跳过摘要生成
|
||
if is_ci and self.ci_config['ci_only_cache']:
|
||
print(f"📦 CI 环境仅使用缓存模式,无缓存可用,跳过摘要生成: {page.file.src_path}")
|
||
return markdown
|
||
|
||
# 生成新摘要
|
||
lang_desc = {'zh': '中文', 'en': '英文', 'both': '双语'}
|
||
env_desc = '(CI)' if is_ci else '(本地)'
|
||
print(f"🤖 正在生成{lang_desc.get(self.summary_language, '中文')}AI摘要 {env_desc}: {page.file.src_path}")
|
||
summary, ai_service = self.generate_ai_summary(clean_content, page_title)
|
||
|
||
if not summary:
|
||
# 尝试生成备用摘要
|
||
summary = self.generate_fallback_summary(clean_content, page_title)
|
||
if summary:
|
||
ai_service = 'fallback'
|
||
print(f"📝 使用备用摘要 {env_desc}: {page.file.src_path}")
|
||
else:
|
||
print(f"❌ 无法生成摘要 {env_desc}: {page.file.src_path}")
|
||
return markdown
|
||
else:
|
||
print(f"✅ AI摘要生成成功 ({ai_service}) {env_desc}: {page.file.src_path}")
|
||
|
||
# 保存到缓存
|
||
if summary:
|
||
self.save_summary_cache(content_hash, {
|
||
'summary': summary,
|
||
'service': ai_service,
|
||
'page_title': page_title
|
||
})
|
||
|
||
# 添加摘要到页面最上面
|
||
if summary:
|
||
summary_html = self.format_summary(summary, ai_service)
|
||
return summary_html + '\n\n' + markdown
|
||
else:
|
||
return markdown
|
||
|
||
def should_generate_summary(self, page, markdown):
|
||
"""判断是否应该生成摘要"""
|
||
# 检查页面元数据
|
||
if hasattr(page, 'meta'):
|
||
# 明确禁用
|
||
if page.meta.get('ai_summary') == False:
|
||
return False
|
||
|
||
# 强制启用
|
||
if page.meta.get('ai_summary') == True:
|
||
return True
|
||
|
||
# 获取文件路径
|
||
src_path = page.file.src_path.replace('\\', '/') # 统一路径分隔符
|
||
|
||
# 检查排除模式
|
||
if any(pattern in src_path for pattern in self.exclude_patterns):
|
||
return False
|
||
|
||
# 检查排除的特定文件
|
||
if src_path in self.exclude_files:
|
||
return False
|
||
|
||
# 检查是否在启用的文件夹中
|
||
for folder in self.enabled_folders:
|
||
if src_path.startswith(folder) or f'/{folder}' in src_path:
|
||
folder_name = folder.rstrip('/')
|
||
lang_desc = {'zh': '中文', 'en': '英文', 'both': '双语'}
|
||
print(f"🎯 {folder_name}文件夹文章检测到,启用{lang_desc.get(self.summary_language, '中文')}AI摘要: {src_path}")
|
||
return True
|
||
|
||
# 默认不生成摘要
|
||
return False
|
||
|
||
def format_summary(self, summary, ai_service):
|
||
"""格式化摘要显示(包含CI环境标识)"""
|
||
# 根据语言设置显示不同的标题
|
||
service_names = {
|
||
'zh': {
|
||
'glm': 'AI智能摘要 (智谱清言)',
|
||
'openai': 'AI智能摘要 (ChatGPT)',
|
||
'azure_openai': 'AI智能摘要 (Azure OpenAI)',
|
||
'claude': 'AI智能摘要 (Claude)',
|
||
'gemini': 'AI智能摘要 (Gemini)',
|
||
'fallback': '自动摘要',
|
||
'cached': 'AI智能摘要',
|
||
'ci_cache_only': 'AI智能摘要 (缓存)'
|
||
},
|
||
'en': {
|
||
'glm': 'AI Summary (GLM)',
|
||
'openai': 'AI Summary (ChatGPT)',
|
||
'azure_openai': 'AI Summary (Azure OpenAI)',
|
||
'claude': 'AI Summary (Claude)',
|
||
'gemini': 'AI Summary (Gemini)',
|
||
'fallback': 'Auto Summary',
|
||
'cached': 'AI Summary',
|
||
'ci_cache_only': 'AI Summary (Cached)'
|
||
},
|
||
'both': {
|
||
'glm': 'AI智能摘要 / AI Summary (GLM)',
|
||
'openai': 'AI智能摘要 / AI Summary (ChatGPT)',
|
||
'azure_openai': 'AI智能摘要 / AI Summary (Azure OpenAI)',
|
||
'claude': 'AI智能摘要 / AI Summary (Claude)',
|
||
'gemini': 'AI智能摘要 / AI Summary (Gemini)',
|
||
'fallback': '自动摘要 / Auto Summary',
|
||
'cached': 'AI智能摘要 / AI Summary',
|
||
'ci_cache_only': 'AI智能摘要 / AI Summary (缓存)'
|
||
}
|
||
}
|
||
|
||
name_config = service_names.get(self.summary_language, service_names['zh'])
|
||
service_name = name_config.get(ai_service, name_config['fallback'])
|
||
|
||
# 图标和颜色配置
|
||
icon = '💾' if ai_service not in ['fallback', 'ci_cache_only'] else '📝'
|
||
color = 'info' if ai_service not in ['fallback', 'ci_cache_only'] else 'tip'
|
||
|
||
return f'''!!! {color} "{icon} {service_name}"
|
||
{summary}
|
||
|
||
'''
|
||
|
||
# Module-level generator instance used by the configuration helpers and the
# MkDocs hook defined in this module. It is created at import time.
ai_summary_generator = AISummaryGenerator()
|
||
|
||
# 🔧 配置函数
|
||
def configure_ai_summary(enabled_folders=None, exclude_patterns=None, exclude_files=None,
                         ai_service=None, service_config=None, language='zh',
                         ci_enabled=None, local_enabled=None, ci_only_cache=None, ci_fallback=None, cache_enabled=None):
    """
    Configure the AI summary feature (CI and local environments separately).

    Args:
        enabled_folders: Folders whose pages receive a summary.
        exclude_patterns: Path substrings that exclude a page.
        exclude_files: Exact source paths that are excluded.
        ai_service: Service to use ('glm', 'openai', 'claude', 'gemini').
        service_config: Settings merged into the selected service's configuration.
        language: Summary language ('zh': Chinese, 'en': English, 'both': bilingual).
        ci_enabled: Enable in CI environments.
        local_enabled: Enable in local environments.
        ci_only_cache: CI uses cached summaries only.
        ci_fallback: CI may use the rule-based fallback summary.
        cache_enabled: Enable the summary cache.

    Example:
        # Local development without cache: always generate new summaries
        configure_ai_summary(
            enabled_folders=['blog/', 'docs/'],
            language='zh',
            local_enabled=True,
            cache_enabled=False
        )

        # Cache-only in CI, cache enabled everywhere
        configure_ai_summary(
            enabled_folders=['blog/', 'docs/'],
            language='zh',
            ci_enabled=True,
            local_enabled=True,
            ci_only_cache=True,
            cache_enabled=True
        )
    """
    generator = ai_summary_generator
    generator.configure_folders(enabled_folders, exclude_patterns, exclude_files)
    generator.configure_language(language)

    # Forward the environment switches only when at least one was given.
    environment_args = (ci_enabled, local_enabled, ci_only_cache, ci_fallback, cache_enabled)
    if not all(value is None for value in environment_args):
        configure_ci_behavior(*environment_args)

    if not ai_service:
        return

    if service_config:
        # Merge the overrides into the existing entry (updated in place).
        merged = generator.ai_services.get(ai_service, {})
        merged.update(service_config)
        generator.configure_ai_service(ai_service, merged)
    else:
        generator.configure_ai_service(ai_service)
|
||
|
||
# 🔧 新增 CI 配置函数
|
||
def configure_ci_behavior(enabled_in_ci=None, enabled_in_local=None, ci_only_cache=None, ci_fallback_enabled=None, cache_enabled=None):
    """
    Configure the behaviour in CI and local environments.

    Only arguments that are not None are applied; each applied switch is
    confirmed on stdout.

    Args:
        enabled_in_ci: Enable AI summaries in CI environments.
        enabled_in_local: Enable AI summaries in local environments.
        ci_only_cache: CI uses cached summaries only.
        ci_fallback_enabled: CI may use the rule-based fallback summary.
        cache_enabled: Enable the summary cache.

    Example:
        # Disable the cache completely
        configure_ci_behavior(cache_enabled=False)

        # Local development without cache: always generate new summaries
        configure_ci_behavior(enabled_in_local=True, cache_enabled=False)

        # Cache-only in CI, cache enabled everywhere
        configure_ci_behavior(enabled_in_ci=True, enabled_in_local=True, ci_only_cache=True, cache_enabled=True)
    """
    settings = ai_summary_generator.ci_config

    if enabled_in_ci is not None:
        settings['enabled_in_ci'] = enabled_in_ci
        print(f"✅ CI 环境 AI 摘要: {'启用' if enabled_in_ci else '禁用'}")

    if enabled_in_local is not None:
        settings['enabled_in_local'] = enabled_in_local
        print(f"✅ 本地环境 AI 摘要: {'启用' if enabled_in_local else '禁用'}")

    if ci_only_cache is not None:
        settings['ci_only_cache'] = ci_only_cache
        print(f"✅ CI 环境仅缓存模式: {'启用' if ci_only_cache else '禁用'}")

    if ci_fallback_enabled is not None:
        settings['ci_fallback_enabled'] = ci_fallback_enabled
        print(f"✅ CI 环境备用摘要: {'启用' if ci_fallback_enabled else '禁用'}")

    if cache_enabled is not None:
        settings['cache_enabled'] = cache_enabled
        print(f"✅ 缓存功能: {'启用' if cache_enabled else '禁用'}")
|
||
|
||
def on_page_markdown(markdown, page, config, files):
    """MkDocs hook entry point: pass the page Markdown through the summary generator.

    ``files`` is accepted for the hook signature but not used.
    """
    generator = ai_summary_generator
    return generator.process_page(markdown, page, config)