From 54ac87b615cb611fac206ee7b758925e1e54e51c Mon Sep 17 00:00:00 2001 From: <> Date: Wed, 4 Jun 2025 08:00:43 +0000 Subject: [PATCH] Deployed 1082654 with MkDocs version: 1.6.1 --- .../__pycache__/ai_summary.cpython-313.pyc | Bin 45320 -> 45320 bytes .../__pycache__/socialmedia.cpython-313.pyc | Bin 1298 -> 1298 bytes overrides/hooks/test2.py | 1068 +++++++++++++++++ 3 files changed, 1068 insertions(+) create mode 100644 overrides/hooks/test2.py diff --git a/overrides/hooks/__pycache__/ai_summary.cpython-313.pyc b/overrides/hooks/__pycache__/ai_summary.cpython-313.pyc index 238079b2e35fb434a6c4552cdff19e19ddb7aeba..c111b1556ad0cfca0c69bef850793a79656be18c 100644 GIT binary patch delta 21 bcmeBp#MJSKiR&{jFBbz42>sp2^=|_JP&)^5 delta 21 bcmeBp#MJSKiR&{jFBbz4=ziJA^=|_JP_PGt diff --git a/overrides/hooks/__pycache__/socialmedia.cpython-313.pyc b/overrides/hooks/__pycache__/socialmedia.cpython-313.pyc index a6aa871cc8b3b08bd0f428f04d830db64746ccaf..66a6a735cce7c631d207b9014954980dd4508c56 100644 GIT binary patch delta 19 ZcmbQlHHnMsGcPX}0}u%P-N?nk3IHwE1Tg>r delta 19 ZcmbQlHHnMsGcPX}0}$wb*~rDg3IHxk1VaD- diff --git a/overrides/hooks/test2.py b/overrides/hooks/test2.py new file mode 100644 index 0000000..add6195 --- /dev/null +++ b/overrides/hooks/test2.py @@ -0,0 +1,1068 @@ +from dotenv import load_dotenv +load_dotenv() # 自动加载 .env 文件 + +import re +import json +import hashlib +import requests +from pathlib import Path +from datetime import datetime +import os +import shutil + +class AISummaryGenerator: + def __init__(self): + # 🗂️ 统一缓存路径策略 - 本地和CI环境都使用项目根目录 + # 这样避免了CI构建时被清理,也简化了路径管理 + self.cache_dir = Path(".ai_cache") + self.cache_dir.mkdir(parents=True, exist_ok=True) + + # 🚀 CI 环境配置 - 默认只在 CI 环境中启用 + # AI摘要环境配置 + self.ci_config = { + # CI部署环境开关 (true=CI中启用AI摘要生成) + 'enabled_in_ci': os.getenv('AI_SUMMARY_CI_ENABLED', 'true').lower() == 'true', + + # 本地部署环境开关 (true=本地开发时启用AI摘要) + 'enabled_in_local': os.getenv('AI_SUMMARY_LOCAL_ENABLED', 'false').lower() == 'true', + + # CI部署仅缓存模式 (true=仅使用缓存不调用API, false=允许生成新摘要) + 'ci_only_cache': os.getenv('AI_SUMMARY_CI_ONLY_CACHE', 'false').lower() == 'true', + + # 本地部署缓存功能开关 (true=启用缓存避免重复生成, false=总是生成新摘要) + 'cache_enabled': os.getenv('AI_SUMMARY_CACHE_ENABLED', 'true').lower() == 'true', + + # CI部署备用摘要开关 (true=API失败时生成基础摘要, false=失败时不显示摘要) + 'ci_fallback_enabled': os.getenv('AI_SUMMARY_CI_FALLBACK', 'true').lower() == 'true', + } + + # 🔄 自动缓存迁移逻辑(一次性迁移旧缓存) - 移到ci_config初始化之后 + self._auto_migrate_cache() + + # 添加服务配置文件,用于跟踪当前使用的服务 + self.service_config_file = self.cache_dir / "service_config.json" + + # 🤖 多AI服务配置 + self.ai_services = { + 'deepseek': { + 'url': 'https://api.deepseek.com/v1/chat/completions', + 'model': 'deepseek-chat', + 'api_key': os.getenv('DEEPSEEK_API_KEY', ), + 'max_tokens': 150, + 'temperature': 0.3 + }, + 'openai': { + 'url': 'https://api.chatanywhere.tech/v1/chat/completions', + 'model': 'gpt-3.5-turbo', # 或 'gpt-4', 'gpt-4-turbo' + 'api_key': os.getenv('OPENAI_API_KEY', ), + 'max_tokens': 150, + 'temperature': 0.3 + }, + # 'claude': { + # 'url': 'https://api.anthropic.com/v1/messages', + # 'model': 'claude-3-haiku-20240307', + # 'api_key': os.getenv('ANTHROPIC_API_KEY', 'your-claude-api-key'), + # 'max_tokens': 150, + # 'temperature': 0.3 + # }, + 'gemini': { + 'url': 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent', + 'model': 'gemini-pro', + 'api_key': os.getenv('GOOGLE_API_KEY', 'AIzaSyDwWgffCCyVFZVsRasX3B3arWFaCT1PzNI'), + 'max_tokens': 150, + 'temperature': 0.3 + } + } + + # 默认使用的AI服务 + self.default_service = 'deepseek' + + # 服务优先级(按顺序尝试) + self.service_fallback_order = ['openai', 'deepseek', 'claude', 'gemini'] + + # 📂 可自定义的文件夹配置 + self.enabled_folders = [ + 'blog/', # blog文件夹 + 'develop/', # develop文件夹 + # 'posts/', # posts文件夹 + # 'trip/', # trip文件夹 + # 'about/', # about文件夹 + ] + + # 📋 排除的文件和文件夹 + self.exclude_patterns = [ + 'waline.md', 'link.md', '404.md', 'tag.md', 'tags.md', + '/about/', '/search/', '/sitemap', '/admin/', + 'index.md', # 根目录index.md + ] + + # 📋 排除的特定文件 + self.exclude_files = [ + 'blog/index.md', + 'blog/indexblog.md', + 'docs/index.md', + 'develop/index.md', + ] + + # 🌍 语言配置/Language Configuration + self.summary_language = 'zh' # 默认中文,可选 'zh'、'en'、'both' + + # 在初始化时就进行环境检查 + self._check_environment() + + # 检查服务变更并处理缓存 + self._check_service_change() + + def _check_environment(self): + """初始化时检查环境""" + is_ci = self.is_ci_environment() + + if is_ci: + ci_name = self._get_ci_name() + if self.ci_config['enabled_in_ci']: + print(f"🚀 检测到 CI 环境 ({ci_name}),AI 摘要功能已启用") + self._should_run = True + else: + print(f"🚫 检测到 CI 环境 ({ci_name}),AI 摘要功能已禁用") + self._should_run = False + else: + # 本地环境检查 + if self.ci_config['enabled_in_local']: + print("💻 本地环境检测到,AI 摘要功能已启用") + self._should_run = True + else: + print("🚫 本地环境检测到,AI 摘要功能已禁用(仅在 CI 环境中启用)") + self._should_run = False + + def _check_service_change(self): + """检查AI服务是否发生变更,如有变更则自动清理缓存""" + # 如果禁用了缓存功能,跳过服务变更检查 + if not self.ci_config['cache_enabled']: + return + + current_config = { + 'default_service': self.default_service, + 'available_services': list(self.ai_services.keys()), + 'summary_language': self.summary_language, + 'check_time': datetime.now().isoformat() + } + + if self.service_config_file.exists(): + try: + with open(self.service_config_file, 'r', encoding='utf-8') as f: + previous_config = json.load(f) + + # 检查默认服务或语言是否变更 + if (previous_config.get('default_service') != current_config['default_service'] or + previous_config.get('summary_language') != current_config['summary_language']): + old_service = previous_config.get('default_service', 'unknown') + new_service = current_config['default_service'] + old_lang = previous_config.get('summary_language', 'zh') + new_lang = current_config['summary_language'] + + if old_service != new_service: + print(f"🔄 检测到AI服务变更: {old_service} → {new_service}") + if old_lang != new_lang: + print(f"🌍 检测到语言变更: {old_lang} → {new_lang}") + + print("🧹 自动清理AI摘要缓存...") + + try: + # 删除整个缓存目录 + if self.cache_dir.exists(): + shutil.rmtree(self.cache_dir) + print(f"✅ 已删除缓存文件夹: {self.cache_dir}") + + # 重新创建缓存目录 + self.cache_dir.mkdir(exist_ok=True) + print("📁 已重新创建缓存目录") + + except Exception as e: + print(f"❌ 清理缓存失败: {e}") + # 如果删除失败,尝试清理单个文件 + try: + self._clear_cache_files() + except: + print("⚠️ 缓存清理失败,新摘要可能会混用旧配置的缓存") + + except Exception as e: + print(f"读取服务配置失败: {e}") + + # 保存当前配置 + try: + with open(self.service_config_file, 'w', encoding='utf-8') as f: + json.dump(current_config, f, ensure_ascii=False, indent=2) + except Exception as e: + print(f"保存服务配置失败: {e}") + + def _clear_cache_files(self): + """清理缓存文件(备用方法)""" + cleared_count = 0 + try: + for cache_file in self.cache_dir.glob("*.json"): + if cache_file.name != "service_config.json": + cache_file.unlink() + cleared_count += 1 + print(f"✅ 已清理 {cleared_count} 个缓存文件") + except Exception as e: + print(f"❌ 单文件清理失败: {e}") + + def configure_ai_service(self, service_name, config=None): + """ + 配置AI服务 + + Args: + service_name: 服务名称 ('deepseek', 'openai', 'azure_openai', 'claude', 'gemini') + config: 服务配置字典 + """ + old_service = self.default_service + + if config: + self.ai_services[service_name] = config + self.default_service = service_name + + # 如果服务发生变更,自动清理缓存 + if old_service != service_name: + print(f"🔄 AI服务已切换: {old_service} → {service_name}") + print("🧹 自动清理所有AI摘要缓存...") + + try: + if self.cache_dir.exists(): + shutil.rmtree(self.cache_dir) + print(f"✅ 已删除缓存文件夹: {self.cache_dir}") + + # 重新创建缓存目录 + self.cache_dir.mkdir(exist_ok=True) + print("📁 已重新创建缓存目录") + + except Exception as e: + print(f"❌ 清理缓存失败: {e}") + # 如果删除失败,尝试清理单个文件 + try: + self._clear_cache_files() + except: + print("⚠️ 缓存清理失败,新摘要可能会混用旧服务的缓存") + + # 更新服务配置记录 + self._check_service_change() + + def configure_language(self, language='zh'): + """ + 配置摘要语言 + + Args: + language: 语言设置 ('zh': 中文, 'en': 英文, 'both': 双语) + """ + old_language = self.summary_language + self.summary_language = language + + if old_language != language: + print(f"🌍 摘要语言已切换: {old_language} → {language}") + print("🧹 自动清理摘要缓存以应用新语言设置...") + + try: + if self.cache_dir.exists(): + shutil.rmtree(self.cache_dir) + print(f"✅ 已删除缓存文件夹: {self.cache_dir}") + + # 重新创建缓存目录 + self.cache_dir.mkdir(exist_ok=True) + print("📁 已重新创建缓存目录") + + except Exception as e: + print(f"❌ 清理缓存失败: {e}") + + # 更新服务配置记录 + self._check_service_change() + + def configure_folders(self, folders=None, exclude_patterns=None, exclude_files=None): + """配置启用AI摘要的文件夹""" + if folders is not None: + self.enabled_folders = folders + if exclude_patterns is not None: + self.exclude_patterns = exclude_patterns + if exclude_files is not None: + self.exclude_files = exclude_files + + def get_content_hash(self, content): + """生成内容hash用于缓存(包含语言设置)""" + content_with_lang = f"{content}_{self.summary_language}" + return hashlib.md5(content_with_lang.encode('utf-8')).hexdigest() + + def get_cached_summary(self, content_hash): + """获取缓存的摘要""" + # 如果禁用了缓存功能,直接返回None + if not self.ci_config['cache_enabled']: + return None + + cache_file = self.cache_dir / f"{content_hash}.json" + if cache_file.exists(): + try: + with open(cache_file, 'r', encoding='utf-8') as f: + cache_data = json.load(f) + # 检查缓存是否过期(7天) + cache_time = datetime.fromisoformat(cache_data.get('timestamp', '1970-01-01')) + if (datetime.now() - cache_time).days < 7: + return cache_data + except: + pass + return None + + def save_summary_cache(self, content_hash, summary_data): + """保存摘要到缓存""" + # 如果禁用了缓存功能,不保存缓存 + if not self.ci_config['cache_enabled']: + return + + cache_file = self.cache_dir / f"{content_hash}.json" + try: + summary_data['timestamp'] = datetime.now().isoformat() + summary_data['language'] = self.summary_language + with open(cache_file, 'w', encoding='utf-8') as f: + json.dump(summary_data, f, ensure_ascii=False, indent=2) + except Exception as e: + print(f"保存摘要缓存失败: {e}") + + def clean_content_for_ai(self, markdown): + """清理内容,提取主要文本用于AI处理""" + content = markdown + + # 移除YAML front matter + content = re.sub(r'^---.*?---\s*', '', content, flags=re.DOTALL) + + # 移除已存在的阅读信息块和AI摘要块 + content = re.sub(r'!!! info "📖 阅读信息".*?(?=\n\n|\n#|\Z)', '', content, flags=re.DOTALL) + content = re.sub(r'!!! info "🤖 AI智能摘要".*?(?=\n\n|\n#|\Z)', '', content, flags=re.DOTALL) + content = re.sub(r'!!! tip "📝 自动摘要".*?(?=\n\n|\n#|\Z)', '', content, flags=re.DOTALL) + + # 移除HTML标签 + content = re.sub(r'<[^>]+>', '', content) + + # 移除图片,保留alt文本作为内容提示 + content = re.sub(r'!\[([^\]]*)\]\([^)]+\)', r'[图片:\1]', content) + + # 移除链接,保留文本 + content = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', content) + + # 移除代码块,但保留关键信息 + content = re.sub(r'```(\w+)?\n(.*?)\n```', r'[代码示例]', content, flags=re.DOTALL) + + # 移除行内代码 + content = re.sub(r'`[^`]+`', '[代码]', content) + + # 移除表格格式但保留内容 + content = re.sub(r'\|[^\n]+\|', '', content) + content = re.sub(r'^[-|:\s]+$', '', content, flags=re.MULTILINE) + + # 清理格式符号 + content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content) # 粗体 + content = re.sub(r'\*([^*]+)\*', r'\1', content) # 斜体 + content = re.sub(r'^#+\s*', '', content, flags=re.MULTILINE) # 标题符号 + + # 移除多余的空行和空格 + content = re.sub(r'\n\s*\n', '\n\n', content) + content = re.sub(r'^[ \t]+', '', content, flags=re.MULTILINE) + content = content.strip() + + return content + + def build_headers(self, service_config): + """构建请求头""" + headers = { + 'Content-Type': 'application/json' + } + + # 根据服务类型添加认证头 + if 'azure_openai' in service_config.get('url', ''): + headers['api-key'] = service_config['api_key'] + elif 'anthropic.com' in service_config.get('url', ''): + headers['x-api-key'] = service_config['api_key'] + headers['anthropic-version'] = '2023-06-01' + elif 'googleapis.com' in service_config.get('url', ''): + # Google API使用URL参数 + pass + else: + # OpenAI和DeepSeek使用Bearer token + headers['Authorization'] = f"Bearer {service_config['api_key']}" + + # 添加额外的头部 + if 'headers_extra' in service_config: + headers.update(service_config['headers_extra']) + + return headers + + def build_payload(self, service_name, service_config, content, page_title): + """构建请求载荷""" + # 根据语言设置生成不同的prompt + if self.summary_language == 'en': + prompt = f"""Please generate a high-quality summary for the following technical article with these requirements: + +1. **Length Control**: Strictly limit to 80-120 words +2. **Content Requirements**: + - Accurately summarize the core themes and key points of the article + - Highlight technical features, application scenarios, or problems solved + - Use professional but understandable language + - Avoid repeating the article title content +3. **Format Requirements**: + - Return summary content directly without any prefix or suffix + - Use concise declarative sentences + - Technical terms are appropriate + +Article Title: {page_title} + +Article Content: +{content[:2500]} + +Please generate summary:""" + + elif self.summary_language == 'both': + prompt = f"""Please generate a bilingual summary (Chinese and English) for the following technical article with these requirements: + +1. **Length Control**: + - Chinese: 80-120 characters + - English: 80-120 words +2. **Content Requirements**: + - Accurately summarize the core themes and key points + - Highlight technical features, application scenarios, or problems solved + - Use professional but understandable language + - Avoid repeating the article title content +3. **Format Requirements**: + - First provide Chinese summary + - Then provide English summary + - Separate with a blank line + - No prefixes or additional formatting + +Article Title: {page_title} + +Article Content: +{content[:2500]} + +Please generate bilingual summary:""" + + else: # 默认中文 + prompt = f"""请为以下技术文章生成一个高质量的摘要,要求: + +1. **长度控制**:严格控制在80-120字以内 +2. **内容要求**: + - 准确概括文章的核心主题和关键要点 + - 突出技术特点、应用场景或解决的问题 + - 使用专业但易懂的语言 + - 避免重复文章标题的内容 +3. **格式要求**: + - 直接返回摘要内容,无需任何前缀或后缀 + - 使用简洁的陈述句 + - 可以适当使用技术术语 + +文章标题:{page_title} + +文章内容: +{content[:2500]} + +请生成摘要:""" + + if service_name == 'claude': + # Claude API格式 + return { + "model": service_config['model'], + "max_tokens": service_config['max_tokens'], + "temperature": service_config['temperature'], + "messages": [ + { + "role": "user", + "content": prompt + } + ] + } + elif service_name == 'gemini': + # Gemini API格式 + return { + "contents": [ + { + "parts": [ + { + "text": prompt + } + ] + } + ], + "generationConfig": { + "temperature": service_config['temperature'], + "maxOutputTokens": service_config['max_tokens'] + } + } + else: + # OpenAI格式 (OpenAI, DeepSeek, Azure OpenAI) + system_content = { + 'zh': "你是一个专业的技术文档摘要专家,擅长提取文章核心要点并生成简洁准确的中文摘要。", + 'en': "You are a professional technical documentation summary expert, skilled at extracting core points from articles and generating concise and accurate English summaries.", + 'both': "You are a professional technical documentation summary expert, skilled at extracting core points from articles and generating concise and accurate bilingual summaries in both Chinese and English." + } + + return { + "model": service_config['model'], + "messages": [ + { + "role": "system", + "content": system_content.get(self.summary_language, system_content['zh']) + }, + { + "role": "user", + "content": prompt + } + ], + "max_tokens": service_config['max_tokens'] * (2 if self.summary_language == 'both' else 1), + "temperature": service_config['temperature'], + "top_p": 0.9 + } + + def extract_response_content(self, service_name, response_data): + """从响应中提取内容""" + try: + if service_name == 'claude': + return response_data['content'][0]['text'] + elif service_name == 'gemini': + return response_data['candidates'][0]['content']['parts'][0]['text'] + else: + # OpenAI格式 + return response_data['choices'][0]['message']['content'] + except (KeyError, IndexError) as e: + print(f"解析{service_name}响应失败: {e}") + return None + + def generate_ai_summary_with_service(self, content, page_title, service_name): + """使用指定服务生成摘要""" + if service_name not in self.ai_services: + print(f"不支持的AI服务: {service_name}") + return None + + service_config = self.ai_services[service_name] + + # 检查API密钥 + if not service_config['api_key'] or service_config['api_key'].startswith('your-'): + print(f"{service_name} API密钥未配置") + return None + + try: + headers = self.build_headers(service_config) + payload = self.build_payload(service_name, service_config, content, page_title) + + # 对于Google API,添加API密钥到URL + url = service_config['url'] + if service_name == 'gemini': + url = f"{url}?key={service_config['api_key']}" + + response = requests.post( + url, + headers=headers, + json=payload, + timeout=30 + ) + + if response.status_code == 200: + result = response.json() + summary = self.extract_response_content(service_name, result) + + if summary: + # 清理可能的格式问题 + summary = re.sub(r'^["""''`]+|["""''`]+$', '', summary.strip()) + summary = re.sub(r'^\s*摘要[::]\s*', '', summary) + summary = re.sub(r'^\s*总结[::]\s*', '', summary) + summary = re.sub(r'^\s*Summary[::]\s*', '', summary) + summary = re.sub(r'^\s*Abstract[::]\s*', '', summary) + return summary + + else: + print(f"{service_name} API请求失败: {response.status_code} - {response.text}") + return None + + except requests.exceptions.RequestException as e: + print(f"{service_name} API请求异常: {e}") + return None + except Exception as e: + print(f"{service_name} 摘要生成异常: {e}") + return None + + def generate_ai_summary(self, content, page_title=""): + """生成AI摘要(支持CI环境策略)""" + is_ci = self.is_ci_environment() + + # 如果在 CI 环境中且配置为只使用缓存 + if is_ci and self.ci_config['ci_only_cache']: + print(f"📦 CI 环 environment仅使用缓存模式") + return None, 'ci_cache_only' + + # 按优先级尝试不同服务 + services_to_try = [self.default_service] + [s for s in self.service_fallback_order if s != self.default_service] + + for service_name in services_to_try: + if service_name in self.ai_services: + lang_desc = {'zh': '中文', 'en': '英文', 'both': '双语'} + env_desc = '(CI)' if is_ci else '(本地)' + print(f"🔄 尝试使用 {service_name} 生成{lang_desc.get(self.summary_language, '中文')}摘要 {env_desc}...") + summary = self.generate_ai_summary_with_service(content, page_title, service_name) + if summary: + return summary, service_name + + print("⚠️ 所有AI服务均不可用") + return None, None + + def generate_fallback_summary(self, content, page_title=""): + """生成备用摘要(考虑CI环境配置)""" + is_ci = self.is_ci_environment() + + # 如果在 CI 环境中且禁用了备用摘要 + if is_ci and not self.ci_config['ci_fallback_enabled']: + print(f"🚫 CI 环境禁用备用摘要") + return None + + # 移除格式符号 + clean_text = re.sub(r'^#+\s*', '', content, flags=re.MULTILINE) + clean_text = re.sub(r'\*\*([^*]+)\*\*', r'\1', clean_text) + clean_text = re.sub(r'\*([^*]+)\*', r'\1', clean_text) + + # 分割成句子 + sentences = re.split(r'[\u3002\uff01\uff1f.!?]', clean_text) + sentences = [s.strip() for s in sentences if len(s.strip()) > 15] + + # 优先选择包含关键词的句子 + key_indicators = [ + '介绍', '讲解', '说明', '分析', '探讨', '研究', '实现', '应用', + '方法', '技术', '算法', '原理', '概念', '特点', '优势', '解决', + '教程', '指南', '配置', '安装', '部署', '开发', '设计', '构建' + ] + + priority_sentences = [] + normal_sentences = [] + + for sentence in sentences[:10]: # 处理前10句 + if any(keyword in sentence for keyword in key_indicators): + priority_sentences.append(sentence) + else: + normal_sentences.append(sentence) + + # 组合摘要 + selected_sentences = [] + total_length = 0 + + # 优先使用关键句子 + for sentence in priority_sentences: + if total_length + len(sentence) > 100: + break + selected_sentences.append(sentence) + total_length += len(sentence) + + # 如果还有空间,添加普通句子 + if total_length < 80: + for sentence in normal_sentences: + if total_length + len(sentence) > 100: + break + selected_sentences.append(sentence) + total_length += len(sentence) + + if selected_sentences: + summary = '.'.join(selected_sentences) + '.' + # 简化冗长的摘要 + if len(summary) > 120: + summary = selected_sentences[0] + '.' + + # 根据语言设置生成不同的备用摘要 + if self.summary_language == 'en': + return self._generate_english_fallback(page_title) + elif self.summary_language == 'both': + zh_summary = summary + en_summary = self._generate_english_fallback(page_title) + return f"{zh_summary}\n\n{en_summary}" + else: + return summary + else: + # 根据标题和语言生成通用摘要 + if self.summary_language == 'en': + return self._generate_english_fallback(page_title) + elif self.summary_language == 'both': + zh_summary = self._generate_chinese_fallback(page_title) + en_summary = self._generate_english_fallback(page_title) + return f"{zh_summary}\n\n{en_summary}" + else: + return self._generate_chinese_fallback(page_title) + + def _generate_chinese_fallback(self, page_title=""): + """生成中文备用摘要""" + if page_title: + # 根据标题生成通用摘要 + if any(keyword in page_title for keyword in ['教程', '指南', '配置', '安装']): + return f"本文介绍了{page_title}的相关内容,包括具体的操作步骤和注意事项,为读者提供实用的技术指导。" + elif any(keyword in page_title for keyword in ['分析', '研究', '探讨', '原理']): + return f"本文深入分析了{page_title}的核心概念和技术原理,为读者提供详细的理论解析和实践见解。" + elif any(keyword in page_title for keyword in ['开发', '构建', '实现', '设计']): + return f"本文详细讲解了{page_title}的开发过程和实现方法,分享了实际的开发经验和技术方案。" + else: + return f"本文围绕{page_title}展开讨论,介绍了相关的技术概念、应用场景和实践方法。" + else: + return "本文介绍了相关的技术概念和实践方法,为读者提供有价值的参考信息。" + + def _generate_english_fallback(self, page_title=""): + """生成英文备用摘要""" + if page_title: + # 根据标题生成通用摘要 + if any(keyword in page_title.lower() for keyword in ['tutorial', 'guide', 'setup', 'install', 'config']): + return f"This article provides a comprehensive guide on {page_title}, including step-by-step instructions and important considerations for practical implementation." + elif any(keyword in page_title.lower() for keyword in ['analysis', 'research', 'study', 'principle']): + return f"This article presents an in-depth analysis of {page_title}, exploring core concepts and technical principles with detailed theoretical insights." + elif any(keyword in page_title.lower() for keyword in ['develop', 'build', 'implement', 'design']): + return f"This article explains the development process and implementation methods for {page_title}, sharing practical development experience and technical solutions." + else: + return f"This article discusses {page_title}, covering relevant technical concepts, application scenarios, and practical methods." + else: + return "This article introduces relevant technical concepts and practical methods, providing valuable reference information for readers." + + def is_ci_environment(self): + """检测是否在 CI 环境中运行""" + # 常见的 CI 环境变量 + ci_indicators = [ + 'CI', 'CONTINUOUS_INTEGRATION', # 通用 CI 标识 + 'GITHUB_ACTIONS', # GitHub Actions + 'GITLAB_CI', # GitLab CI + 'JENKINS_URL', # Jenkins + 'TRAVIS', # Travis CI + 'CIRCLECI', # CircleCI + 'AZURE_HTTP_USER_AGENT', # Azure DevOps + 'TEAMCITY_VERSION', # TeamCity + 'BUILDKITE', # Buildkite + 'CODEBUILD_BUILD_ID', # AWS CodeBuild + 'NETLIFY', # Netlify + 'VERCEL', # Vercel + 'CF_PAGES', # Cloudflare Pages + ] + + for indicator in ci_indicators: + if os.getenv(indicator): + return True + + return False + + def should_run_in_current_environment(self): + """判断是否应该在当前环境中运行 AI 摘要""" + return self._should_run + + def _get_ci_name(self): + """获取 CI 环境名称""" + if os.getenv('GITHUB_ACTIONS'): + return 'GitHub Actions' + elif os.getenv('GITLAB_CI'): + return 'GitLab CI' + elif os.getenv('JENKINS_URL'): + return 'Jenkins' + elif os.getenv('TRAVIS'): + return 'Travis CI' + elif os.getenv('CIRCLECI'): + return 'CircleCI' + elif os.getenv('AZURE_HTTP_USER_AGENT'): + return 'Azure DevOps' + elif os.getenv('NETLIFY'): + return 'Netlify' + elif os.getenv('VERCEL'): + return 'Vercel' + elif os.getenv('CF_PAGES'): + return 'Cloudflare Pages' + elif os.getenv('CODEBUILD_BUILD_ID'): + return 'AWS CodeBuild' + else: + return 'Unknown CI' + + def _auto_migrate_cache(self): + """自动迁移缓存文件(仅在需要时执行一次)""" + # 如果禁用了缓存功能,跳过缓存迁移 + if not self.ci_config.get('cache_enabled', True): + return + + old_cache_dir = Path("site/.ai_cache") + new_cache_dir = Path(".ai_cache") + + # 检查是否需要迁移 + if old_cache_dir.exists() and not new_cache_dir.exists(): + print("🔄 检测到旧缓存目录,开始自动迁移...") + + try: + # 创建新目录 + new_cache_dir.mkdir(exist_ok=True) + + # 复制文件 + cache_files = list(old_cache_dir.glob("*.json")) + copied_count = 0 + + for cache_file in cache_files: + target_file = new_cache_dir / cache_file.name + try: + shutil.copy2(cache_file, target_file) + copied_count += 1 + except Exception as e: + print(f"⚠️ 复制缓存文件失败 {cache_file.name}: {e}") + + if copied_count > 0: + print(f"✅ 自动迁移完成!共迁移 {copied_count} 个缓存文件") + print("💡 提示:请将 .ai_cache 目录提交到 Git 仓库") + else: + print("ℹ️ 没有缓存文件需要迁移") + + except Exception as e: + print(f"❌ 自动迁移失败: {e}") + + elif new_cache_dir.exists(): + # 新缓存目录已存在,检查是否有文件 + cache_files = list(new_cache_dir.glob("*.json")) + if cache_files: + is_ci = self.is_ci_environment() + env_desc = '(CI)' if is_ci else '(本地)' + print(f"📦 发现根目录缓存 {env_desc},共 {len(cache_files)} 个缓存文件") + + def process_page(self, markdown, page, config): + """处理页面,生成AI摘要(支持CI环境检测)""" + # 检查是否应该在当前环境运行 + if not self.should_run_in_current_environment(): + return markdown + + if not self.should_generate_summary(page, markdown): + return markdown + + clean_content = self.clean_content_for_ai(markdown) + + # 内容长度检查 + if len(clean_content) < 100: + print(f"📄 内容太短,跳过摘要生成: {page.file.src_path}") + return markdown + + content_hash = self.get_content_hash(clean_content) + page_title = getattr(page, 'title', '') + is_ci = self.is_ci_environment() + + # 检查缓存 + cached_summary = self.get_cached_summary(content_hash) + if cached_summary: + summary = cached_summary.get('summary', '') + ai_service = cached_summary.get('service', 'cached') + env_desc = '(CI)' if is_ci else '(本地)' + print(f"✅ 使用缓存摘要 {env_desc}: {page.file.src_path}") + else: + # 如果在 CI 环境中且配置为只使用缓存,直接跳过摘要生成 + if is_ci and self.ci_config['ci_only_cache']: + print(f"📦 CI 环境仅使用缓存模式,无缓存可用,跳过摘要生成: {page.file.src_path}") + return markdown + + # 生成新摘要 + lang_desc = {'zh': '中文', 'en': '英文', 'both': '双语'} + env_desc = '(CI)' if is_ci else '(本地)' + print(f"🤖 正在生成{lang_desc.get(self.summary_language, '中文')}AI摘要 {env_desc}: {page.file.src_path}") + summary, ai_service = self.generate_ai_summary(clean_content, page_title) + + if not summary: + # 尝试生成备用摘要 + summary = self.generate_fallback_summary(clean_content, page_title) + if summary: + ai_service = 'fallback' + print(f"📝 使用备用摘要 {env_desc}: {page.file.src_path}") + else: + print(f"❌ 无法生成摘要 {env_desc}: {page.file.src_path}") + return markdown + else: + print(f"✅ AI摘要生成成功 ({ai_service}) {env_desc}: {page.file.src_path}") + + # 保存到缓存 + if summary: + self.save_summary_cache(content_hash, { + 'summary': summary, + 'service': ai_service, + 'page_title': page_title + }) + + # 添加摘要到页面最上面 + if summary: + summary_html = self.format_summary(summary, ai_service) + return summary_html + '\n\n' + markdown + else: + return markdown + + def should_generate_summary(self, page, markdown): + """判断是否应该生成摘要""" + # 检查页面元数据 + if hasattr(page, 'meta'): + # 明确禁用 + if page.meta.get('ai_summary') == False: + return False + + # 强制启用 + if page.meta.get('ai_summary') == True: + return True + + # 获取文件路径 + src_path = page.file.src_path.replace('\\', '/') # 统一路径分隔符 + + # 检查排除模式 + if any(pattern in src_path for pattern in self.exclude_patterns): + return False + + # 检查排除的特定文件 + if src_path in self.exclude_files: + return False + + # 检查是否在启用的文件夹中 + for folder in self.enabled_folders: + if src_path.startswith(folder) or f'/{folder}' in src_path: + folder_name = folder.rstrip('/') + lang_desc = {'zh': '中文', 'en': '英文', 'both': '双语'} + print(f"🎯 {folder_name}文件夹文章检测到,启用{lang_desc.get(self.summary_language, '中文')}AI摘要: {src_path}") + return True + + # 默认不生成摘要 + return False + + def format_summary(self, summary, ai_service): + """格式化摘要显示(包含CI环境标识)""" + # 根据语言设置显示不同的标题 + service_names = { + 'zh': { + 'deepseek': 'AI智能摘要 (DeepSeek)', + 'openai': 'AI智能摘要 (ChatGPT)', + 'azure_openai': 'AI智能摘要 (Azure OpenAI)', + 'claude': 'AI智能摘要 (Claude)', + 'gemini': 'AI智能摘要 (Gemini)', + 'fallback': '自动摘要', + 'cached': 'AI智能摘要', + 'ci_cache_only': 'AI智能摘要 (缓存)' + }, + 'en': { + 'deepseek': 'AI Summary (DeepSeek)', + 'openai': 'AI Summary (ChatGPT)', + 'azure_openai': 'AI Summary (Azure OpenAI)', + 'claude': 'AI Summary (Claude)', + 'gemini': 'AI Summary (Gemini)', + 'fallback': 'Auto Summary', + 'cached': 'AI Summary', + 'ci_cache_only': 'AI Summary (Cached)' + }, + 'both': { + 'deepseek': 'AI智能摘要 / AI Summary (DeepSeek)', + 'openai': 'AI智能摘要 / AI Summary (ChatGPT)', + 'azure_openai': 'AI智能摘要 / AI Summary (Azure OpenAI)', + 'claude': 'AI智能摘要 / AI Summary (Claude)', + 'gemini': 'AI智能摘要 / AI Summary (Gemini)', + 'fallback': '自动摘要 / Auto Summary', + 'cached': 'AI智能摘要 / AI Summary', + 'ci_cache_only': 'AI智能摘要 / AI Summary (缓存)' + } + } + + name_config = service_names.get(self.summary_language, service_names['zh']) + service_name = name_config.get(ai_service, name_config['fallback']) + + # 图标和颜色配置 + icon = '💾' if ai_service not in ['fallback', 'ci_cache_only'] else '📝' + color = 'info' if ai_service not in ['fallback', 'ci_cache_only'] else 'tip' + + return f'''!!! {color} "{icon} {service_name}" + {summary} + +''' + +# 创建全局实例 +ai_summary_generator = AISummaryGenerator() + +# 🔧 配置函数 +def configure_ai_summary(enabled_folders=None, exclude_patterns=None, exclude_files=None, + ai_service=None, service_config=None, language='zh', + ci_enabled=None, local_enabled=None, ci_only_cache=None, ci_fallback=None, cache_enabled=None): + """ + 配置AI摘要功能(支持CI和本地环境分别配置) + + Args: + enabled_folders: 启用AI摘要的文件夹列表 + exclude_patterns: 排除的模式列表 + exclude_files: 排除的特定文件列表 + ai_service: 使用的AI服务 ('deepseek', 'openai', 'claude', 'gemini') + service_config: AI服务配置 + language: 摘要语言 ('zh': 中文, 'en': 英文, 'both': 双语) + ci_enabled: 是否在 CI 环境中启用 + local_enabled: 是否在本地环境中启用 + ci_only_cache: CI 环境是否仅使用缓存 + ci_fallback: CI 环境是否启用备用摘要 + cache_enabled: 是否启用缓存功能 + + Example: + # 本地开发时禁用缓存,总是生成新摘要 + configure_ai_summary( + enabled_folders=['blog/', 'docs/'], + language='zh', + local_enabled=True, + cache_enabled=False # 禁用缓存 + ) + + # CI中启用缓存,本地禁用缓存 + configure_ai_summary( + enabled_folders=['blog/', 'docs/'], + language='zh', + ci_enabled=True, + local_enabled=True, + ci_only_cache=True, # CI仅使用缓存 + cache_enabled=True # 启用缓存功能 + ) + """ + ai_summary_generator.configure_folders(enabled_folders, exclude_patterns, exclude_files) + ai_summary_generator.configure_language(language) + + # 配置环境行为 + if any(x is not None for x in [ci_enabled, local_enabled, ci_only_cache, ci_fallback, cache_enabled]): + configure_ci_behavior(ci_enabled, local_enabled, ci_only_cache, ci_fallback, cache_enabled) + + if ai_service: + if service_config: + # 合并配置 + current_config = ai_summary_generator.ai_services.get(ai_service, {}) + current_config.update(service_config) + ai_summary_generator.configure_ai_service(ai_service, current_config) + else: + ai_summary_generator.configure_ai_service(ai_service) + +# 🔧 新增 CI 配置函数 +def configure_ci_behavior(enabled_in_ci=None, enabled_in_local=None, ci_only_cache=None, ci_fallback_enabled=None, cache_enabled=None): + """ + 配置 CI 和本地环境行为 + + Args: + enabled_in_ci: 是否在 CI 环境中启用 AI 摘要 + enabled_in_local: 是否在本地环境中启用 AI 摘要 + ci_only_cache: CI 环境是否仅使用缓存 + ci_fallback_enabled: CI 环境是否启用备用摘要 + cache_enabled: 是否启用缓存功能(默认True) + + Example: + # 完全禁用缓存 + configure_ci_behavior(cache_enabled=False) + + # 本地开发时禁用缓存,总是生成新摘要 + configure_ci_behavior(enabled_in_local=True, cache_enabled=False) + + # CI中使用缓存,本地禁用缓存 + configure_ci_behavior(enabled_in_ci=True, enabled_in_local=True, ci_only_cache=True, cache_enabled=True) + """ + if enabled_in_ci is not None: + ai_summary_generator.ci_config['enabled_in_ci'] = enabled_in_ci + print(f"✅ CI 环境 AI 摘要: {'启用' if enabled_in_ci else '禁用'}") + + if enabled_in_local is not None: + ai_summary_generator.ci_config['enabled_in_local'] = enabled_in_local + print(f"✅ 本地环境 AI 摘要: {'启用' if enabled_in_local else '禁用'}") + + if ci_only_cache is not None: + ai_summary_generator.ci_config['ci_only_cache'] = ci_only_cache + print(f"✅ CI 环境仅缓存模式: {'启用' if ci_only_cache else '禁用'}") + + if ci_fallback_enabled is not None: + ai_summary_generator.ci_config['ci_fallback_enabled'] = ci_fallback_enabled + print(f"✅ CI 环境备用摘要: {'启用' if ci_fallback_enabled else '禁用'}") + + if cache_enabled is not None: + ai_summary_generator.ci_config['cache_enabled'] = cache_enabled + print(f"✅ 缓存功能: {'启用' if cache_enabled else '禁用'}") + +def on_page_markdown(markdown, page, config, files): + """MkDocs hook入口点""" + return ai_summary_generator.process_page(markdown, page, config) \ No newline at end of file