diff --git a/.cache/plugin/git-committers/page-authors.json b/.cache/plugin/git-committers/page-authors.json index 99cb682..554c7a3 100644 --- a/.cache/plugin/git-committers/page-authors.json +++ b/.cache/plugin/git-committers/page-authors.json @@ -1 +1 @@ -{"cache_date": "2024-07-22", "page_authors": {}} \ No newline at end of file +{"cache_date": "2025-06-03", "page_authors": {}} \ No newline at end of file diff --git a/docs/blog/index.md b/docs/blog/index.md index 8b7dacf..3675754 100644 --- a/docs/blog/index.md +++ b/docs/blog/index.md @@ -6,6 +6,7 @@ hide: # - feedback # comments: false # icon: octicons/home-fill-24 +hide_reading_time: true --- # MyBlog diff --git a/docs/blog/posts/update2025.md b/docs/blog/posts/update2025.md index 1571498..e8fd6e5 100644 --- a/docs/blog/posts/update2025.md +++ b/docs/blog/posts/update2025.md @@ -20,7 +20,7 @@ readtime: 2 ##
* 优化网站流畅度(玄学) -* 优化[友链](../../about/link.md)统计方式,更加准确计数 +* 优化[友链](../../link.md)统计方式,更加准确计数 * 修复评论区重定位的bug,防止他人网站测试评论区导致评论区覆盖到我的网站 ##
# docs/overrides/hooks/ai_summary.py
"""MkDocs hook that prepends a short summary admonition to selected pages.

The summary is requested from the DeepSeek chat-completions API; when the API
is not configured or the request fails, a rule-based fallback summary is used.
Summaries are cached on disk, keyed by an MD5 hash of the cleaned page text.
"""
import hashlib
import json
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Optional


class AISummaryGenerator:
    """Generate, cache and format page summaries for the MkDocs build."""

    # Environment variable that must hold the DeepSeek API key.
    # SECURITY: the key used to be hard-coded in this file and was committed
    # to version control; that key must be considered leaked and be revoked.
    API_KEY_ENV = 'DEEPSEEK_API_KEY'

    # Quote-like characters stripped from both ends of an API answer.
    _QUOTE_CHARS = '"\'`“”‘’'

    def __init__(self):
        self.cache_dir = Path("site/.ai_cache")
        # parents=True: "site/" may not exist yet (e.g. first build).
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        headers = {'Content-Type': 'application/json'}
        api_key = os.environ.get(self.API_KEY_ENV, '').strip()
        if api_key:
            headers['Authorization'] = f'Bearer {api_key}'
        else:
            print(f"⚠️ 未设置环境变量 {self.API_KEY_ENV},将使用备用摘要")

        # DeepSeek API configuration
        self.api_config = {
            'url': 'https://api.deepseek.com/v1/chat/completions',
            'model': 'deepseek-chat',
            'headers': headers,
        }

        # Folders in which summaries are enabled (customisable)
        self.enabled_folders = [
            'blog/',
            'develop/',
            # add further folders here
        ]

        # Excluded path fragments. These are SUBSTRING matches against the
        # source path, so 'index.md' excludes every path containing it.
        self.exclude_patterns = [
            'liuyanban.md', 'link.md', '404.md', 'tag.md', 'tags.md',
            '/about/', '/search/', '/sitemap', 'index.md',
        ]

        # Excluded files (exact source-path matches)
        self.exclude_files = [
            'blog/index.md',
            'blog/indexblog.md',
            'docs/index.md',
            'develop/index.md',
        ]

    def configure_folders(self, folders=None, exclude_patterns=None, exclude_files=None):
        """Override the folder / exclusion configuration.

        Args:
            folders: folder prefixes in which summaries are enabled.
            exclude_patterns: path substrings that disable the summary.
            exclude_files: exact source paths that disable the summary.

        Arguments left as ``None`` keep the current setting.
        """
        if folders is not None:
            self.enabled_folders = folders
        if exclude_patterns is not None:
            self.exclude_patterns = exclude_patterns
        if exclude_files is not None:
            self.exclude_files = exclude_files

    def get_content_hash(self, content):
        """Return the MD5 hex digest of *content*, used as the cache key."""
        return hashlib.md5(content.encode('utf-8')).hexdigest()

    def get_cached_summary(self, content_hash) -> Optional[dict]:
        """Return the cached entry for *content_hash*, or ``None``.

        ``None`` is returned when the entry is missing, unreadable, malformed
        or at least 7 days old.
        """
        cache_file = self.cache_dir / f"{content_hash}.json"
        if not cache_file.exists():
            return None
        try:
            with open(cache_file, 'r', encoding='utf-8') as f:
                cache_data = json.load(f)
            cache_time = datetime.fromisoformat(cache_data.get('timestamp', '1970-01-01'))
        except (OSError, ValueError, TypeError, AttributeError):
            # Unreadable file, invalid JSON, bad timestamp or non-dict payload:
            # treat as a cache miss so the summary is regenerated.
            return None
        # Cache entries expire after 7 days.
        if (datetime.now() - cache_time).days < 7:
            return cache_data
        return None

    def save_summary_cache(self, content_hash, summary_data):
        """Write *summary_data* (timestamped in place) to the cache; best-effort."""
        cache_file = self.cache_dir / f"{content_hash}.json"
        try:
            # The directory may have been removed since __init__ ran.
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            summary_data['timestamp'] = datetime.now().isoformat()
            with open(cache_file, 'w', encoding='utf-8') as f:
                json.dump(summary_data, f, ensure_ascii=False, indent=2)
        except Exception as e:
            print(f"保存摘要缓存失败: {e}")

    def clean_content_for_ai(self, markdown):
        """Strip Markdown/HTML markup and return plain text for summarising."""
        content = markdown

        # Remove YAML front matter (only when it starts the document)
        content = re.sub(r'^---.*?---\s*', '', content, flags=re.DOTALL)

        # Remove previously injected reading-info and summary admonitions
        content = re.sub(r'!!! info "📖 阅读信息".*?(?=\n\n|\n#|\Z)', '', content, flags=re.DOTALL)
        content = re.sub(r'!!! info "🤖 AI智能摘要".*?(?=\n\n|\n#|\Z)', '', content, flags=re.DOTALL)
        content = re.sub(r'!!! tip "📝 自动摘要".*?(?=\n\n|\n#|\Z)', '', content, flags=re.DOTALL)

        # Remove HTML tags
        content = re.sub(r'<[^>]+>', '', content)

        # Replace images by a placeholder that keeps the alt text
        content = re.sub(r'!\[([^\]]*)\]\([^)]+\)', r'[图片:\1]', content)

        # Replace links by their text
        content = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', content)

        # Replace fenced code blocks by a placeholder
        content = re.sub(r'```(\w+)?\n(.*?)\n```', r'[代码示例]', content, flags=re.DOTALL)

        # Replace inline code by a placeholder
        content = re.sub(r'`[^`]+`', '[代码]', content)

        # Drop table rows (pipe-delimited spans) and separator-only lines.
        # NOTE: this removes the cell text as well, not just the formatting.
        content = re.sub(r'\|[^\n]+\|', '', content)
        content = re.sub(r'^[-|:\s]+$', '', content, flags=re.MULTILINE)

        # Remove emphasis markers and heading markers
        content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content)  # bold
        content = re.sub(r'\*([^*]+)\*', r'\1', content)  # italic
        content = re.sub(r'^#+\s*', '', content, flags=re.MULTILINE)  # headings

        # Collapse blank lines and strip leading indentation
        content = re.sub(r'\n\s*\n', '\n\n', content)
        content = re.sub(r'^[ \t]+', '', content, flags=re.MULTILINE)
        return content.strip()

    @staticmethod
    def _postprocess_summary(summary):
        """Trim whitespace, surrounding quotes and a leading '摘要:'/'总结:' label."""
        summary = summary.strip().strip(AISummaryGenerator._QUOTE_CHARS)
        # Accept both the ASCII and the full-width colon after the label.
        summary = re.sub(r'^\s*摘要[::]\s*', '', summary)
        summary = re.sub(r'^\s*总结[::]\s*', '', summary)
        return summary

    def generate_ai_summary(self, content, page_title=""):
        """Request a summary from DeepSeek.

        Returns:
            The cleaned summary text, or ``None`` when no API key is
            configured, ``requests`` is not installed, the request fails or
            the response cannot be parsed. Callers fall back on ``None``.
        """
        if not self.api_config.get('headers', {}).get('Authorization'):
            # No credentials configured: skip the network round trip.
            return None

        try:
            # Imported lazily so the hook still loads (and falls back to the
            # rule-based summary) when requests is unavailable.
            import requests
        except ImportError as e:
            print(f"AI摘要生成异常: {e}")
            return None

        prompt = f"""请为以下技术文章生成一个高质量的摘要,要求:

1. **长度控制**:严格控制在80-120字以内
2. **内容要求**:
   - 准确概括文章的核心主题和关键要点
   - 突出技术特点、应用场景或解决的问题
   - 使用专业但易懂的语言
   - 避免重复文章标题的内容
3. **格式要求**:
   - 直接返回摘要内容,无需任何前缀或后缀
   - 使用简洁的陈述句
   - 可以适当使用技术术语

文章标题:{page_title}

文章内容:
{content[:2500]}

请生成摘要:"""

        try:
            payload = {
                "model": self.api_config['model'],
                "messages": [
                    {
                        "role": "system",
                        "content": "你是一个专业的技术文档摘要专家,擅长提取文章核心要点并生成简洁准确的摘要。"
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                "max_tokens": 150,
                "temperature": 0.3,  # low randomness for more stable output
                "top_p": 0.9
            }

            response = requests.post(
                self.api_config['url'],
                headers=self.api_config['headers'],
                json=payload,
                timeout=30
            )

            if response.status_code != 200:
                print(f"DeepSeek API请求失败: {response.status_code} - {response.text}")
                return None

            result = response.json()
            return self._postprocess_summary(result['choices'][0]['message']['content'])

        except requests.exceptions.RequestException as e:
            print(f"DeepSeek API请求异常: {e}")
            return None
        except Exception as e:
            # Malformed JSON / unexpected response shape: use the fallback.
            print(f"AI摘要生成异常: {e}")
            return None

    def generate_fallback_summary(self, content, page_title=""):
        """Build a rule-based summary from the first sentences of *content*.

        Sentences containing one of the keyword indicators are preferred.
        When no sentence qualifies, a generic sentence chosen from
        *page_title* is returned.
        """
        page_title = page_title or ''

        # Remove heading and emphasis markers
        clean_text = re.sub(r'^#+\s*', '', content, flags=re.MULTILINE)
        clean_text = re.sub(r'\*\*([^*]+)\*\*', r'\1', clean_text)
        clean_text = re.sub(r'\*([^*]+)\*', r'\1', clean_text)

        # Split on ASCII and full-width sentence terminators; keep sentences
        # longer than 15 characters.
        sentences = re.split(r'[。!?.!?]', clean_text)
        sentences = [s.strip() for s in sentences if len(s.strip()) > 15]

        key_indicators = [
            '介绍', '讲解', '说明', '分析', '探讨', '研究', '实现', '应用',
            '方法', '技术', '算法', '原理', '概念', '特点', '优势', '解决',
            '教程', '指南', '配置', '安装', '部署', '开发', '设计', '构建'
        ]

        priority_sentences = []
        normal_sentences = []
        for sentence in sentences[:10]:  # only the first 10 sentences
            if any(keyword in sentence for keyword in key_indicators):
                priority_sentences.append(sentence)
            else:
                normal_sentences.append(sentence)

        selected_sentences = []
        total_length = 0

        # Keyword sentences first, up to about 100 characters
        for sentence in priority_sentences:
            if total_length + len(sentence) > 100:
                break
            selected_sentences.append(sentence)
            total_length += len(sentence)

        # Fill remaining room with ordinary sentences
        if total_length < 80:
            for sentence in normal_sentences:
                if total_length + len(sentence) > 100:
                    break
                selected_sentences.append(sentence)
                total_length += len(sentence)

        if selected_sentences:
            summary = '。'.join(selected_sentences) + '。'
            # Too long once joined: keep only the first sentence
            if len(summary) > 120:
                summary = selected_sentences[0] + '。'
            return summary

        # No usable sentence: pick a generic summary from the title
        if any(keyword in page_title for keyword in ['教程', '指南', 'Tutorial']):
            return '本文提供了详细的教程指南,通过实例演示帮助读者掌握相关技术要点。'
        if any(keyword in page_title for keyword in ['配置', '设置', '安装', 'Config']):
            return '本文介绍了系统配置的方法和步骤,提供实用的设置建议和最佳实践。'
        if any(keyword in page_title for keyword in ['开发', '编程', 'Development']):
            return '本文分享了开发经验和技术实践,提供了实用的代码示例和解决方案。'
        return '本文深入探讨了相关技术内容,提供了实用的方法和解决方案。'

    def process_page(self, markdown, page, config):
        """Return *markdown* with a summary admonition prepended when applicable.

        The page is returned unchanged when summaries are disabled for it or
        when its cleaned text is shorter than 200 characters.
        """
        if not self.should_generate_summary(page, markdown):
            return markdown

        clean_content = self.clean_content_for_ai(markdown)

        if len(clean_content) < 200:
            print(f"📄 内容太短,跳过摘要生成: {page.file.src_path}")
            return markdown

        content_hash = self.get_content_hash(clean_content)
        # The title attribute may exist but be None; normalise to a string so
        # the keyword checks in the fallback do not raise TypeError.
        page_title = getattr(page, 'title', '') or ''

        cached_summary = self.get_cached_summary(content_hash)
        if cached_summary:
            summary = cached_summary.get('summary', '')
            ai_service = 'cached'
            print(f"✅ 使用缓存摘要: {page.file.src_path}")
        else:
            print(f"🤖 正在生成AI摘要: {page.file.src_path}")
            summary = self.generate_ai_summary(clean_content, page_title)

            if not summary:
                summary = self.generate_fallback_summary(clean_content, page_title)
                ai_service = 'fallback'
                print(f"📝 使用备用摘要: {page.file.src_path}")
            else:
                ai_service = 'deepseek'
                print(f"✅ AI摘要生成成功: {page.file.src_path}")

            self.save_summary_cache(content_hash, {
                'summary': summary,
                'service': ai_service,
                'page_title': page_title
            })

        # The summary goes at the very top of the page
        summary_html = self.format_summary(summary, ai_service)
        return summary_html + '\n\n' + markdown

    def should_generate_summary(self, page, markdown):
        """Decide whether *page* gets a summary.

        Page metadata ``ai_summary: false`` / ``true`` wins over everything.
        Otherwise the page must match none of the exclusions and live in one
        of the enabled folders.
        """
        if hasattr(page, 'meta'):
            flag = page.meta.get('ai_summary')
            if flag is False:  # explicitly disabled
                return False
            if flag is True:  # explicitly enabled
                return True

        # Normalise Windows path separators
        src_path = page.file.src_path.replace('\\', '/')

        if any(pattern in src_path for pattern in self.exclude_patterns):
            return False

        if src_path in self.exclude_files:
            return False

        for folder in self.enabled_folders:
            if src_path.startswith(folder) or f'/{folder}' in src_path:
                folder_name = folder.rstrip('/')
                print(f"🎯 {folder_name}文件夹文章检测到,启用AI摘要: {src_path}")
                return True

        # Default: no summary
        return False

    def format_summary(self, summary, ai_service):
        """Render *summary* as an admonition block styled for *ai_service*.

        Unknown service names use the 'deepseek' style.
        """
        service_config = {
            'deepseek': {
                'icon': '🤖',
                'name': 'AI智能摘要',
                'color': 'info'
            },
            'fallback': {
                'icon': '📝',
                'name': '自动摘要',
                'color': 'tip'
            },
            'cached': {
                'icon': '💾',
                'name': 'AI智能摘要',
                'color': 'info'
            }
        }

        style = service_config.get(ai_service, service_config['deepseek'])
        # Every line of the body must be indented to stay inside the
        # admonition, so indent continuation lines of multi-line summaries.
        indented = summary.replace('\n', '\n    ')
        return f'!!! {style["color"]} "{style["icon"]} {style["name"]}"\n    {indented}\n\n'


# Module-level instance used by the hook entry point
ai_summary_generator = AISummaryGenerator()


def configure_ai_summary(enabled_folders=None, exclude_patterns=None, exclude_files=None):
    """Configure the module-level summary generator.

    Args:
        enabled_folders: folders with summaries enabled, e.g. ['blog/', 'docs/', 'posts/']
        exclude_patterns: path substrings to exclude, e.g. ['404.md', '/admin/']
        exclude_files: exact files to exclude, e.g. ['blog/index.md']

    Example:
        # enable only in the blog and docs folders
        configure_ai_summary(['blog/', 'docs/'])

        # enable everywhere but exclude specific files
        configure_ai_summary([''], exclude_files=['index.md', 'about.md'])
    """
    ai_summary_generator.configure_folders(enabled_folders, exclude_patterns, exclude_files)


def on_page_markdown(markdown, page, config, files):
    """MkDocs hook entry point: delegate to the module-level generator."""
    return ai_summary_generator.process_page(markdown, page, config)


# ---------------------------------------------------------------------------
# docs/overrides/hooks/reading_time.py — module preamble (continues below)
# ---------------------------------------------------------------------------
import re
from functools import lru_cache

# Pre-compiled source-path patterns
EXCLUDE_PATTERNS = [
    re.compile(r'^index\.md$'),
    re.compile(r'^about/'),
    re.compile(r'^trip/index\.md$'),
    re.compile(r'^relax/index\.md$'),
    re.compile(r'^blog/indexblog\.md$'),
    re.compile(r'^blog/posts\.md$'),
    re.compile(r'^develop/index\.md$'),
    re.compile(r'waline\.md$'),
    re.compile(r'link\.md$'),
    re.compile(r'404\.md$'),
]

# Pre-compiled patterns for character statistics
CHINESE_CHARS_PATTERN = re.compile(r'[\u4e00-\u9fff\u3400-\u4dbf]')
CODE_BLOCK_PATTERN = re.compile(r'```.*?```', re.DOTALL)
INLINE_CODE_PATTERN = re.compile(r'`[^`]+`')
YAML_FRONT_PATTERN = re.compile(r'^---.*?---\s*', re.DOTALL)
HTML_TAG_PATTERN = re.compile(r'<[^>]+>')
IMAGE_PATTERN = re.compile(r'!\[.*?\]\([^)]+\)')
LINK_PATTERN = re.compile(r'\[([^\]]+)\]\([^)]+\)')

# Predefined excluded page types
EXCLUDE_TYPES = frozenset({'landing', 'special', 'widget'})

# Extended list of non-programming inline-code words (broader filtering)
+NON_CODE_WORDS = frozenset({ + 'markdown', 'target', 'blank', 'lg', 'middle', 'small', 'large', + 'left', 'right', 'center', 'top', 'bottom', 'primary', 'secondary', + 'success', 'warning', 'danger', 'info', 'light', 'dark', 'grid', + 'cards', 'octicons', 'bookmark', 'div', 'class', 'img', 'src', + 'alt', 'width', 'height', 'style', 'id', 'data', 'href', 'title' +}) + +# 支持的编程和标记语言(扩展版本) +PROGRAMMING_LANGUAGES = frozenset({ + # 编程语言 + 'python', 'py', 'javascript', 'js', 'typescript', 'ts', 'java', 'cpp', 'c', + 'go', 'rust', 'php', 'ruby', 'swift', 'kotlin', 'csharp', 'cs', + # 脚本语言 + 'bash', 'sh', 'powershell', 'ps1', 'zsh', 'fish', 'bat', 'cmd', + # 标记和配置语言 + 'html', 'css', 'scss', 'sass', 'less', 'yaml', 'yml', 'json', 'xml', + 'toml', 'ini', 'conf', 'dockerfile', 'makefile', + # 数据库和查询 + 'sql', 'mysql', 'postgresql', 'sqlite', 'mongodb', + # 其他 + 'r', 'matlab', 'scala', 'perl', 'lua', 'dart', 'tex', 'latex', + # 数据格式 + 'csv', 'properties', + # 无标识符(空字符串也算作有效语言) + '' +}) + +@lru_cache(maxsize=256) +def clean_markdown_content_for_chinese(content_hash, markdown): + """清理Markdown内容,只保留中文文本用于统计(添加缓存)""" + content = markdown + + # 使用预编译的正则表达式 + content = YAML_FRONT_PATTERN.sub('', content) + content = HTML_TAG_PATTERN.sub('', content) + content = IMAGE_PATTERN.sub('', content) + content = LINK_PATTERN.sub(r'\1', content) + content = CODE_BLOCK_PATTERN.sub('', content) + content = INLINE_CODE_PATTERN.sub('', content) + + return content + +def count_code_lines(markdown): + """统计代码行数(修复版本 - 正确处理所有代码行)""" + code_blocks = CODE_BLOCK_PATTERN.findall(markdown) + total_code_lines = 0 + + for i, block in enumerate(code_blocks): + # 提取语言标识 + lang_match = re.match(r'^```(\w*)', block) + language = lang_match.group(1).lower() if lang_match else '' + + # 移除开头的语言标识和结尾的``` + code_content = re.sub(r'^```\w*\n?', '', block) + code_content = re.sub(r'\n?```$', '', code_content) + + # 过滤空代码块 + if not code_content.strip(): + continue + + # 计算有效行数(包含所有非空行,包括注释行) + lines = [line for line 
in code_content.split('\n') if line.strip()] + line_count = len(lines) + + # 如果有明确的编程语言标识,直接统计 + if language and language in PROGRAMMING_LANGUAGES: + total_code_lines += line_count + continue + + # 增强的检测策略 - 更宽松的判断 + is_code = False + + # 1. 命令行检测 + command_indicators = [ + 'sudo ', 'npm ', 'pip ', 'git ', 'cd ', 'ls ', 'mkdir ', 'rm ', 'cp ', 'mv ', + 'chmod ', 'chown ', 'grep ', 'find ', 'ps ', 'kill ', 'top ', 'cat ', 'echo ', + 'wget ', 'curl ', 'tar ', 'zip ', 'unzip ', 'ssh ', 'scp ', 'rsync ', + 'xattr ', 'codesign ', 'xcode-select ', 'spctl ', 'launchctl ', + 'brew ', 'defaults ', 'ditto ', 'hdiutil ', 'diskutil ', + 'dir ', 'copy ', 'xcopy ', 'del ', 'rd ', 'md ', 'type ', 'attrib ', + '$ ', '# ', '% ', '> ', 'C:\\>', 'PS>', + '--', '-r', '-d', '-f', '-v', '-h', '--help', '--version', + '--force', '--deep', '--sign', '--master-disable', + '/Applications/', '/usr/', '/etc/', '/var/', '/home/', '~/', + 'C:\\', 'D:\\', '.app', '.exe', '.pkg', '.dmg', '.zip', '.tar', + '#!/', + ] + + if any(indicator in code_content for indicator in command_indicators): + is_code = True + + # 2. 编程语法检测(增强版) + if not is_code: + programming_indicators = [ + # Python语法特征 + 'def ', 'class ', 'import ', 'from ', 'return ', 'yield ', 'lambda ', + 'with ', 'as ', 'try:', 'except:', 'finally:', 'elif ', 'if __name__', + 'print(', '.append(', '.extend(', '.remove(', '.sort(', '.reverse(', + 'range(', 'len(', 'str(', 'int(', 'float(', 'list(', 'dict(', + # JavaScript/TypeScript语法 + 'function', 'var ', 'let ', 'const ', 'async ', 'await ', '=>', + 'console.log', 'document.', 'window.', 'require(', + # 通用编程语法 + 'public ', 'private ', 'protected ', 'static ', 'void ', 'int ', + 'string ', 'boolean ', 'float ', 'double ', 'char ', + # 操作符和结构 + '==', '!=', '<=', '>=', '&&', '||', '++', '--', '+=', '-=', '**', + # 特殊结构 + 'while ', 'for ', 'if ', 'else:', 'switch ', 'case ', + # HTML/XML语法 + '', + ' + +
+