release: v2.0 - 完整功能管理系统
主要功能: - 完整的Flask-Admin后台管理系统 - 网站/标签/新闻管理功能 - 用户登录认证系统 - 科技感/未来风UI设计 - 标签分类系统(取代传统分类) - 详情页面展示 - 数据库迁移脚本 - 书签导入解析工具 技术栈: - Flask + SQLAlchemy - Flask-Admin管理界面 - Bootstrap 4响应式设计 - 用户认证与权限管理 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
204
utils/bookmark_parser.py
Normal file
204
utils/bookmark_parser.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""OneNav/Chrome书签HTML文件解析工具"""
|
||||
import html
import re
from typing import Any, Dict, List

from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class BookmarkParser:
    """Parse bookmark HTML files exported by OneNav or Chrome.

    Two formats are supported:
      * OneNav exports (``parse_html_file``) -> unique categories plus the
        sites that belong to each category.
      * Classic Chrome "Netscape bookmark" exports
        (``parse_html_file_legacy``) -> flat bookmark list with folder paths.
    """

    # Folder names that are container/root nodes rather than real categories.
    _SKIP_CATEGORIES = ('OneNav', 'OneNav默认分类')

    def parse_html_file(self, html_content: str, debug: bool = False) -> Dict[str, Any]:
        """Parse a OneNav/Chrome bookmark HTML file.

        Args:
            html_content: Raw HTML text of the exported bookmark file.
            debug: When True, print parsing progress to stdout.

        Returns:
            Dict with ``categories`` (sorted list of unique category names)
            and ``sites`` (list of dicts with title/url/category/add_date).
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        categories = set()  # a set deduplicates category names
        sites = []

        # The first <dl> is the root of the bookmark tree.
        first_dl = soup.find('dl')
        if first_dl:
            # Walk the tree once, collecting both categories and sites.
            self._parse_dl_tag(first_dl, categories, sites, current_category=None, debug=debug)

        return {
            'categories': sorted(categories),  # stable, sorted output
            'sites': sites
        }

    def parse_html_file_legacy(self, html_content: str) -> List[Dict[str, str]]:
        """Parse a classic Chrome bookmark HTML file (legacy format).

        Args:
            html_content: Raw HTML text of the exported bookmark file.

        Returns:
            List of dicts, each with ``name``, ``url`` and ``folder``
            (the "/"-joined folder path the bookmark lives in).
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        bookmarks = []
        # Recursively walk nested <dl> folders starting at the document root.
        self._parse_dl_tag_legacy(soup, bookmarks, folder_path="")
        return bookmarks

    def _parse_dl_tag(self, element, categories: set, sites: List[Dict], current_category: str, debug: bool = False):
        """Collect categories and sites from a <dl> subtree (OneNav format).

        ``current_category`` is accepted for signature compatibility but the
        category of each link is resolved from the nearest preceding <h3>,
        because html.parser tends to flatten the malformed bookmark markup.
        """
        # Scan every <dt>, not only direct children: they may sit inside <p>.
        dt_list = element.find_all('dt')
        if debug and dt_list:
            print(f"Found {len(dt_list)} DT tags total")

        for dt in dt_list:
            # <dt><h3> marks a folder, i.e. a category.
            h3 = dt.find('h3', recursive=False)
            if h3:
                category_name = h3.get_text(strip=True)

                # Skip the root node and the default catch-all category.
                if category_name not in self._SKIP_CATEGORIES:
                    categories.add(category_name)
                    if debug:
                        print(f" Category: {category_name}")

            # <dt><a href=...> marks a bookmark link.
            a = dt.find('a', recursive=False)
            if a and a.get('href'):
                # Resolve the owning category via the nearest preceding <h3>.
                parent_category = None
                prev = dt.find_previous('h3')
                if prev:
                    parent_category = prev.get_text(strip=True)
                    # Root/default folders do not count as a real category.
                    if parent_category in self._SKIP_CATEGORIES:
                        parent_category = None

                # Links without a real category are dropped.
                if parent_category:
                    url = a['href']
                    title = a.get_text(strip=True)

                    sites.append({
                        'title': title,
                        'url': url,
                        'category': parent_category,
                        'add_date': a.get('add_date', '')
                    })
                    if debug:
                        print(f" Site: {title} -> {parent_category}")

    def _parse_dl_tag_legacy(self, element, bookmarks: List[Dict], folder_path: str):
        """Recursively collect bookmarks from a <dl> subtree (Chrome format)."""
        for dt in element.find_all('dt', recursive=False):
            # <dt><h3> marks a folder: recurse with the extended folder path.
            h3 = dt.find('h3', recursive=False)
            if h3:
                folder_name = h3.get_text(strip=True)
                new_folder_path = f"{folder_path}/{folder_name}" if folder_path else folder_name

                dl = dt.find('dl', recursive=False)
                if dl:
                    self._parse_dl_tag_legacy(dl, bookmarks, new_folder_path)

            # <dt><a href=...> marks a bookmark link.
            a = dt.find('a', recursive=False)
            if a and a.get('href'):
                bookmarks.append({
                    'name': a.get_text(strip=True),
                    'url': a['href'],
                    'folder': folder_path
                })

    def parse_url_list(self, text: str) -> List[Dict[str, str]]:
        """Parse a plain-text URL list.

        Args:
            text: Text content with one URL per line; blank lines and lines
                starting with ``#`` are ignored.

        Returns:
            List of dicts with ``url``, ``name`` (empty, filled in later)
            and ``folder`` (empty).
        """
        urls = []
        for line in text.strip().split('\n'):
            line = line.strip()
            # Skip blank lines and comments.
            if not line or line.startswith('#'):
                continue

            # Minimal validation: only accept http(s) URLs.
            if line.startswith(('http://', 'https://')):
                urls.append({
                    'url': line,
                    'name': '',  # name left empty; fetched automatically later
                    'folder': ''
                })

        return urls

    @staticmethod
    def clean_title(title: str) -> str:
        """Clean a page title down to a short site name.

        Decodes HTML entities, then tries to pick the most site-name-like
        segment around common title separators, and finally strips common
        "official site"/"home page" style suffixes.
        """
        if not title:
            return ''

        # Decode HTML entities (&amp;, &lt;, &gt;, &quot;, ...).
        # The previous regex substitutions were no-ops: their patterns were
        # the already-decoded characters themselves.
        title = html.unescape(title)

        # Split on common title separators and keep the most useful part.
        separators = [' - ', ' | ', ' · ', '·', '|', ' — ']
        for sep in separators:
            if sep in title:
                parts = title.split(sep)
                # Drop segments that are generic boilerplate.
                filtered_parts = []
                skip_keywords = ['官网', '首页', 'official', 'home', 'page', 'website']

                for part in parts:
                    part = part.strip()
                    if part and not any(kw in part.lower() for kw in skip_keywords):
                        filtered_parts.append(part)

                if filtered_parts:
                    # The shortest remaining part is usually the site name.
                    return min(filtered_parts, key=len)

        # Strip common trailing boilerplate words.
        suffixes = [
            r'\s*官网\s*$', r'\s*首页\s*$',
            r'\s*Official Site\s*$', r'\s*Home Page\s*$',
            r'\s*Homepage\s*$', r'\s*Website\s*$'
        ]
        for suffix in suffixes:
            title = re.sub(suffix, '', title, flags=re.IGNORECASE)

        return title.strip()

    @staticmethod
    def extract_domain(url: str) -> str:
        """Return the host part of *url* without a leading "www.".

        Falls back to returning *url* unchanged when it does not look like
        an http(s) URL.
        """
        match = re.search(r'https?://([^/]+)', url)
        if match:
            domain = match.group(1)
            # Drop the common "www." prefix.
            domain = re.sub(r'^www\.', '', domain)
            return domain
        return url
|
||||
98
utils/tag_generator.py
Normal file
98
utils/tag_generator.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""DeepSeek AI 标签生成工具"""
|
||||
import os
|
||||
from openai import OpenAI
|
||||
|
||||
|
||||
class TagGenerator:
    """Generate product tags with the DeepSeek chat-completion API."""

    def __init__(self):
        # Credentials come from the environment; the client stays None when
        # no API key is configured so callers get a clear error later on.
        self.api_key = os.environ.get('DEEPSEEK_API_KEY')
        self.base_url = os.environ.get('DEEPSEEK_BASE_URL', 'https://api.deepseek.com')
        self.client = None
        if self.api_key:
            self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    def generate_tags(self, name, description, existing_tags=None):
        """Generate 3-5 tags for a product from its name and description.

        Args:
            name: Product name.
            description: Product description.
            existing_tags: Optional list of known tags the model should
                prefer to reuse.

        Returns:
            list: Generated tag strings (empty list when the API call fails).

        Raises:
            ValueError: If no DeepSeek API key is configured.
        """
        if self.client is None:
            raise ValueError("DEEPSEEK_API_KEY未配置,请在.env文件中添加")

        try:
            # Fold the known tags into the prompt so the model reuses them.
            existing_tags_str = ""
            if existing_tags:
                existing_tags_str = f"\n\n系统中已有的标签参考:\n{', '.join(existing_tags)}\n尽量使用已有标签,如果合适的话。"

            prompt = f"""你是一个AI工具导航网站的标签生成助手。根据以下产品信息,生成3-5个最合适的标签。

产品名称: {name}

产品描述: {description}
{existing_tags_str}

要求:
1. 标签应该准确描述产品的功能、类型或应用场景
2. 每个标签2-4个汉字
3. 标签要具体且有区分度
4. 如果是AI工具,可以标注具体的AI类型(如"GPT"、"图像生成"等)
5. 只返回标签,用逗号分隔,不要其他说明

示例输出格式:写作助手,营销,GPT,内容生成

请生成标签:"""

            response = self.client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {"role": "system", "content": "你是一个专业的AI工具分类专家,擅长为各类AI产品生成准确的标签。"},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.7,
                max_tokens=100,
            )

            # The model answers with one comma-separated line of tags.
            raw = response.choices[0].message.content.strip()
            tags = [piece.strip() for piece in raw.split(',') if piece.strip()]

            # Cap the result at five tags.
            return tags[:5]

        except Exception as exc:
            # Best-effort: report the failure and fall back to no tags.
            print(f"DeepSeek标签生成失败: {str(exc)}")
            return []

    def generate_news_summary(self, url, content):
        """Generate a news summary (future feature).

        Args:
            url: News link.
            content: News body text.

        Returns:
            str: The news summary.
        """
        # TODO: implement news summary generation
        pass
|
||||
Reference in New Issue
Block a user