feat: 完成全站UI优化 - 科技感/未来风设计
- 前台页面全面升级为Tailwind CSS框架
- 引入Google Fonts (Space Grotesk, Noto Sans)
- 主色调更新为#25c0f4 (cyan blue)
- 实现玻璃态效果和渐变背景
- 优化首页网格卡片布局和悬停动画
- 优化详情页双栏布局和渐变Logo光晕
- 优化管理员登录页,添加科技网格背景
- Flask-Admin后台完整深色主题
- 统一Material Symbols图标系统
- 网站自动抓取功能界面优化

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
1
utils/__init__.py
Normal file
1
utils/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Utils package
|
||||
168
utils/website_fetcher.py
Normal file
168
utils/website_fetcher.py
Normal file
@@ -0,0 +1,168 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
网站信息抓取工具
|
||||
"""
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import os
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
class WebsiteFetcher:
    """Best-effort scraper for a website's title, description and logo.

    All failures (network errors, bad status codes, parse errors) are
    reported via ``print`` and surfaced to callers as ``None`` rather
    than raised.
    """

    def __init__(self, timeout=10):
        # Timeout (seconds) applied to every HTTP request this instance makes.
        self.timeout = timeout
        # Desktop-browser UA: some sites serve stripped pages to unknown agents.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    def fetch_website_info(self, url):
        """Fetch website metadata.

        Args:
            url: Website URL; the scheme may be omitted (https is assumed).

        Returns:
            dict: keys ``name``, ``description``, ``logo_url`` (values may
            be empty strings), or ``None`` on any failure.
        """
        try:
            # Make sure the URL carries a scheme; default to HTTPS.
            if not url.startswith(('http://', 'https://')):
                url = 'https://' + url

            response = requests.get(url, headers=self.headers,
                                    timeout=self.timeout, allow_redirects=True)
            response.raise_for_status()
            # Many sites mislabel their charset; let requests sniff it.
            response.encoding = response.apparent_encoding

            soup = BeautifulSoup(response.text, 'html.parser')

            # FIX: resolve relative logo paths against the FINAL URL after
            # redirects (response.url), not the URL the caller passed in.
            return {
                'name': self._extract_title(soup),
                'description': self._extract_description(soup),
                'logo_url': self._extract_logo(soup, response.url),
            }

        except Exception as e:
            # Best-effort contract: report and return None instead of raising.
            print(f"抓取网站信息失败: {str(e)}")
            return None

    def _extract_title(self, soup):
        """Return the page title, preferring Open Graph metadata; '' if none."""
        og_title = soup.find('meta', property='og:title')
        if og_title and og_title.get('content'):
            return og_title['content'].strip()

        title_tag = soup.find('title')
        if title_tag:
            return title_tag.get_text().strip()

        return ''

    def _extract_description(self, soup):
        """Return the page description.

        Tries og:description, then meta description, then meta keywords as a
        last-resort fallback; returns '' when none is present.
        """
        for candidate in (
            soup.find('meta', property='og:description'),
            soup.find('meta', attrs={'name': 'description'}),
            soup.find('meta', attrs={'name': 'keywords'}),
        ):
            if candidate and candidate.get('content'):
                return candidate['content'].strip()
        return ''

    def _extract_logo(self, soup, base_url):
        """Return an absolute logo URL for the page.

        Preference order: og:image, any ``<link rel="...icon...">``,
        apple-touch-icon, then ``/favicon.ico`` as the default.
        """
        logo_url = None

        # 1. Open Graph share image.
        og_image = soup.find('meta', property='og:image')
        if og_image and og_image.get('content'):
            logo_url = og_image['content']

        # 2. Any <link> whose rel mentions "icon" (covers "shortcut icon").
        if not logo_url:
            # BeautifulSoup may expose rel as a string or a list of tokens.
            icon_link = soup.find(
                'link',
                rel=lambda x: x and ('icon' in x.lower() if isinstance(x, str)
                                     else 'icon' in ' '.join(x).lower()))
            if icon_link and icon_link.get('href'):
                logo_url = icon_link['href']

        # 3. Apple touch icon.
        if not logo_url:
            apple_icon = soup.find('link', rel='apple-touch-icon')
            if apple_icon and apple_icon.get('href'):
                logo_url = apple_icon['href']

        # 4. Conventional default location.
        if not logo_url:
            logo_url = '/favicon.ico'

        # Resolve relative references against the page URL.
        return urljoin(base_url, logo_url)

    def download_logo(self, logo_url, save_dir='static/uploads'):
        """Download a logo image and store it under *save_dir*.

        Args:
            logo_url: Absolute URL of the logo image.
            save_dir: Directory the file is written to (created if missing).

        Returns:
            str: Web-style relative path to the saved file (leading '/'),
            or ``None`` on failure or a non-image response.
        """
        if not logo_url:
            return None

        try:
            os.makedirs(save_dir, exist_ok=True)

            response = requests.get(logo_url, headers=self.headers,
                                    timeout=self.timeout)
            response.raise_for_status()

            # Reject responses that are not images at all.
            content_type = response.headers.get('content-type', '')
            if not content_type.startswith('image/'):
                return None

            # FIX: validate that the payload is a decodable image before
            # writing it to disk (uses the previously-unused PIL import).
            # SVG is vector data PIL cannot parse, so it is exempt.
            if 'svg' not in content_type:
                Image.open(BytesIO(response.content)).verify()

            # Derive the extension from the URL path; normalize case and
            # fall back to .png for missing/implausible extensions.
            parsed_url = urlparse(logo_url)
            ext = os.path.splitext(parsed_url.path)[1].lower()
            if not ext or len(ext) > 5:
                ext = '.png'

            # Name the file after the host so repeat fetches overwrite it.
            domain = parsed_url.netloc.replace(':', '_').replace('.', '_')
            filename = f"logo_{domain}{ext}"
            filepath = os.path.join(save_dir, filename)

            with open(filepath, 'wb') as f:
                f.write(response.content)

            # Web-style relative path for database storage.
            return f'/{filepath.replace(os.sep, "/")}'

        except Exception as e:
            print(f"下载Logo失败: {str(e)}")
            return None
|
||||
Reference in New Issue
Block a user