From b00e52e1e0b41ea7959c8bb7e3b63e1522159f0a Mon Sep 17 00:00:00 2001 From: ZJPB Admin Date: Tue, 30 Dec 2025 23:44:27 +0800 Subject: [PATCH] =?UTF-8?q?release:=20v2.2.0=20-=20=E5=8D=9A=E6=9F=A5?= =?UTF-8?q?=E6=96=B0=E9=97=BB=E6=90=9C=E7=B4=A2=E5=8A=9F=E8=83=BD=20(?= =?UTF-8?q?=E7=94=9F=E4=BA=A7=E7=8E=AF=E5=A2=83=E9=83=A8=E7=BD=B2=E7=89=88?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 核心功能: - 集成博查Web Search API自动获取网站相关新闻 - 智能新闻更新机制(每日首次访问触发) - 精确新闻搜索(使用引号强制匹配网站名称) - News模型扩展(source_name, source_icon字段) - 网站详情页新闻展示模块 - 新闻来源网站信息展示 - 自动去重防止重复新闻 技术实现: - NewsSearcher工具类封装博查API - 数据库迁移脚本migrate_news_fields.py - 测试脚本test_news_feature.py - 定期任务脚本fetch_news_cron.py - API路由:/api/fetch-site-news, /api/fetch-all-news 配置优化: - 修复manage.sh路径和启动命令 - 博查API配置(BOCHA_API_KEY, BOCHA_BASE_URL) - 新闻搜索参数配置 界面优化: - 详情页新闻模块(左侧主栏) - 相似推荐模块(右侧边栏) - 首页标签图标修复 - 后台添加修改密码功能 - 登录页面优化 部署信息: - 部署日期: 2025-12-30 - 部署方式: 手动上传文件 - 数据库: 已迁移(添加source_name和source_icon字段) --- app.py | 278 +++++++++++++++++++++++++++++++++++- config.py | 14 ++ fetch_news_cron.py | 167 ++++++++++++++++++++++ manage.sh | 4 +- migrate_news_fields.py | 99 +++++++++++++ models.py | 2 + templates/admin/master.html | 6 + templates/admin_login.html | 142 ++++++++++++------ templates/detail_new.html | 83 +++++++---- templates/index_new.html | 154 ++++++++++++++++---- test_news_feature.py | 142 ++++++++++++++++++ utils/news_searcher.py | 271 +++++++++++++++++++++++++++++++++++ 12 files changed, 1255 insertions(+), 107 deletions(-) create mode 100644 fetch_news_cron.py create mode 100644 migrate_news_fields.py create mode 100644 test_news_feature.py create mode 100644 utils/news_searcher.py diff --git a/app.py b/app.py index 7a2e392..2160534 100644 --- a/app.py +++ b/app.py @@ -9,6 +9,7 @@ from config import config from models import db, Site, Tag, Admin as AdminModel, News, site_tags, PromptTemplate from utils.website_fetcher import WebsiteFetcher from utils.tag_generator import TagGenerator +from utils.news_searcher import NewsSearcher def create_app(config_name='default'): """应用工厂函数""" @@ -115,6 +116,70 @@ def create_app(config_name='default'): site.view_count += 1 db.session.commit() + # 智能新闻更新:检查今天是否已更新过新闻 + from datetime import date + today = date.today() + + # 检查该网站最新一条新闻的创建时间 + latest_news = News.query.filter_by( + site_id=site.id + ).order_by(News.created_at.desc()).first() + + # 判断是否需要更新新闻 + need_update = False + if not latest_news: + # 没有任何新闻,需要获取 + need_update = True + elif latest_news.created_at.date() < today: + # 最新新闻不是今天创建的,需要更新 + need_update = True + + # 如果需要更新,自动获取最新新闻 + if need_update: + api_key = app.config.get('BOCHA_API_KEY') + if api_key: + try: + # 创建新闻搜索器 + searcher = NewsSearcher(api_key) + + # 获取新闻(限制3条,一周内的) + news_items = searcher.search_site_news( + site_name=site.name, + site_url=site.url, + count=3, + freshness='oneWeek' + ) + + # 保存新闻到数据库 + if news_items: + for item in news_items: + # 检查是否已存在(根据URL去重) + existing = News.query.filter_by( + site_id=site.id, + url=item['url'] + ).first() + + if not existing: + news = News( + site_id=site.id, + title=item['title'], + content=item.get('summary') or item.get('snippet', ''), + url=item['url'], + source_name=item.get('site_name', ''), + source_icon=item.get('site_icon', ''), + published_at=item.get('published_at'), + news_type='Search Result', + is_active=True + ) + db.session.add(news) + + db.session.commit() + + except Exception as e: + # 获取新闻失败,不影响页面显示 + print(f"自动获取新闻失败:{str(e)}") + db.session.rollback() + # 获取该网站的相关新闻(最多显示5条) news_list = News.query.filter_by( site_id=site.id, @@ -442,6 +507,205 @@ def create_app(config_name='default'): 'message': f'生成失败: {str(e)}' }), 500 + # ========== 新闻获取路由 ========== + @app.route('/api/fetch-site-news', methods=['POST']) + @login_required + def fetch_site_news(): + """为指定网站获取最新新闻""" + try: + data = request.get_json() + site_id = data.get('site_id') + count = data.get('count', app.config.get('NEWS_SEARCH_COUNT', 10)) + freshness = data.get('freshness', app.config.get('NEWS_SEARCH_FRESHNESS', 'oneMonth')) + + if not site_id: + return jsonify({ + 'success': False, + 'message': '请提供网站ID' + }), 400 + + # 获取网站信息 + site = Site.query.get(site_id) + if not site: + return jsonify({ + 'success': False, + 'message': '网站不存在' + }), 404 + + # 检查博查API配置 + api_key = app.config.get('BOCHA_API_KEY') + if not api_key: + return jsonify({ + 'success': False, + 'message': '博查API未配置,请在.env文件中设置BOCHA_API_KEY' + }), 500 + + # 创建新闻搜索器 + searcher = NewsSearcher(api_key) + + # 搜索新闻 + news_items = searcher.search_site_news( + site_name=site.name, + site_url=site.url, + count=count, + freshness=freshness + ) + + if not news_items: + return jsonify({ + 'success': False, + 'message': '未找到相关新闻' + }), 404 + + # 保存新闻到数据库 + saved_count = 0 + for item in news_items: + # 检查新闻是否已存在(根据URL判断) + existing_news = News.query.filter_by( + site_id=site_id, + url=item['url'] + ).first() + + if not existing_news: + # 创建新闻记录 + news = News( + site_id=site_id, + title=item['title'], + content=item.get('summary') or item.get('snippet', ''), + url=item['url'], + source_name=item.get('site_name', ''), + source_icon=item.get('site_icon', ''), + published_at=item.get('published_at'), + news_type='Search Result', + is_active=True + ) + db.session.add(news) + saved_count += 1 + + # 提交事务 + db.session.commit() + + return jsonify({ + 'success': True, + 'message': f'成功获取并保存 {saved_count} 条新闻', + 'total_found': len(news_items), + 'saved': saved_count, + 'news_items': searcher.format_news_for_display(news_items) + }) + + except Exception as e: + db.session.rollback() + return jsonify({ + 'success': False, + 'message': f'获取失败: {str(e)}' + }), 500 + + @app.route('/api/fetch-all-news', methods=['POST']) + @login_required + def fetch_all_news(): + """批量为所有网站获取新闻""" + try: + data = request.get_json() + count_per_site = data.get('count', 5) # 每个网站获取的新闻数量 + freshness = data.get('freshness', app.config.get('NEWS_SEARCH_FRESHNESS', 'oneMonth')) + limit = data.get('limit', 10) # 限制处理的网站数量 + + # 检查博查API配置 + api_key = app.config.get('BOCHA_API_KEY') + if not api_key: + return jsonify({ + 'success': False, + 'message': '博查API未配置,请在.env文件中设置BOCHA_API_KEY' + }), 500 + + # 获取启用的网站(按更新时间排序,优先处理旧的) + sites = Site.query.filter_by(is_active=True).order_by(Site.updated_at).limit(limit).all() + + if not sites: + return jsonify({ + 'success': False, + 'message': '没有可用的网站' + }), 404 + + # 创建新闻搜索器 + searcher = NewsSearcher(api_key) + + # 统计信息 + total_saved = 0 + total_found = 0 + processed_sites = [] + + # 为每个网站获取新闻 + for site in sites: + try: + # 搜索新闻 + news_items = searcher.search_site_news( + site_name=site.name, + site_url=site.url, + count=count_per_site, + freshness=freshness + ) + + site_saved = 0 + for item in news_items: + # 检查是否已存在 + existing_news = News.query.filter_by( + site_id=site.id, + url=item['url'] + ).first() + + if not existing_news: + news = News( + site_id=site.id, + title=item['title'], + content=item.get('summary') or item.get('snippet', ''), + url=item['url'], + source_name=item.get('site_name', ''), + source_icon=item.get('site_icon', ''), + published_at=item.get('published_at'), + news_type='Search Result', + is_active=True + ) + db.session.add(news) + site_saved += 1 + + total_found += len(news_items) + total_saved += site_saved + + processed_sites.append({ + 'id': site.id, + 'name': site.name, + 'found': len(news_items), + 'saved': site_saved + }) + + except Exception as e: + # 单个网站失败不影响其他网站 + processed_sites.append({ + 'id': site.id, + 'name': site.name, + 'error': str(e) + }) + continue + + # 提交事务 + db.session.commit() + + return jsonify({ + 'success': True, + 'message': f'批量获取完成,共处理 {len(processed_sites)} 个网站', + 'total_found': total_found, + 'total_saved': total_saved, + 'processed_sites': processed_sites + }) + + except Exception as e: + db.session.rollback() + return jsonify({ + 'success': False, + 'message': f'批量获取失败: {str(e)}' + }), 500 + # ========== 批量导入路由 ========== @app.route('/admin/batch-import', methods=['GET', 'POST']) @login_required @@ -908,9 +1172,9 @@ def create_app(config_name='default'): # 显示操作列 column_display_actions = True - column_list = ['id', 'site', 'title', 'news_type', 'published_at', 'is_active'] - column_searchable_list = ['title', 'content'] - column_filters = ['site', 'news_type', 'is_active', 'published_at'] + column_list = ['id', 'site', 'title', 'source_name', 'news_type', 'published_at', 'is_active'] + column_searchable_list = ['title', 'content', 'source_name'] + column_filters = ['site', 'news_type', 'source_name', 'is_active', 'published_at'] column_labels = { 'id': 'ID', 'site': '关联网站', @@ -918,16 +1182,19 @@ def create_app(config_name='default'): 'content': '新闻内容', 'news_type': '新闻类型', 'url': '新闻链接', + 'source_name': '来源网站', + 'source_icon': '来源图标', 'published_at': '发布时间', 'is_active': '是否启用', 'created_at': '创建时间', 'updated_at': '更新时间' } - form_columns = ['site', 'title', 'content', 'news_type', 'url', 'published_at', 'is_active'] + form_columns = ['site', 'title', 'content', 'news_type', 'url', 'source_name', 'source_icon', 'published_at', 'is_active'] # 可选的新闻类型 form_choices = { 'news_type': [ + ('Search Result', 'Search Result'), ('Product Update', 'Product Update'), ('Industry News', 'Industry News'), ('Company News', 'Company News'), @@ -935,6 +1202,9 @@ def create_app(config_name='default'): ] } + # 默认排序 + column_default_sort = ('published_at', True) # 按发布时间倒序排列 + # Prompt模板管理视图 class PromptAdmin(SecureModelView): can_edit = True diff --git a/config.py b/config.py index a604c05..d50f3f8 100644 --- a/config.py +++ b/config.py @@ -41,6 +41,20 @@ class Config: UPLOAD_FOLDER = 'static/uploads' MAX_CONTENT_LENGTH = 5 * 1024 * 1024 # 5MB ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'webp'} + + # DeepSeek API配置 + DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY') + DEEPSEEK_BASE_URL = os.environ.get('DEEPSEEK_BASE_URL') or 'https://api.deepseek.com' + + # 博查 Web Search API配置 + BOCHA_API_KEY = os.environ.get('BOCHA_API_KEY') + BOCHA_BASE_URL = os.environ.get('BOCHA_BASE_URL') or 'https://api.bocha.cn' + BOCHA_SEARCH_ENDPOINT = '/v1/web-search' + + # 新闻搜索配置 + NEWS_SEARCH_COUNT = 10 # 每次搜索返回的新闻数量 + NEWS_SEARCH_FRESHNESS = 'oneMonth' # 默认搜索一个月内的新闻 + NEWS_SEARCH_SUMMARY = True # 是否显示摘要 class DevelopmentConfig(Config): """开发环境配置""" diff --git a/fetch_news_cron.py b/fetch_news_cron.py new file mode 100644 index 0000000..9e6b092 --- /dev/null +++ b/fetch_news_cron.py @@ -0,0 +1,167 @@ +""" +定期新闻获取任务脚本 +用途:定期为网站批量获取最新新闻 +使用:python fetch_news_cron.py [options] + +可以通过crontab定时执行: +# 每天早上8点执行,获取10个网站的新闻 +0 8 * * * cd /path/to/zjpb && /path/to/venv/bin/python fetch_news_cron.py --limit 10 >> logs/news_fetch.log 2>&1 +""" +import os +import sys +import argparse +from datetime import datetime +from dotenv import load_dotenv + +# 加载环境变量 +load_dotenv() + +# 添加项目根目录到Python路径 +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from app import create_app +from models import db, Site, News +from utils.news_searcher import NewsSearcher + + +def fetch_news_for_sites(limit=10, count_per_site=5, freshness='oneMonth'): + """ + 批量为网站获取新闻 + + Args: + limit: 处理的网站数量限制 + count_per_site: 每个网站获取的新闻数量 + freshness: 新闻时间范围 + """ + # 创建Flask应用上下文 + app = create_app(os.getenv('FLASK_ENV', 'production')) + + with app.app_context(): + # 检查博查API配置 + api_key = app.config.get('BOCHA_API_KEY') + if not api_key: + print(f"[{datetime.now()}] 错误:未配置BOCHA_API_KEY") + return False + + # 获取启用的网站(按更新时间排序,优先处理旧的) + sites = Site.query.filter_by(is_active=True).order_by(Site.updated_at).limit(limit).all() + + if not sites: + print(f"[{datetime.now()}] 没有可处理的网站") + return False + + print(f"[{datetime.now()}] 开始批量获取新闻,共 {len(sites)} 个网站") + print(f"配置:每个网站 {count_per_site} 条新闻,时间范围:{freshness}") + print("-" * 60) + + # 创建新闻搜索器 + searcher = NewsSearcher(api_key) + + # 统计信息 + total_saved = 0 + total_found = 0 + success_count = 0 + error_count = 0 + + # 为每个网站获取新闻 + for i, site in enumerate(sites, 1): + print(f"[{i}/{len(sites)}] 处理网站: {site.name}") + + try: + # 搜索新闻 + news_items = searcher.search_site_news( + site_name=site.name, + site_url=site.url, + count=count_per_site, + freshness=freshness + ) + + if not news_items: + print(f" └─ 未找到新闻") + continue + + site_saved = 0 + for item in news_items: + # 检查是否已存在 + existing_news = News.query.filter_by( + site_id=site.id, + url=item['url'] + ).first() + + if not existing_news: + news = News( + site_id=site.id, + title=item['title'], + content=item.get('summary') or item.get('snippet', ''), + url=item['url'], + source_name=item.get('site_name', ''), + source_icon=item.get('site_icon', ''), + published_at=item.get('published_at'), + news_type='Search Result', + is_active=True + ) + db.session.add(news) + site_saved += 1 + + # 提交该网站的新闻 + db.session.commit() + + total_found += len(news_items) + total_saved += site_saved + success_count += 1 + + print(f" └─ 找到 {len(news_items)} 条,保存 {site_saved} 条新闻") + + except Exception as e: + error_count += 1 + print(f" └─ 错误: {str(e)}") + db.session.rollback() + continue + + print("-" * 60) + print(f"[{datetime.now()}] 批量获取完成") + print(f"成功: {success_count} 个网站, 失败: {error_count} 个网站") + print(f"共找到 {total_found} 条新闻,保存 {total_saved} 条新新闻") + print("=" * 60) + + return True + + +def main(): + """主函数""" + parser = argparse.ArgumentParser(description='定期新闻获取任务') + parser.add_argument('--limit', type=int, default=10, help='处理的网站数量限制(默认:10)') + parser.add_argument('--count', type=int, default=5, help='每个网站获取的新闻数量(默认:5)') + parser.add_argument('--freshness', type=str, default='oneMonth', + choices=['noLimit', 'oneDay', 'oneWeek', 'oneMonth', 'oneYear'], + help='新闻时间范围(默认:oneMonth)') + + args = parser.parse_args() + + print("=" * 60) + print(f"定期新闻获取任务 - 开始时间: {datetime.now()}") + print("=" * 60) + + try: + success = fetch_news_for_sites( + limit=args.limit, + count_per_site=args.count, + freshness=args.freshness + ) + + if success: + print(f"\n任务执行成功!") + sys.exit(0) + else: + print(f"\n任务执行失败!") + sys.exit(1) + + except Exception as e: + print(f"\n[{datetime.now()}] 严重错误: {str(e)}") + import traceback + traceback.print_exc() + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/manage.sh b/manage.sh index 51cf1e9..d541b14 100644 --- a/manage.sh +++ b/manage.sh @@ -4,7 +4,7 @@ # 用法: ./manage.sh [start|stop|restart|status|logs] APP_NAME="zjpb" -APP_DIR="/www/wwwroot/zjpb" +APP_DIR="/opt/1panel/apps/zjpb" VENV_DIR="$APP_DIR/venv" PID_FILE="$APP_DIR/logs/gunicorn.pid" @@ -14,7 +14,7 @@ case "$1" in start) echo "启动 $APP_NAME..." source $VENV_DIR/bin/activate - gunicorn -c gunicorn_config.py app:app + gunicorn -c gunicorn_config.py wsgi:app echo "$APP_NAME 已启动" ;; diff --git a/migrate_news_fields.py b/migrate_news_fields.py new file mode 100644 index 0000000..5020516 --- /dev/null +++ b/migrate_news_fields.py @@ -0,0 +1,99 @@ +""" +数据库迁移脚本 - 为News表添加source_name和source_icon字段 +版本:v2.2.0 +日期:2025-01-30 +""" +import pymysql +import os +from dotenv import load_dotenv + +# 加载环境变量 +load_dotenv() + +def migrate(): + """执行数据库迁移""" + # 数据库配置 + db_config = { + 'host': os.environ.get('DB_HOST', 'localhost'), + 'port': int(os.environ.get('DB_PORT', 3306)), + 'user': os.environ.get('DB_USER', 'root'), + 'password': os.environ.get('DB_PASSWORD', ''), + 'database': os.environ.get('DB_NAME', 'ai_nav'), + 'charset': 'utf8mb4' + } + + try: + # 连接数据库 + connection = pymysql.connect(**db_config) + cursor = connection.cursor() + + print("=" * 60) + print("开始执行数据库迁移 v2.2.0") + print("=" * 60) + + # 检查字段是否已存在 + cursor.execute(""" + SELECT COLUMN_NAME + FROM INFORMATION_SCHEMA.COLUMNS + WHERE TABLE_SCHEMA = %s + AND TABLE_NAME = 'news' + AND COLUMN_NAME IN ('source_name', 'source_icon') + """, (db_config['database'],)) + + existing_columns = [row[0] for row in cursor.fetchall()] + + # 添加 source_name 字段 + if 'source_name' not in existing_columns: + print("\n1. 添加 source_name 字段...") + cursor.execute(""" + ALTER TABLE news + ADD COLUMN source_name VARCHAR(100) + COMMENT '新闻来源网站名称' + AFTER url + """) + print(">>> source_name 字段添加成功") + else: + print("\n1. source_name 字段已存在,跳过") + + # 添加 source_icon 字段 + if 'source_icon' not in existing_columns: + print("\n2. 添加 source_icon 字段...") + cursor.execute(""" + ALTER TABLE news + ADD COLUMN source_icon VARCHAR(500) + COMMENT '新闻来源网站图标URL' + AFTER source_name + """) + print(">>> source_icon 字段添加成功") + else: + print("\n2. source_icon 字段已存在,跳过") + + # 提交事务 + connection.commit() + + print("\n" + "=" * 60) + print(">>> 数据库迁移完成!") + print("=" * 60) + + # 显示表结构 + print("\n当前 news 表结构:") + cursor.execute("DESCRIBE news") + for row in cursor.fetchall(): + print(f" - {row[0]}: {row[1]} {row[2]}") + + except Exception as e: + print(f"\n>>> 迁移失败:{str(e)}") + if 'connection' in locals(): + connection.rollback() + raise + + finally: + if 'cursor' in locals(): + cursor.close() + if 'connection' in locals(): + connection.close() + print("\n数据库连接已关闭") + + +if __name__ == '__main__': + migrate() diff --git a/models.py b/models.py index fae1c97..8f6ba00 100644 --- a/models.py +++ b/models.py @@ -90,6 +90,8 @@ class News(db.Model): content = db.Column(db.Text, comment='新闻内容') news_type = db.Column(db.String(50), default='Industry News', comment='新闻类型') url = db.Column(db.String(500), comment='新闻链接') + source_name = db.Column(db.String(100), comment='新闻来源网站名称') + source_icon = db.Column(db.String(500), comment='新闻来源网站图标URL') published_at = db.Column(db.DateTime, default=datetime.now, comment='发布时间') is_active = db.Column(db.Boolean, default=True, comment='是否启用') created_at = db.Column(db.DateTime, default=datetime.now, comment='创建时间') diff --git a/templates/admin/master.html b/templates/admin/master.html index e499e4a..47ca379 100644 --- a/templates/admin/master.html +++ b/templates/admin/master.html @@ -85,6 +85,12 @@ 批量导入 +