Files
UrlNav/Plugin.php

5080 lines
205 KiB
PHP
Raw Permalink Normal View History

2026-02-23 20:15:55 +08:00
<?php
if (!defined('__TYPECHO_ROOT_DIR__')) exit;
/**
* 网址导航、RSS信息阅读
*
* @package UrlNav
* @author 石头厝
* @version 1.5.0
* @link https://www.shitoucuo.com
*/
class UrlNav_Plugin implements Typecho_Plugin_Interface
{
/**
 * Filesystem path of the plugin's SQLite database file; assigned by initDbPath().
 */
private static $dbPath;
/**
 * Cached UrlNav_RssManager instance, created on first call to getRssManager(); null until then.
 */
private static $rssManager = null;
/**
 * Plugin activation hook.
 *
 * Prepares the SQLite storage (path resolution, schema creation, migrations)
 * and then registers two admin panels, the 'urlnav' action and three routes,
 * all handled by UrlNav_Action.
 *
 * @return string The activation message produced by _t().
 */
public static function activate()
{
    // Storage first: resolve the DB path, create missing tables, apply migrations.
    self::initDbPath();
    self::initDatabase();
    self::migrateDatabase();

    // Admin panels: panel file => the two title strings passed to Helper::addPanel().
    $panels = array(
        'UrlNav/Manage.php' => array('网址导航', '网址导航管理'),
        'UrlNav/Rss.php' => array('RSS信息', 'RSS信息阅读'),
    );
    foreach ($panels as $panelFile => $titles) {
        Helper::addPanel(3, $panelFile, $titles[0], $titles[1], 'administrator');
    }

    // Action handler.
    Helper::addAction('urlnav', 'UrlNav_Action');

    // Routes: route name => [URL pattern, UrlNav_Action method].
    // The two cron routes are separate endpoints for status checks and RSS refresh.
    $routes = array(
        'urlnav_action' => array('/action/urlnav', 'action'),
        'urlnav_status_cron' => array('/urlnav-status-cron', 'statusCron'),
        'urlnav_rss_cron' => array('/urlnav-rss-cron', 'rssCron'),
    );
    foreach ($routes as $routeName => $route) {
        Helper::addRoute($routeName, $route[0], 'UrlNav_Action', $route[1]);
    }

    return _t('网址导航插件已激活');
}
/**
 * Plugin deactivation hook.
 *
 * Unregisters everything activate() registered: both admin panels, the
 * 'urlnav' action and the three routes. The database file is not touched here.
 *
 * @return string The deactivation message produced by _t().
 */
public static function deactivate()
{
    foreach (array('UrlNav/Manage.php', 'UrlNav/Rss.php') as $panelFile) {
        Helper::removePanel(3, $panelFile);
    }

    Helper::removeAction('urlnav');

    foreach (array('urlnav_action', 'urlnav_status_cron', 'urlnav_rss_cron') as $routeName) {
        Helper::removeRoute($routeName);
    }

    return _t('网址导航插件已禁用');
}
/**
 * Builds the plugin's configuration form.
 *
 * Adds five titled sections (URL management, full-text fetching, RSS,
 * site status checks, cron secrets) with their inputs and default values.
 *
 * The default of 'rssKeepTime' is '0' (no automatic cleanup), matching the
 * option label marked "(默认)" and the help text; it previously defaulted to
 * '259200' (three days), contradicting both.
 *
 * The two cron secret inputs take a fresh self::generateSecret() value as
 * their default each time this form is built.
 *
 * @param Typecho_Widget_Helper_Form $form Form to which the inputs are added.
 * @return void
 */
public static function config(Typecho_Widget_Helper_Form $form)
{
    // Adds a section heading item to the form.
    $addHeading = function ($html) use ($form) {
        $form->addItem(new Typecho_Widget_Helper_Layout('div', array('class' => 'typecho-page-title')), $html);
    };
    // Adds a text input rendered with the narrow "mini" CSS class.
    $addMiniText = function ($name, $default, $label, $description) use ($form) {
        $input = new Typecho_Widget_Helper_Form_Element_Text($name, null, $default, $label, $description);
        $input->input->setAttribute('class', 'mini');
        $form->addInput($input);
    };
    // On/off radio choices shared by the boolean options.
    $onOff = array(
        '1' => _t('开启'),
        '0' => _t('关闭')
    );

    // ================== URL management ==================
    $addHeading('<h2>网址管理配置</h2>');
    // Items per page in the admin list.
    $addMiniText('pageSize', '20', _t('每页显示数量'), _t('后台管理中每页显示的网址数量'));
    // Whether to validate URLs when they are added.
    $validateUrl = new Typecho_Widget_Helper_Form_Element_Radio('validateUrl', $onOff,
        '1', _t('网址验证'), _t('新增网址时是否验证网址有效性'));
    $form->addInput($validateUrl);

    // ================== Full-text fetching ==================
    $addHeading('<h2>全文抓取配置</h2>');
    // Master switch for full-text fetching.
    $enableFullText = new Typecho_Widget_Helper_Form_Element_Radio('enableFullText', $onOff,
        '0', _t('开启全文抓取'), _t('开启后会对白名单中的网站自动抓取全文'));
    $form->addInput($enableFullText);
    // Whitelist, one "feed URL|CSS selector" entry per line.
    $fullTextWhitelist = new Typecho_Widget_Helper_Form_Element_Textarea('fullTextWhitelist', null,
        "https://wiki.eryajf.net/learning-weekly.xml|.markdown-body\nhttps://example.com/rss|#content",
        _t('全文抓取白名单'),
        _t('每行一个格式RSS地址|内容选择器CSS选择器<br>示例https://wiki.eryajf.net/learning-weekly.xml|.post-content'));
    $form->addInput($fullTextWhitelist);
    // Number of articles per feed to fetch in full.
    $addMiniText('fullTextPerSite', '3', _t('每站抓取全文篇数'), _t('每个RSS源最多抓取几篇的全文建议1-5'));
    // Timeout for fetching a single article page.
    $addMiniText('pageFetchTimeout', '8', _t('页面抓取超时时间(秒)'), _t('抓取单篇文章页面时的超时时间建议8-15秒'));

    // ================== RSS ==================
    $addHeading('<h2>RSS配置</h2>');
    // Articles per page on the RSS page.
    $addMiniText('rssPageSize', '30', _t('RSS页面每页显示数量'), _t('RSS信息页面每页显示的文章数量'));
    // Suggested refresh interval.
    $addMiniText('rssRefresh', '3600', _t('RSS刷新间隔'), _t('建议的RSS刷新间隔时间实际执行时间由宝塔计划任务决定'));
    // Feeds refreshed per scheduled run.
    $addMiniText('rssRefreshLimit', '20', _t('每次自动刷新网址数量'), _t('每次定时任务最多刷新的RSS网址数量建议10-50根据服务器性能调整'));
    // Maximum articles shown per feed.
    $addMiniText('maxFeedsPerSite', '5', _t('每个站点最大文章数'), _t('每个RSS源最多显示的文章数量'));
    // Retention period for cached articles (seconds; '0' disables cleanup).
    $rssKeepTime = new Typecho_Widget_Helper_Form_Element_Select('rssKeepTime',
        array(
            '0' => _t('不自动清理(默认)'),
            '86400' => _t('一天之前24小时前'),
            '259200' => _t('三天之前72小时前'),
            '604800' => _t('一周之前7天前'),
            '1296000' => _t('半个月之前15天前'),
            '2592000' => _t('一个月之前30天前'),
            '7776000' => _t('三个月之前90天前'),
            '15552000' => _t('半年之前180天前')
        ),
        '0', // Default must be the "no automatic cleanup" option the label and help text advertise.
        _t('RSS文章保留时间'),
        _t('自动清理超过此时间的RSS文章按照文章发布时间判断默认不自动清理'));
    $form->addInput($rssKeepTime);
    // Maximum cached articles per feed.
    $addMiniText('maxCachePerSite', '5', _t('每个站点最大缓存条数'), _t('每个RSS源最多缓存的文章数量0表示不限制'));
    // Feed fetch timeout.
    $addMiniText('fetchTimeout', '5', _t('RSS抓取超时时间'), _t('抓取RSS源时的超时时间'));
    // Retries after a failed fetch.
    $addMiniText('retryTimes', '2', _t('失败重试次数'), _t('RSS抓取失败时的重试次数'));

    // ================== Site status checks ==================
    $addHeading('<h2>网站状态检查配置</h2>');
    // Timeout for a single status check.
    $addMiniText('statusCheckTimeout', '8', _t('状态检查超时时间(秒)'), _t('检查网站状态时的超时时间'));
    // Maximum sites checked per run.
    $addMiniText('statusCheckMax', '80', _t('每次检查最大数量'), _t('每次自动检查时最多检查的网址数量'));

    // ================== Cron secrets ==================
    $addHeading('<h2>定时任务配置</h2>');
    // Secret for the RSS cron endpoint.
    $rssCronSecret = new Typecho_Widget_Helper_Form_Element_Text('rssCronSecret', null, self::generateSecret(),
        _t('RSS定时任务密钥'), _t('用于RSS定时任务访问的密钥请妥善保管'));
    $form->addInput($rssCronSecret);
    // Secret for the status-check cron endpoint.
    $statusCronSecret = new Typecho_Widget_Helper_Form_Element_Text('statusCronSecret', null, self::generateSecret(),
        _t('状态检查定时任务密钥'), _t('用于状态检查定时任务访问的密钥,请妥善保管'));
    $form->addInput($statusCronSecret);
}
/**
 * Per-user configuration panel hook.
 *
 * The body is empty: no per-user options are added to the form.
 *
 * @param Typecho_Widget_Helper_Form $form Form supplied by the caller (unused).
 */
public static function personalConfig(Typecho_Widget_Helper_Form $form) {}
/**
 * Resolves the SQLite database path and stores it in self::$dbPath.
 *
 * Ensures the "db" directory next to this file exists, then reuses the first
 * existing "urlnav_*.db" file found there. When none exists, a new file name
 * with a 10-character random hex suffix is chosen (the file itself is not
 * created here).
 *
 * The suffix is what keeps the database from being guessable inside the
 * plugin directory, so it is drawn from random_bytes(); the previous
 * rand()/uniqid() derivation is kept only as a fallback when no secure
 * random source is available.
 *
 * @return void
 */
private static function initDbPath()
{
    $dbDir = __DIR__ . '/db';

    // Create the directory if needed; re-check is_dir() in case another
    // request created it between the check and the mkdir() call.
    if (!is_dir($dbDir) && !@mkdir($dbDir, 0755, true) && !is_dir($dbDir)) {
        error_log('UrlNav: 无法创建数据库目录: ' . $dbDir);
    }

    // glob() returns false on error; empty() treats that like "no files".
    $dbFiles = glob($dbDir . '/urlnav_*.db');
    if (!empty($dbFiles)) {
        self::$dbPath = $dbFiles[0];
        return;
    }

    try {
        $randomStr = bin2hex(random_bytes(5)); // 5 bytes -> 10 hex characters
    } catch (Exception $e) {
        $randomStr = substr(md5(uniqid(rand(), true)), 0, 10);
    }
    self::$dbPath = $dbDir . '/urlnav_' . $randomStr . '.db';
}
/**
 * Generates a random 16-character lowercase hexadecimal secret.
 *
 * Used as the default value of the cron access keys in config(), so it is
 * drawn from random_bytes() rather than rand()/uniqid()/time(), which are
 * predictable. The old derivation remains only as a fallback when no secure
 * random source is available.
 *
 * @return string 16 hexadecimal characters.
 */
private static function generateSecret()
{
    try {
        return bin2hex(random_bytes(8)); // 8 bytes -> 16 hex characters
    } catch (Exception $e) {
        return substr(md5(uniqid(rand(), true) . time()), 0, 16);
    }
}
/**
 * Counts the active URLs of one category.
 *
 * @param mixed $categoryId Value bound to urlnav_urls.category_id.
 * @return array{url_count:int, rss_count:int} 'url_count' is the number of
 *         active URLs in the category; 'rss_count' is the number of those
 *         with a non-empty rss_url.
 */
public static function getCategoryStats($categoryId) {
    $db = self::getDbConnection();

    // Result key => counting query; each takes the category id as its only parameter.
    $queries = array(
        'url_count' => "SELECT COUNT(*) as url_count FROM urlnav_urls WHERE category_id = ? AND is_active = 1",
        'rss_count' => "SELECT COUNT(*) as rss_count FROM urlnav_urls WHERE category_id = ? AND rss_url IS NOT NULL AND rss_url != '' AND is_active = 1",
    );

    $stats = array();
    foreach ($queries as $key => $sql) {
        $stmt = $db->prepare($sql);
        $stmt->execute([$categoryId]);
        $stats[$key] = (int)$stmt->fetchColumn();
    }
    return $stats;
}
/**
 * Creates any missing tables, indexes, seed rows and triggers in the SQLite database.
 *
 * Each table is created only when sqlite_master has no entry for it, so the
 * method can be run repeatedly. PDO errors are logged and swallowed; nothing
 * is thrown to the caller.
 *
 * Fix: the cron_type index on urlnav_cron_log used the name "idx_cron_type",
 * which is already taken by the index on urlnav_refresh_log. SQLite index
 * names are unique per database, so on a fresh database that statement threw
 * and everything after it (urlnav_status_log, urlnav_status_stats, both
 * triggers) was never created. The index is now named
 * "idx_cron_log_cron_type", and the urlnav_cron_log indexes are created with
 * IF NOT EXISTS outside the table check so databases left half-initialised
 * by the old collision are completed on the next run.
 *
 * @return void
 */
private static function initDatabase()
{
    if (empty(self::$dbPath)) {
        self::initDbPath();
    }
    try {
        $db = new PDO('sqlite:' . self::$dbPath);
        $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

        // True when a table with the given name is listed in sqlite_master.
        $tableExists = function ($tableName) use ($db) {
            $stmt = $db->prepare("SELECT name FROM sqlite_master WHERE type='table' AND name = ?");
            $stmt->execute(array($tableName));
            return $stmt->fetch() !== false;
        };

        // Categories table, seeded with four default categories.
        if (!$tableExists('urlnav_categories')) {
            $db->exec("CREATE TABLE urlnav_categories (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
description TEXT,
sort_order INTEGER DEFAULT 0,
is_active INTEGER DEFAULT 1,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
)");
            $db->exec("INSERT INTO urlnav_categories (name, description, sort_order) VALUES
('常用工具', '日常使用的在线工具', 1),
('设计资源', '设计相关的素材和资源', 2),
('开发资源', '程序开发相关资源', 3),
('技术社区', '技术交流和学习社区', 4)");
        }

        // URLs table with its lookup indexes.
        if (!$tableExists('urlnav_urls')) {
            $db->exec("CREATE TABLE urlnav_urls (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
url TEXT NOT NULL,
description TEXT,
rss_url TEXT,
category_id INTEGER,
star_rating INTEGER DEFAULT 0, -- 新增星级评分0-3表示0-3颗星
sort_order INTEGER DEFAULT 0,
is_active INTEGER DEFAULT 1,
is_online INTEGER DEFAULT 1,
last_status_check DATETIME,
status_check_count INTEGER DEFAULT 0,
last_status_code INTEGER,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
last_refresh DATETIME,
refresh_count INTEGER DEFAULT 0,
success_count INTEGER DEFAULT 0,
failure_count INTEGER DEFAULT 0,
last_error TEXT,
FOREIGN KEY (category_id) REFERENCES urlnav_categories(id) ON DELETE SET NULL
)");
            $db->exec("CREATE INDEX idx_category_id ON urlnav_urls(category_id)");
            $db->exec("CREATE INDEX idx_is_active ON urlnav_urls(is_active)");
            $db->exec("CREATE INDEX idx_rss_url ON urlnav_urls(rss_url)");
            $db->exec("CREATE INDEX idx_last_refresh ON urlnav_urls(last_refresh)");
            $db->exec("CREATE INDEX idx_is_online ON urlnav_urls(is_online)");
            $db->exec("CREATE INDEX idx_last_status_check ON urlnav_urls(last_status_check)");
        }

        // RSS article cache (includes the full_content column).
        if (!$tableExists('urlnav_rss_cache')) {
            $db->exec("CREATE TABLE urlnav_rss_cache (
id INTEGER PRIMARY KEY AUTOINCREMENT,
url_id INTEGER NOT NULL,
feed_title TEXT NOT NULL,
feed_link TEXT NOT NULL,
feed_description TEXT,
full_content TEXT, -- 新增:完整内容字段
pub_date DATETIME NOT NULL,
guid TEXT NOT NULL,
cached_at DATETIME DEFAULT CURRENT_TIMESTAMP,
is_fresh INTEGER DEFAULT 1,
FOREIGN KEY (url_id) REFERENCES urlnav_urls(id) ON DELETE CASCADE,
UNIQUE(url_id, guid)
)");
            $db->exec("CREATE INDEX idx_url_id ON urlnav_rss_cache(url_id)");
            $db->exec("CREATE INDEX idx_pub_date ON urlnav_rss_cache(pub_date)");
            $db->exec("CREATE INDEX idx_cached_at ON urlnav_rss_cache(cached_at)");
            $db->exec("CREATE INDEX idx_is_fresh ON urlnav_rss_cache(is_fresh)");
            $db->exec("CREATE UNIQUE INDEX idx_url_guid ON urlnav_rss_cache(url_id, guid)");
        }

        // Favorites table.
        if (!$tableExists('urlnav_favorites')) {
            $db->exec("CREATE TABLE urlnav_favorites (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER DEFAULT 0,
feed_id INTEGER NOT NULL,
feed_title TEXT NOT NULL,
feed_link TEXT NOT NULL,
feed_description TEXT,
full_content TEXT, -- 新增:完整内容字段
pub_date DATETIME NOT NULL,
site_title TEXT,
site_url TEXT,
category_name TEXT,
favorited_at DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE(user_id, feed_id)
)");
            $db->exec("CREATE INDEX idx_favorite_user_id ON urlnav_favorites(user_id)");
            $db->exec("CREATE INDEX idx_favorite_feed_id ON urlnav_favorites(feed_id)");
            $db->exec("CREATE INDEX idx_favorite_created_at ON urlnav_favorites(favorited_at)");
        }

        // RSS refresh log.
        if (!$tableExists('urlnav_refresh_log')) {
            $db->exec("CREATE TABLE urlnav_refresh_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
refresh_type TEXT NOT NULL,
success_count INTEGER DEFAULT 0,
total_feeds INTEGER DEFAULT 0,
url_count INTEGER DEFAULT 0,
new_articles INTEGER DEFAULT 0,
error_message TEXT,
refresh_time DATETIME DEFAULT CURRENT_TIMESTAMP,
duration INTEGER DEFAULT 0,
cron_type TEXT DEFAULT 'rss' -- 新增区分RSS和状态检查
)");
            $db->exec("CREATE INDEX idx_refresh_time ON urlnav_refresh_log(refresh_time)");
            $db->exec("CREATE INDEX idx_refresh_type ON urlnav_refresh_log(refresh_type)");
            $db->exec("CREATE INDEX idx_cron_type ON urlnav_refresh_log(cron_type)");
        }

        // Cron execution log.
        if (!$tableExists('urlnav_cron_log')) {
            $db->exec("CREATE TABLE urlnav_cron_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
cron_type TEXT NOT NULL,
executed_time DATETIME DEFAULT CURRENT_TIMESTAMP,
result TEXT,
error_message TEXT
)");
        }
        // Created idempotently and outside the table check: the table may
        // already exist without its cron_type index on databases that hit the
        // old "idx_cron_type" name collision.
        $db->exec("CREATE INDEX IF NOT EXISTS idx_executed_time ON urlnav_cron_log(executed_time)");
        $db->exec("CREATE INDEX IF NOT EXISTS idx_cron_log_cron_type ON urlnav_cron_log(cron_type)");

        // Per-check status log.
        if (!$tableExists('urlnav_status_log')) {
            $db->exec("CREATE TABLE urlnav_status_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
url_id INTEGER NOT NULL,
is_online INTEGER DEFAULT 0,
status_code INTEGER,
response_time INTEGER,
check_time DATETIME DEFAULT CURRENT_TIMESTAMP,
error_message TEXT,
FOREIGN KEY (url_id) REFERENCES urlnav_urls(id) ON DELETE CASCADE
)");
            $db->exec("CREATE INDEX idx_url_id_status ON urlnav_status_log(url_id)");
            $db->exec("CREATE INDEX idx_check_time ON urlnav_status_log(check_time)");
            $db->exec("CREATE INDEX idx_is_online_status ON urlnav_status_log(is_online)");
        }

        // Aggregate status statistics, seeded with a single zeroed row.
        if (!$tableExists('urlnav_status_stats')) {
            $db->exec("CREATE TABLE urlnav_status_stats (
id INTEGER PRIMARY KEY AUTOINCREMENT,
total_checks INTEGER DEFAULT 0,
success_checks INTEGER DEFAULT 0,
failed_checks INTEGER DEFAULT 0,
avg_response_time REAL DEFAULT 0,
last_check_time DATETIME,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
)");
            $db->exec("INSERT INTO urlnav_status_stats (total_checks, success_checks, failed_checks, avg_response_time) VALUES (0, 0, 0, 0)");
        }

        // Triggers that stamp updated_at after every UPDATE.
        $db->exec("CREATE TRIGGER IF NOT EXISTS update_category_time
AFTER UPDATE ON urlnav_categories
BEGIN
UPDATE urlnav_categories SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END");
        $db->exec("CREATE TRIGGER IF NOT EXISTS update_url_time
AFTER UPDATE ON urlnav_urls
BEGIN
UPDATE urlnav_urls SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END");
        $db = null;
    } catch (PDOException $e) {
        error_log('UrlNav: 数据库初始化失败: ' . $e->getMessage());
    }
}
/**
 * Adds columns introduced by later plugin versions to existing tables.
 *
 * For each table the current columns are read with PRAGMA table_info and
 * only the missing ones are added, in a fixed order. PDO errors are logged
 * and swallowed; nothing is thrown to the caller.
 *
 * Fixes compared with the previous implementation:
 *  - full_content was checked on urlnav_rss_cache only, yet the ALTER was
 *    issued against both urlnav_rss_cache and urlnav_favorites. When
 *    urlnav_favorites already had the column the ALTER failed with a
 *    duplicate-column error and every later migration step was skipped; when
 *    only urlnav_favorites lacked it, it was never added. Each table is now
 *    checked on its own.
 *  - A table that does not exist is skipped (and logged) instead of aborting
 *    the remaining migrations.
 *  - self::$dbPath is resolved first if it is still empty, as initDatabase() does.
 *
 * @return void
 */
private static function migrateDatabase()
{
    if (empty(self::$dbPath)) {
        self::initDbPath();
    }
    try {
        $db = new PDO('sqlite:' . self::$dbPath);
        $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

        // Adds every column of $definitions (name => type/default clause) that
        // $table does not have yet and returns the names actually added.
        // Table and column names are hard-coded below, never user input.
        $addMissingColumns = function ($table, array $definitions) use ($db) {
            $existing = array();
            foreach ($db->query("PRAGMA table_info({$table})")->fetchAll(PDO::FETCH_ASSOC) as $column) {
                $existing[$column['name']] = true;
            }
            $added = array();
            if (empty($existing)) {
                // PRAGMA table_info yields no rows for an unknown table.
                error_log("UrlNav: 数据表{$table}不存在,跳过字段迁移");
                return $added;
            }
            foreach ($definitions as $columnName => $definition) {
                if (isset($existing[$columnName])) {
                    continue;
                }
                $db->exec("ALTER TABLE {$table} ADD COLUMN {$columnName} {$definition}");
                error_log("UrlNav: 已添加{$columnName}字段到{$table}表");
                $added[] = $columnName;
            }
            return $added;
        };

        $addMissingColumns('urlnav_urls', array(
            'rss_url' => 'TEXT',
            'last_refresh' => 'DATETIME',
            'refresh_count' => 'INTEGER DEFAULT 0',
            'success_count' => 'INTEGER DEFAULT 0',
            'failure_count' => 'INTEGER DEFAULT 0',
            'last_error' => 'TEXT',
            'is_online' => 'INTEGER DEFAULT 1',
            'last_status_check' => 'DATETIME',
            'status_check_count' => 'INTEGER DEFAULT 0',
            'last_status_code' => 'INTEGER',
            'star_rating' => 'INTEGER DEFAULT 0',
        ));

        $addedToCache = $addMissingColumns('urlnav_rss_cache', array(
            'is_fresh' => 'INTEGER DEFAULT 1',
            'full_content' => 'TEXT',
        ));
        if (in_array('is_fresh', $addedToCache, true)) {
            $db->exec("CREATE INDEX IF NOT EXISTS idx_is_fresh ON urlnav_rss_cache(is_fresh)");
        }

        // Checked independently of urlnav_rss_cache (see method docblock).
        $addMissingColumns('urlnav_favorites', array(
            'full_content' => 'TEXT',
        ));

        $addedToRefreshLog = $addMissingColumns('urlnav_refresh_log', array(
            'cron_type' => "TEXT DEFAULT 'rss'",
            'new_articles' => 'INTEGER DEFAULT 0',
            'message' => 'TEXT',
            'details' => 'TEXT',
        ));
        if (in_array('cron_type', $addedToRefreshLog, true)) {
            $db->exec("CREATE INDEX IF NOT EXISTS idx_cron_type ON urlnav_refresh_log(cron_type)");
        }

        $db = null;
    } catch (PDOException $e) {
        error_log('UrlNav数据库迁移失败: ' . $e->getMessage());
    }
}
/**
 * Opens a new PDO connection to the plugin's SQLite database.
 *
 * Resolves the database path if it is not set yet and runs initDatabase()
 * when the file does not exist. The connection is configured with exception
 * error mode, foreign keys on, a 3000 ms busy timeout and WAL journal mode.
 * Up to three attempts are made, sleeping one second before each retry; only
 * failures whose message contains "database is locked" are retried.
 *
 * @return PDO A freshly opened, configured connection.
 * @throws Exception When the connection cannot be established.
 */
public static function getDbConnection()
{
    if (empty(self::$dbPath)) {
        self::initDbPath();
    }
    if (!file_exists(self::$dbPath)) {
        self::initDatabase();
    }

    $maxRetries = 3;
    $retryDelay = 1; // seconds
    $pragmas = array(
        'PRAGMA foreign_keys = ON',
        'PRAGMA busy_timeout = 3000', // wait up to 3 seconds on a locked database
        'PRAGMA journal_mode = WAL',  // WAL mode for better concurrency
    );

    $attempt = 0;
    while ($attempt < $maxRetries) {
        if ($attempt > 0) {
            error_log("UrlNav: 数据库连接重试 {$attempt},等待 {$retryDelay} 秒...");
            sleep($retryDelay);
        }
        try {
            $pdo = new PDO('sqlite:' . self::$dbPath);
            $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
            foreach ($pragmas as $pragma) {
                $pdo->exec($pragma);
            }
            return $pdo;
        } catch (PDOException $e) {
            $isLocked = strpos($e->getMessage(), 'database is locked') !== false;
            $hasAttemptsLeft = $attempt < $maxRetries - 1;
            // Anything other than a lock, or a lock on the final attempt, is fatal.
            if (!$isLocked || !$hasAttemptsLeft) {
                throw new Exception('数据库连接失败: ' . $e->getMessage());
            }
        }
        $attempt++;
    }
    throw new Exception('数据库连接失败:重试' . $maxRetries . '次后仍被锁定');
}
/**
 * Returns the plugin's configuration object.
 *
 * The value of Widget_Options::plugin('UrlNav') is fetched on the first call
 * and kept in a function-static variable for later calls in the same request.
 *
 * @return mixed Whatever Widget_Options::plugin('UrlNav') returns.
 */
public static function getConfig()
{
    static $config = null;
    if ($config !== null) {
        return $config;
    }
    $config = Typecho_Widget::widget('Widget_Options')->plugin('UrlNav');
    return $config;
}
/**
 * Returns the shared UrlNav_RssManager, creating it on first use.
 *
 * @return UrlNav_RssManager The instance cached in self::$rssManager.
 */
private static function getRssManager()
{
    if (!isset(self::$rssManager)) {
        self::$rssManager = new UrlNav_RssManager();
    }
    return self::$rssManager;
}
/**
 * Entry point of the scheduled RSS refresh.
 *
 * Sends JSON/no-cache headers and an initial "starting" chunk (when headers
 * have not been sent yet), takes a lock file so only one run is active at a
 * time, calls refreshAllRssFeeds(true), logs the outcome through logCron()
 * and returns a summary.
 *
 * Changes compared with the previous implementation:
 *  - The first chunk is pushed with ob_flush() (only when an output buffer is
 *    active) followed by flush(). The old flush()-then-ob_flush() order left
 *    buffered output inside PHP and raised a notice when no buffer existed.
 *  - The lock file is created with fopen(..., 'x'), which fails if the file
 *    already exists, so two requests arriving together can no longer both
 *    pass the exists-then-create check.
 *  - The lock is released in a finally block, and only when this call
 *    created it.
 *
 * A lock older than 30 minutes is treated as stale and removed.
 *
 * @return array Summary with 'success', 'message' and 'timestamp'; a
 *               completed run also carries 'refreshed', 'refresh_result',
 *               'duration', 'successRssUrls' and 'failedRssUrls'; the
 *               exception path carries 'error' and empty URL lists.
 */
public static function executeRssCronTask()
{
    // Emit headers and a first chunk immediately so the web server sees
    // output before the long-running refresh starts.
    if (!headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
        header('Cache-Control: no-cache, no-store, must-revalidate');
        header('Pragma: no-cache');
        header('Expires: 0');
        echo json_encode(['status' => 'starting', 'timestamp' => time()]);
        // PHP-level buffer first, then the SAPI/web-server buffer.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }

    $startTime = microtime(true);
    $lockFile = __DIR__ . '/db/rss_cron_running.lock';
    $lockTimeout = 1800; // 30 minutes
    $ownsLock = false;

    // Result returned when another run holds the lock.
    $skipped = function () {
        error_log("UrlNav RSS定时任务: 跳过执行,已在运行中");
        return array(
            'success' => false,
            'message' => '定时任务已在运行中,跳过本次执行',
            'timestamp' => time()
        );
    };

    try {
        error_log("UrlNav RSS定时任务: 开始执行 " . date('Y-m-d H:i:s'));
        // Allow up to five minutes of execution.
        @set_time_limit(300);
        @ini_set('max_execution_time', 300);

        if (file_exists($lockFile)) {
            $lockTime = @filemtime($lockFile);
            if ($lockTime && (time() - $lockTime) < $lockTimeout) {
                return $skipped();
            }
            // Stale lock: remove it and take over.
            @unlink($lockFile);
        }

        // 'x' mode creates the file and fails if it already exists.
        $handle = @fopen($lockFile, 'x');
        if ($handle !== false) {
            fwrite($handle, "Started at: " . date('Y-m-d H:i:s'));
            fclose($handle);
            $ownsLock = true;
        } elseif (file_exists($lockFile)) {
            // Another request created the lock between our check and fopen().
            return $skipped();
        }
        // Otherwise the lock could not be created at all (e.g. directory not
        // writable); continue without one, as before.

        if ($ownsLock) {
            // finally does not run on fatal errors or timeouts; this does.
            register_shutdown_function(function() use ($lockFile) {
                if (file_exists($lockFile)) {
                    @unlink($lockFile);
                    error_log("UrlNav: shutdown函数删除RSS锁文件");
                }
            });
        }

        $refreshResult = self::refreshAllRssFeeds(true);
        $duration = round(microtime(true) - $startTime, 2);

        // The feed URL lists are copied to the top level of the result.
        $result = array(
            'success' => $refreshResult['success'],
            'refreshed' => true,
            'refresh_result' => $refreshResult,
            'timestamp' => time(),
            'duration' => $duration,
            'message' => $refreshResult['message'],
            'successRssUrls' => $refreshResult['successRssUrls'] ?? array(),
            'failedRssUrls' => $refreshResult['failedRssUrls'] ?? array()
        );
        self::logCron('rss_auto_refresh', $result);
        error_log("UrlNav RSS定时任务: 执行完成,耗时 {$duration}");
        return $result;
    } catch (Exception $e) {
        error_log("UrlNav RSS定时任务异常: " . $e->getMessage());
        return array(
            'success' => false,
            'error' => $e->getMessage(),
            'timestamp' => time(),
            'message' => 'RSS定时任务执行异常',
            'successRssUrls' => array(),
            'failedRssUrls' => array()
        );
    } finally {
        if ($ownsLock && file_exists($lockFile)) {
            @unlink($lockFile);
        }
    }
}
/**
 * Entry point of the scheduled site status check (runs without a lock file).
 *
 * Calls manualCheckStatus(null, false), feeds the result to
 * updateStatusStats(), records it via logCron() as JSON together with the
 * duration and a timestamp, and returns a summary.
 *
 * @return array Summary with 'success', 'timestamp' and 'message'; normal
 *               runs also carry 'status_checked', 'status_result' and
 *               'duration'; the exception path carries 'error'.
 */
public static function executeStatusCronTask()
{
    try {
        $startedAt = microtime(true);
        error_log("UrlNav 状态检查定时任务: 开始执行 " . date('Y-m-d H:i:s'));

        // Arguments: $urlIds = null, $isBatchCheck = false.
        $statusResult = self::manualCheckStatus(null, false);
        $duration = round(microtime(true) - $startedAt, 2);

        // Update the aggregate statistics, then write the cron log entry.
        self::updateStatusStats($statusResult);
        $logPayload = array_merge($statusResult, array(
            'duration' => $duration,
            'timestamp' => time()
        ));
        self::logCron('status_auto_check', json_encode($logPayload));

        if (!$statusResult['success']) {
            error_log("UrlNav 状态检查定时任务: 执行失败: " . $statusResult['message']);
            return array(
                'success' => false,
                'status_checked' => false,
                'status_result' => $statusResult,
                'timestamp' => time(),
                'duration' => $duration,
                'message' => '状态检查定时任务执行失败'
            );
        }

        error_log("UrlNav 状态检查定时任务: 执行成功,耗时 {$duration}");
        return array(
            'success' => true,
            'status_checked' => $statusResult['total'] > 0,
            'status_result' => $statusResult,
            'timestamp' => time(),
            'duration' => $duration,
            'message' => '状态检查定时任务执行成功'
        );
    } catch (Exception $e) {
        error_log("UrlNav 状态检查定时任务异常: " . $e->getMessage());
        return array(
            'success' => false,
            'error' => $e->getMessage(),
            'timestamp' => time(),
            'message' => '状态检查定时任务执行异常'
        );
    }
}
/**
 * Runs a callback while holding a lock file.
 *
 * If the lock file exists and is younger than one hour the task is skipped;
 * an older lock is deleted. The lock is created before the callback runs and
 * removed afterwards, on an Exception, or at script shutdown.
 *
 * @param string   $lockFile Path of the lock file.
 * @param string   $taskType Label used in log lines and messages.
 * @param callable $callback Task to run; invoked without arguments.
 * @return mixed The callback's return value, or an array with
 *               'success' => false describing why the task did not run or failed.
 */
private static function executeLockedTask($lockFile, $taskType, $callback)
{
    $staleAfter = 3600; // a lock older than one hour is considered stale

    if (file_exists($lockFile)) {
        $lockedAt = @filemtime($lockFile);
        $isFresh = $lockedAt && (time() - $lockedAt) < $staleAfter;
        if ($isFresh) {
            $heldFor = time() - $lockedAt;
            error_log("UrlNav {$taskType}: 跳过执行,锁文件存在 {$heldFor}");
            return array(
                'success' => false,
                'message' => "{$taskType}定时任务正在运行,跳过本次执行",
                'timestamp' => time(),
                'lock_time' => $lockedAt,
                'lock_duration' => $heldFor
            );
        }
        // Stale lock: remove it.
        @unlink($lockFile);
        error_log("UrlNav {$taskType}: 删除超时的锁文件(已存在超过 {$staleAfter} 秒)");
    }

    if (!@touch($lockFile)) {
        error_log("UrlNav {$taskType}: 无法创建锁文件");
        return array(
            'success' => false,
            'message' => '无法创建锁文件',
            'timestamp' => time()
        );
    }

    // Record the start time and task type inside the lock file.
    file_put_contents($lockFile, "Started at: " . date('Y-m-d H:i:s') . "\nTask type: {$taskType}");
    error_log("UrlNav {$taskType}: 开始执行定时任务 " . date('Y-m-d H:i:s'));

    // Deletes the lock file if it is still there and logs how long it existed.
    $releaseLock = function ($logText) use ($lockFile, $taskType) {
        if (!file_exists($lockFile)) {
            return;
        }
        $heldFor = time() - filemtime($lockFile);
        @unlink($lockFile);
        error_log("UrlNav {$taskType}: {$logText} {$heldFor}");
    };

    try {
        // Safety net for terminations that bypass the code below.
        register_shutdown_function(function() use ($releaseLock) {
            $releaseLock('shutdown函数删除锁文件锁持续了');
        });

        $result = $callback();
        $releaseLock('任务完成,删除锁文件,任务耗时');
        return $result;
    } catch (Exception $e) {
        $releaseLock('异常时删除锁文件,锁持续了');
        error_log("UrlNav {$taskType}定时任务异常: " . $e->getMessage());
        self::logCron('error', $e->getMessage());
        return array(
            'success' => false,
            'error' => $e->getMessage(),
            'timestamp' => time(),
            'message' => "{$taskType}定时任务执行异常"
        );
    }
}
/**
 * Manually removes the lock file(s) of a cron task (debugging aid).
 *
 * Lock files live in the "db" directory next to this file:
 *  - 'rss'    => rss_cron.lock and rss_cron_running.lock
 *  - 'status' => status_cron.lock
 *  - other    => cron.lock
 *
 * Fix: executeRssCronTask() takes its lock as "rss_cron_running.lock", a name
 * this method never looked at, so unlockCron('rss') could not release a stuck
 * RSS run. Both names are handled now; "rss_cron.lock" is still removed for
 * backward compatibility.
 *
 * @param string $cronType 'rss', 'status', or anything else for the generic lock.
 * @return array 'success' (bool), 'message' (string) and 'timestamp' (int).
 *               'success' is true when no lock existed or every existing lock
 *               was deleted, false when a deletion failed.
 */
public static function unlockCron($cronType = 'rss')
{
    if ($cronType === 'rss') {
        $lockNames = array('rss_cron.lock', 'rss_cron_running.lock');
    } elseif ($cronType === 'status') {
        $lockNames = array('status_cron.lock');
    } else {
        $lockNames = array('cron.lock');
    }

    $found = 0;
    $failed = 0;
    foreach ($lockNames as $lockName) {
        $lockFile = __DIR__ . '/db/' . $lockName;
        if (!file_exists($lockFile)) {
            continue;
        }
        $found++;
        if (!@unlink($lockFile)) {
            $failed++;
        }
    }

    if ($found === 0) {
        return array(
            'success' => true,
            'message' => "{$cronType}没有锁文件存在",
            'timestamp' => time()
        );
    }
    if ($failed > 0) {
        error_log("UrlNav: 手动解锁{$cronType}失败");
        return array(
            'success' => false,
            'message' => '无法删除锁文件',
            'timestamp' => time()
        );
    }
    error_log("UrlNav: 手动解锁{$cronType}成功");
    return array(
        'success' => true,
        'message' => "{$cronType}定时任务锁已解除",
        'timestamp' => time()
    );
}
/**
 * Refreshes a batch of RSS feeds and reports what happened.
 *
 * Selects up to rssRefreshLimit (clamped to 1-30) active URLs that have a
 * feed and are due, refreshes each through refreshSingleRssUrl(), writes one
 * entry via logRefresh() and returns the totals. When nothing is due, up to
 * five of the least recently refreshed feeds are taken instead.
 *
 * In cron mode the method also calls cleanExpiredCache() first, raises the
 * execution time limit to 300 seconds, stops starting new feeds after 240
 * seconds and writes small JSON chunks to the response while it works.
 *
 * Changes compared with the previous implementation:
 *  - The keep-alive progress chunk was guarded by !headers_sent(), which is
 *    always false once the first chunk has been flushed, so it was never
 *    written. It is now emitted roughly every three seconds in cron mode.
 *  - Output is pushed with ob_flush() (only when a buffer is active) followed
 *    by flush(); the old flush()-then-ob_flush() order did not push buffered
 *    output and raised a notice without a buffer.
 *  - The fallback query no longer uses "NULLS FIRST", which needs
 *    SQLite >= 3.30; ascending order already sorts NULL values first.
 *
 * NOTE(review): the configuration form recommends 10-50 feeds per run while
 * the clamp below caps at 30 — confirm which limit is intended.
 *
 * @param bool $isCron True when invoked from the scheduled task.
 * @return array 'success', 'message', 'successRssUrls' and 'failedRssUrls'
 *               always; when no exception occurred also 'successCount',
 *               'failureCount', 'newArticles', 'totalFeeds' and 'urlCount',
 *               plus 'duration' when at least one feed was selected.
 */
public static function refreshAllRssFeeds($isCron = false)
{
    $startTime = microtime(true);

    // Cron runs purge expired cache entries first.
    if ($isCron) {
        self::cleanExpiredCache();
    }

    // Writes one chunk and pushes it out: PHP buffer first, then the SAPI buffer.
    $emitChunk = function ($payload) {
        echo $payload;
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    };

    // Send headers and a first chunk right away so the web server sees output.
    if ($isCron && !headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
        header('Cache-Control: no-cache, no-store, must-revalidate');
        header('Pragma: no-cache');
        header('Expires: 0');
        $emitChunk('{"status":"starting","message":"RSS刷新任务开始...","timestamp":' . time() . '}');
    }

    // Allow up to five minutes of execution in cron mode.
    if ($isCron) {
        @set_time_limit(300);
        @ini_set('max_execution_time', 300);
    }

    try {
        $db = self::getDbConnection();
        // Batch size from the plugin configuration.
        $config = self::getConfig();
        $limit = intval($config->rssRefreshLimit ?? 10);
        $limit = max(1, min($limit, 30)); // clamp to 1-30
        error_log("===== UrlNav RSS刷新开始时间: " . date('Y-m-d H:i:s') . " =====");
        error_log("配置数量: {$limit}");

        // Feeds that are due: never refreshed, not refreshed for over an hour,
        // or mostly failing and not retried for 30 minutes. Ordered by
        // priority class, then by oldest refresh time.
        $sql = "
SELECT id, rss_url, url, title, last_refresh, failure_count, success_count, created_at
FROM urlnav_urls
WHERE is_active = 1
AND rss_url IS NOT NULL
AND TRIM(rss_url) != ''
AND (
-- 从未刷新过的
last_refresh IS NULL
-- 或者超过1小时没刷新的
OR last_refresh < datetime('now', '-1 hour')
-- 或者失败次数多且超过30分钟没重试
OR (failure_count > success_count AND last_refresh < datetime('now', '-30 minutes'))
)
ORDER BY
CASE
-- 最高优先级:从未刷新过的
WHEN last_refresh IS NULL THEN 0
-- 次高优先级:失败次数多于成功次数的
WHEN failure_count > success_count THEN 1
-- 中等优先级新添加的网址最近3天内
WHEN created_at > datetime('now', '-3 days') THEN 2
-- 低优先级:正常的
ELSE 3
END,
-- 按刷新时间从早到晚排序
CASE
WHEN last_refresh IS NULL THEN created_at
ELSE last_refresh
END ASC
LIMIT ?
";
        $stmt = $db->prepare($sql);
        $stmt->execute(array($limit));
        $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);

        // Nothing due: fall back to a few of the least recently refreshed feeds.
        if (empty($urls)) {
            error_log("UrlNav: 没有需要立即刷新的RSS源选择一些较久没刷新的");
            // Ascending order places NULL last_refresh values first in SQLite.
            $sql = "
SELECT id, rss_url, url, title, last_refresh, failure_count, success_count
FROM urlnav_urls
WHERE is_active = 1
AND rss_url IS NOT NULL
AND TRIM(rss_url) != ''
ORDER BY last_refresh ASC
LIMIT ?
";
            $stmt = $db->prepare($sql);
            $stmt->execute(array(min($limit, 5))); // take fewer in the fallback
            $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);
        }

        if (empty($urls)) {
            error_log("UrlNav: 没有需要刷新的RSS网址");
            return array(
                'success' => true,
                'message' => '没有需要刷新的RSS网址',
                'successCount' => 0,
                'failureCount' => 0,
                'newArticles' => 0,
                'totalFeeds' => 0,
                'urlCount' => 0,
                'successRssUrls' => array(),
                'failedRssUrls' => array()
            );
        }

        error_log("UrlNav: 获取到 " . count($urls) . " 个需要刷新的RSS源");
        // Log every selected feed.
        foreach ($urls as $url) {
            $refreshStatus = $url['last_refresh'] ?
                "最后刷新: " . $url['last_refresh'] :
                "从未刷新";
            error_log("UrlNav: 选中 - ID: {$url['id']}, {$refreshStatus}, URL: {$url['rss_url']}");
        }

        $successCount = 0;
        $failureCount = 0;
        $totalFeeds = 0;
        $newArticles = 0;
        // Feed URLs grouped by outcome.
        $successRssUrls = array();
        $failedRssUrls = array();

        // Fetch parameters with fallbacks for unset options.
        $timeout = intval($config->fetchTimeout ?? 15);
        $retryTimes = intval($config->retryTimes ?? 2);
        $maxFeeds = intval($config->maxFeedsPerSite ?? 20);

        $lastOutputTime = $startTime;
        foreach ($urls as $index => $url) {
            $currentTime = microtime(true);
            $elapsedTime = $currentTime - $startTime;

            // Stop before the 5-minute limit: no new feed after 4 minutes.
            if ($isCron && $elapsedTime > 240) {
                error_log('UrlNav: 接近总超时4分钟停止处理');
                break;
            }

            // Keep the connection alive with a progress chunk about every 3 seconds.
            if ($isCron && ($currentTime - $lastOutputTime) > 3) {
                $emitChunk('{"status":"processing","progress":"' . ($index+1) . '/' . count($urls) . '","timestamp":' . time() . '}');
                $lastOutputTime = $currentTime;
            }

            try {
                error_log("UrlNav: [开始] 处理RSS #" . ($index+1) . " - ID: " . $url['id'] . ", URL: " . $url['rss_url']);
                error_log("UrlNav: 最后刷新时间: " . ($url['last_refresh'] ?: '从未刷新'));
                $urlResult = self::refreshSingleRssUrl($url, $timeout, $retryTimes, $maxFeeds);
                if ($urlResult['success']) {
                    $successCount++;
                    $newArticles += $urlResult['new_articles'];
                    $totalFeeds += $urlResult['total_feeds'];
                    $successRssUrls[] = $url['rss_url'];
                    error_log("UrlNav: [成功] ID: " . $url['id'] . ", 新增文章: " . $urlResult['new_articles'] . ", RSS: " . $url['rss_url']);
                } else {
                    $failureCount++;
                    $failedRssUrls[] = $url['rss_url'];
                    error_log("UrlNav: [失败] ID: " . $url['id'] . ", 错误: " . ($urlResult['error'] ?? '未知错误') . ", RSS: " . $url['rss_url']);
                }
                // Pause 0.8 s between feeds (not after the last one) to go easy on remote servers.
                if ($index < count($urls) - 1) {
                    usleep(800000);
                }
            } catch (Exception $e) {
                $failureCount++;
                $failedRssUrls[] = $url['rss_url'] . " [异常]";
                error_log('UrlNav: [异常] ID: ' . $url['id'] . ', RSS: ' . $url['rss_url'] . ', 异常: ' . $e->getMessage());
            }
        }

        $duration = round(microtime(true) - $startTime, 2);
        // One refresh-log entry for the whole batch, including both URL lists.
        self::logRefresh($isCron ? 'cron' : 'manual', $successCount, $totalFeeds,
            count($urls), $newArticles, null, $duration, 'rss',
            $successRssUrls, $failedRssUrls);

        $message = "刷新完成:成功 {$successCount} 个,失败 {$failureCount}";
        $result = array(
            'success' => $successCount > 0 || count($urls) == 0,
            'successCount' => $successCount,
            'failureCount' => $failureCount,
            'newArticles' => $newArticles,
            'totalFeeds' => $totalFeeds,
            'urlCount' => count($urls),
            'duration' => $duration,
            'message' => $message,
            'successRssUrls' => $successRssUrls,
            'failedRssUrls' => $failedRssUrls
        );
        error_log("UrlNav: [完成] RSS刷新完成耗时 {$duration} 秒,{$message}");
        error_log("===== UrlNav RSS刷新结束 =====");
        return $result;
    } catch (Exception $e) {
        error_log('UrlNav: [全局异常] 刷新失败: ' . $e->getMessage());
        error_log("===== UrlNav RSS刷新异常结束 =====");
        return array(
            'success' => false,
            'message' => '刷新失败: ' . $e->getMessage(),
            'successRssUrls' => array(),
            'failedRssUrls' => array()
        );
    }
}
/**
 * Collect refresh statistics for every active RSS source.
 *
 * Only rows of urlnav_urls that are active and carry a non-blank rss_url
 * are counted.
 *
 * @return array Statistics with the keys total_rss_sources, never_refreshed,
 *               today_refreshed, week_refreshed, oldest_refresh, need_refresh,
 *               total_success, total_failure, total_refreshes and success_rate;
 *               oldest_days is present only when at least one source has been
 *               refreshed and its timestamp could be parsed. An empty array is
 *               returned when any query fails.
 */
public static function getRssRefreshStatus()
{
    // Shared filter: active rows that actually have a feed URL.
    $rssFilter = "is_active = 1 AND rss_url IS NOT NULL AND TRIM(rss_url) != ''";

    try {
        $db = self::getDbConnection();
        $stats = array();

        // Total number of RSS sources.
        $stmt = $db->query("SELECT COUNT(*) as total FROM urlnav_urls WHERE {$rssFilter}");
        $stats['total_rss_sources'] = $stmt->fetchColumn();

        // Sources that have never been refreshed.
        $stmt = $db->query("SELECT COUNT(*) as never_refreshed FROM urlnav_urls WHERE {$rssFilter} AND last_refresh IS NULL");
        $stats['never_refreshed'] = $stmt->fetchColumn();

        // Sources refreshed today.
        $stmt = $db->query("SELECT COUNT(*) as today_refreshed FROM urlnav_urls WHERE {$rssFilter} AND date(last_refresh) = date('now')");
        $stats['today_refreshed'] = $stmt->fetchColumn();

        // Sources refreshed within the last 7 days.
        $stmt = $db->query("SELECT COUNT(*) as week_refreshed FROM urlnav_urls WHERE {$rssFilter} AND last_refresh >= datetime('now', '-7 days')");
        $stats['week_refreshed'] = $stmt->fetchColumn();

        // Oldest refresh timestamp among sources that were refreshed at least once.
        $stmt = $db->query("SELECT MIN(last_refresh) as oldest_refresh FROM urlnav_urls WHERE {$rssFilter} AND last_refresh IS NOT NULL");
        $oldest = $stmt->fetchColumn();
        $stats['oldest_refresh'] = $oldest;
        if ($oldest) {
            // The SQL filters above compare last_refresh with datetime('now', ...),
            // i.e. the column is treated as UTC. Parse it as UTC as well so the age
            // is not skewed by the server's local timezone offset.
            $oldestTs = strtotime($oldest . ' UTC');
            if ($oldestTs === false) {
                // The stored value may already carry its own timezone designator.
                $oldestTs = strtotime($oldest);
            }
            // Skip the derived value instead of reporting a bogus age when unparseable.
            if ($oldestTs !== false) {
                $stats['oldest_days'] = round((time() - $oldestTs) / 86400, 1);
            }
        }

        // Sources due for a refresh (never refreshed or older than one day).
        $stmt = $db->query("SELECT COUNT(*) as need_refresh FROM urlnav_urls WHERE {$rssFilter} AND (last_refresh IS NULL OR last_refresh < datetime('now', '-1 day'))");
        $stats['need_refresh'] = $stmt->fetchColumn();

        // Aggregated success / failure counters. SUM() yields NULL on an empty set,
        // hence the ?? 0 defaults.
        $stmt = $db->query("SELECT
SUM(success_count) as total_success,
SUM(failure_count) as total_failure,
SUM(refresh_count) as total_refreshes
FROM urlnav_urls WHERE {$rssFilter}");
        $countStats = $stmt->fetch(PDO::FETCH_ASSOC);
        $stats['total_success'] = $countStats['total_success'] ?? 0;
        $stats['total_failure'] = $countStats['total_failure'] ?? 0;
        $stats['total_refreshes'] = $countStats['total_refreshes'] ?? 0;
        $stats['success_rate'] = $stats['total_refreshes'] > 0 ?
            round(($stats['total_success'] / $stats['total_refreshes']) * 100, 1) : 0;

        return $stats;
    } catch (Exception $e) {
        error_log('UrlNav: 获取刷新状态失败: ' . $e->getMessage());
        return array();
    }
}
/**
 * Refresh a single RSS source: fetch the feed, cache new articles and update
 * the per-source counters in urlnav_urls.
 *
 * @param array $url        Source row; reads the keys 'id', 'rss_url' and 'url'.
 * @param int   $timeout    Timeout in seconds, forwarded to parseRssFeedWithRetry().
 * @param int   $retryTimes Number of retries, forwarded to parseRssFeedWithRetry().
 * @param int   $maxFeeds   Maximum number of feed items stored per refresh.
 * @return array array('success' => bool, 'new_articles' => int,
 *               'total_feeds' => int, 'error' => string|null). Failures are
 *               reported through this array rather than thrown.
 */
private static function refreshSingleRssUrl($url, $timeout = 8, $retryTimes = 1, $maxFeeds = 10)
{
    $urlId = $url['id'];
    $rssUrl = trim($url['rss_url']);
    error_log("UrlNav: === 开始处理RSS ID: {$urlId} ===");
    error_log("UrlNav: RSS URL: {$rssUrl}");

    // Byte-limited truncation that never cuts through a multibyte UTF-8
    // character (plain substr() can leave an invalid trailing byte sequence).
    $cut = static function ($text, $maxBytes) {
        $text = (string)$text;
        if (strlen($text) <= $maxBytes) {
            return $text;
        }
        return function_exists('mb_strcut')
            ? mb_strcut($text, 0, $maxBytes, 'UTF-8')
            : substr($text, 0, $maxBytes);
    };

    // Declared up front so the catch block can tell whether a connection exists.
    $db = null;

    try {
        $db = self::getDbConnection();

        // Count this attempt regardless of its outcome.
        $stmt = $db->prepare("UPDATE urlnav_urls SET refresh_count = refresh_count + 1 WHERE id = ?");
        $stmt->execute(array($urlId));
        error_log("UrlNav: 更新刷新统计成功");

        // Fetch and parse the feed.
        error_log("UrlNav: 开始解析RSS内容...");
        $feeds = self::parseRssFeedWithRetry($rssUrl, $retryTimes, $timeout);
        error_log("UrlNav: RSS解析完成获取到 " . count($feeds) . " 篇文章");

        if (empty($feeds)) {
            error_log("UrlNav: 没有获取到文章数据");
            $stmt = $db->prepare("
UPDATE urlnav_urls SET
last_refresh = CURRENT_TIMESTAMP,
last_error = '无可用数据'
WHERE id = ?
");
            $stmt->execute(array($urlId));
            error_log("UrlNav: === 处理完成(无数据)===");
            return array(
                'success' => true,
                'new_articles' => 0,
                'total_feeds' => 0,
                'error' => null
            );
        }

        // Cap the number of articles stored per source.
        $feeds = array_slice($feeds, 0, $maxFeeds);
        error_log("UrlNav: 限制后文章数: " . count($feeds));

        $addedCount = 0;
        // Prepared lazily inside the per-article try so a prepare failure is still
        // handled per article, while the statement is reused across articles.
        $insertStmt = null;

        foreach ($feeds as $feedIndex => $feed) {
            try {
                // Make sure every column receives a value.
                $title = !empty($feed['title']) ? $cut($feed['title'], 255) : '无标题';
                $link = !empty($feed['link']) ? $cut($feed['link'], 500) : $url['url'];
                $description = !empty($feed['description']) ? $cut($feed['description'], 1000) : '';
                // Prefer the full content; fall back to the description.
                $fullContent = !empty($feed['full_content']) ? $cut($feed['full_content'], 5000) : $description;
                $pubDate = !empty($feed['pubDate']) ? $feed['pubDate'] : date('Y-m-d H:i:s');
                $guid = !empty($feed['guid']) ? $cut($feed['guid'], 255) : md5($link . $pubDate);
                error_log("UrlNav: 处理文章 #" . ($feedIndex+1) . ": {$title}");

                // INSERT OR IGNORE skips rows that collide with an existing entry.
                if ($insertStmt === null) {
                    $insertStmt = $db->prepare("
INSERT OR IGNORE INTO urlnav_rss_cache
(url_id, feed_title, feed_link, feed_description, full_content, pub_date, guid, cached_at, is_fresh)
VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, 1)
");
                }
                $insertStmt->execute(array(
                    $urlId,
                    $title,
                    $link,
                    $description,
                    $fullContent,
                    $pubDate,
                    $guid
                ));

                if ($insertStmt->rowCount() > 0) {
                    $addedCount++;
                    error_log("UrlNav: 文章 #" . ($feedIndex+1) . " 插入成功");
                } else {
                    error_log("UrlNav: 文章 #" . ($feedIndex+1) . " 已存在,跳过");
                }
            } catch (Exception $e) {
                // One bad article must not abort the rest of the feed.
                error_log('UrlNav: 文章处理异常: ' . $e->getMessage());
                continue;
            }
        }

        // Record the successful refresh.
        $stmt = $db->prepare("
UPDATE urlnav_urls SET
success_count = success_count + 1,
last_refresh = CURRENT_TIMESTAMP,
last_error = NULL
WHERE id = ?
");
        $stmt->execute(array($urlId));
        error_log("UrlNav: 成功解析RSS - ID: {$urlId}, 获取到 " . count($feeds) . " 篇文章, 新增 {$addedCount}");
        error_log("UrlNav: === 处理完成(成功)===");

        return array(
            'success' => true,
            'new_articles' => $addedCount,
            'total_feeds' => count($feeds),
            'error' => null
        );
    } catch (Exception $e) {
        $errorMessage = $cut($e->getMessage(), 500);
        error_log("UrlNav: RSS解析失败 - 错误: {$errorMessage}");

        // Persist the failure only when a connection exists, and never let a
        // secondary database error escape from this handler.
        if ($db !== null) {
            try {
                $stmt = $db->prepare("
UPDATE urlnav_urls SET
failure_count = failure_count + 1,
last_refresh = CURRENT_TIMESTAMP,
last_error = ?
WHERE id = ?
");
                $stmt->execute(array($errorMessage, $urlId));
            } catch (Exception $dbError) {
                error_log('UrlNav: 记录失败状态时出错: ' . $dbError->getMessage());
            }
        }
        error_log("UrlNav: === 处理完成(失败)===");

        return array(
            'success' => false,
            'new_articles' => 0,
            'total_feeds' => 0,
            'error' => $errorMessage
        );
    }
}
/**
 * Count the active RSS sources that are due for a refresh.
 *
 * A source is due when it has never been refreshed or when its last refresh
 * is at least one hour old.
 *
 * @return mixed The COUNT(*) value reported by the database, or 0 when the
 *               query fails or yields no row.
 */
public static function getUrlsNeedingRefresh()
{
    try {
        $statement = self::getDbConnection()->query("
SELECT COUNT(*) as count FROM urlnav_urls
WHERE is_active = 1
AND rss_url IS NOT NULL
AND TRIM(rss_url) != ''
AND (
last_refresh IS NULL
OR last_refresh <= datetime('now', '-1 hour')
)
");
        $row = $statement->fetch(PDO::FETCH_ASSOC);

        // Missing row or NULL column both fall back to 0.
        return isset($row['count']) ? $row['count'] : 0;
    } catch (Exception $failure) {
        error_log('UrlNav: 获取需要刷新的网址数量失败: ' . $failure->getMessage());
    }

    return 0;
}
/**
 * Parse a feed, retrying on failure with a growing pause between attempts.
 *
 * The feed is attempted once plus $retryTimes retries; before retry number i
 * the method sleeps i * 2 seconds.
 *
 * @param string $rssUrl     Feed URL.
 * @param int    $retryTimes Number of retries after the first attempt;
 *                           negative values are treated as 0.
 * @param int    $timeout    Timeout in seconds, forwarded to parseRssFeed().
 * @return array Articles as returned by parseRssFeed().
 * @throws Exception When every attempt fails; the last failure is attached as
 *                   the previous exception.
 */
private static function parseRssFeedWithRetry($rssUrl, $retryTimes = 1, $timeout = 8)
{
    // Guarantee at least one attempt; otherwise $lastError would still be null
    // at the final throw.
    $retryTimes = max(0, (int)$retryTimes);
    $lastError = null;

    for ($attempt = 0; $attempt <= $retryTimes; $attempt++) {
        try {
            if ($attempt > 0) {
                // Back off before retrying.
                sleep($attempt * 2);
                error_log("UrlNav: RSS重试第{$attempt}次: {$rssUrl}");
            }
            return self::parseRssFeed($rssUrl, $timeout);
        } catch (Exception $e) {
            $lastError = $e;
            $errorMsg = $e->getMessage();

            // DNS failures are not fixed by retrying; log a hint naming the host.
            if (strpos($errorMsg, 'getaddrinfo failed') !== false) {
                $host = parse_url($rssUrl, PHP_URL_HOST);
                error_log("UrlNav: DNS解析失败建议检查" . ($host ? $host : $rssUrl) . "域名是否正常");
            }

            if ($attempt < $retryTimes) {
                error_log("UrlNav: RSS解析失败" . ($attempt+1) . "次重试: " . $errorMsg);
            }
        }
    }

    // Every attempt failed.
    throw new Exception("RSS解析失败: " . $lastError->getMessage(), 0, $lastError);
}
/**
 * Download a feed and convert its entries into a uniform list of articles.
 *
 * Recognised layouts, tried in this order:
 *   1. RSS with channel/item
 *   2. Atom with entry elements under the root
 *   3. RSS with item elements directly under the root
 *   4. any document exposing entry elements in an Atom namespace
 *
 * Full-text handling:
 *   - When isInFullTextWhitelist() returns a selector for the feed, up to
 *     fullTextPerSite (config) article pages are fetched through
 *     fetchFullContentWithSelector().
 *   - Otherwise full_content is kept only when the feed's own
 *     description/summary or content is at least 500 bytes long; else it is
 *     left empty.
 *
 * @param string $rssUrl  Feed URL.
 * @param int    $timeout HTTP timeout in seconds for the feed download.
 * @return array List of arrays with the keys title, link, description,
 *               full_content, pubDate ('Y-m-d H:i:s') and guid.
 * @throws Exception On download, HTTP (403/404/500), XML or format errors, and
 *                   when no article is found. The original failure is attached
 *                   as the previous exception.
 */
private static function parseRssFeed($rssUrl, $timeout = 8)
{
    error_log("UrlNav: >>> 开始解析RSS: {$rssUrl}");

    // Cut to 10000 bytes without splitting a multibyte UTF-8 character, then
    // append the truncation marker.
    $clip = static function ($text) {
        $head = function_exists('mb_strcut')
            ? mb_strcut($text, 0, 10000, 'UTF-8')
            : substr($text, 0, 10000);
        return $head . '... [内容已截断]';
    };

    // Format a feed date, or return null when it is missing or unparseable.
    // Passing strtotime()'s false straight to date() would yield an epoch-0
    // (1970) timestamp.
    $toDate = static function ($raw) {
        $raw = trim((string)$raw);
        $timestamp = ($raw === '') ? false : strtotime($raw);
        return ($timestamp === false) ? null : date('Y-m-d H:i:s', $timestamp);
    };

    try {
        // HTTP context: timeout, browser-like headers, relaxed TLS checks.
        $context = stream_context_create(array(
            'http' => array(
                'timeout' => $timeout,
                'ignore_errors' => true,
                'header' => "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36\r\n" .
                    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" .
                    "Accept-Language: zh-CN,zh;q=0.9,en;q=0.8\r\n"
            ),
            'ssl' => array(
                'verify_peer' => false,
                'verify_peer_name' => false,
                'allow_self_signed' => true
            )
        ));

        error_log("UrlNav: 尝试获取RSS内容...");
        $content = @file_get_contents($rssUrl, false, $context);
        if ($content === false) {
            $error = error_get_last();
            $errorMsg = $error['message'] ?? '未知错误';
            error_log("UrlNav: file_get_contents失败: {$errorMsg}");
            if (isset($http_response_header)) {
                error_log("UrlNav: HTTP响应头: " . implode(" | ", $http_response_header));
            }
            throw new Exception('无法获取RSS内容: ' . $errorMsg);
        }
        error_log("UrlNav: 获取内容成功,长度: " . strlen($content) . " 字节");

        // Inspect the status line ('ignore_errors' lets error bodies through).
        if (isset($http_response_header[0])) {
            error_log("UrlNav: HTTP状态: {$http_response_header[0]}");
            if (strpos($http_response_header[0], '404') !== false) {
                throw new Exception('RSS源不存在 (404)');
            }
            if (strpos($http_response_header[0], '403') !== false) {
                throw new Exception('拒绝访问 (403)');
            }
            if (strpos($http_response_header[0], '500') !== false) {
                throw new Exception('服务器内部错误 (500)');
            }
        }

        if (empty($content) || trim($content) === '') {
            error_log("UrlNav: RSS内容为空");
            throw new Exception('RSS内容为空');
        }

        // Strip a UTF-8 byte order mark.
        if (substr($content, 0, 3) == "\xEF\xBB\xBF") {
            $content = substr($content, 3);
            error_log("UrlNav: 已移除BOM头");
        }

        // Light XML repair: drop control characters and escape stray ampersands.
        // Decimal AND hexadecimal character references are left untouched.
        $content = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', '', $content);
        $content = preg_replace('/&(?!(amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/', '&amp;', $content);

        libxml_use_internal_errors(true);
        libxml_clear_errors();
        $xml = null;

        // Attempt 1: DOMDocument in recovery mode.
        try {
            $dom = new DOMDocument();
            $dom->recover = true;
            $dom->strictErrorChecking = false;
            if (@$dom->loadXML($content)) {
                error_log("UrlNav: 使用DOMDocument解析成功");
                $xml = simplexml_import_dom($dom);
            }
        } catch (Exception $e) {
            error_log("UrlNav: DOMDocument解析失败: " . $e->getMessage());
        }

        // Attempt 2: plain SimpleXML.
        if ($xml === null) {
            error_log("UrlNav: 尝试SimpleXML解析...");
            $xml = simplexml_load_string($content, 'SimpleXMLElement', LIBXML_NOCDATA);
        }

        if ($xml === false) {
            $errorMsg = 'XML解析失败';
            $xmlErrors = libxml_get_errors();
            if (!empty($xmlErrors)) {
                $errorMsg .= ': ' . $xmlErrors[0]->message;
                error_log("UrlNav: XML错误: " . $xmlErrors[0]->message);
            }
            libxml_clear_errors();
            if (strpos($content, '<html') !== false) {
                $errorMsg .= ' (可能是HTML页面而非RSS)';
            }
            throw new Exception($errorMsg);
        }
        libxml_clear_errors();

        $feeds = array();

        // Plugin configuration for page fetching.
        $config = self::getConfig();
        $fullTextPerSite = intval($config->fullTextPerSite ?? 3);
        $pageFetchTimeout = intval($config->pageFetchTimeout ?? 10);
        // Number of articles already selected for a page fetch in this feed.
        $fullTextCount = 0;

        // A non-false result is the selector used for page extraction.
        $selector = self::isInFullTextWhitelist($rssUrl);
        $isInWhitelist = ($selector !== false);
        error_log("UrlNav: 白名单检查 - 是否在白名单: " . ($isInWhitelist ? '是' : '否') .
            ($isInWhitelist ? ",选择器: {$selector}" : ""));

        // ========== RSS: channel/item ==========
        if (isset($xml->channel) && isset($xml->channel->item)) {
            error_log("UrlNav: 检测到RSS格式 (channel->item)");
            // SimpleXML iteration keys are element names, not positions, so no
            // key is requested here.
            foreach ($xml->channel->item as $item) {
                // Each item is isolated so one failure does not lose the feed.
                try {
                    $fullContent = '';
                    $description = isset($item->description) ? (string)$item->description : '';
                    $articleTitle = isset($item->title) ? (string)$item->title : '无标题文章';
                    $articleLink = isset($item->link) ? (string)$item->link : '';

                    if (empty($articleTitle)) {
                        $articleTitle = '未命名文章 ' . date('Y-m-d H:i:s');
                    }
                    // Fall back to the guid when no link is given.
                    if (empty($articleLink) && isset($item->guid)) {
                        $articleLink = (string)$item->guid;
                    }
                    error_log("UrlNav: 处理文章: {$articleTitle}");

                    $namespaces = $item->getNamespaces(true);

                    // 1. Prefer content:encoded.
                    $encodedContent = '';
                    if (isset($namespaces['content'])) {
                        $contentNs = $item->children($namespaces['content']);
                        if (isset($contentNs->encoded)) {
                            $encodedContent = (string)$contentNs->encoded;
                            if (!empty($encodedContent) && trim($encodedContent) !== '') {
                                $fullContent = $encodedContent;
                                error_log("UrlNav: ✓ 找到content:encoded完整内容长度: " . strlen($fullContent));
                            }
                        }
                    }

                    // 2. Otherwise use the description.
                    if (empty($fullContent) && !empty($description)) {
                        $fullContent = $description;
                        error_log("UrlNav: 使用description作为内容长度: " . strlen($description));
                    }

                    // 3. Then dc:description.
                    if (empty($fullContent) && isset($namespaces['dc'])) {
                        $dcNs = $item->children($namespaces['dc']);
                        if (isset($dcNs->description) && !empty((string)$dcNs->description)) {
                            $fullContent = (string)$dcNs->description;
                            error_log("UrlNav: 找到dc:description内容");
                        }
                    }

                    // 4. Last resort: the first direct child with a long text value.
                    if (empty($fullContent)) {
                        foreach ($item->children() as $child) {
                            $childName = $child->getName();
                            $childContent = (string)$child;
                            // Skip fields known to be short metadata.
                            if (in_array($childName, ['title', 'link', 'guid', 'pubDate', 'author', 'category'])) {
                                continue;
                            }
                            if (strlen($childContent) > 100) {
                                $fullContent = $childContent;
                                error_log("UrlNav: 从子元素 {$childName} 提取内容");
                                break;
                            }
                        }
                    }

                    // ----- page fetch decision -----
                    $pageContent = null;
                    $rssContentLength = strlen($fullContent);
                    if ($isInWhitelist && $fullTextCount < $fullTextPerSite) {
                        $needPageFetch = true;
                        $fullTextCount++;
                        error_log("UrlNav: 白名单抓取全文 #{$fullTextCount}/{$fullTextPerSite} - {$articleTitle}");
                    } else {
                        $needPageFetch = false;
                        if ($isInWhitelist && $fullTextCount >= $fullTextPerSite) {
                            error_log("UrlNav: 已达白名单抓取限制({$fullTextCount}/{$fullTextPerSite}),跳过");
                        } elseif (!$isInWhitelist) {
                            error_log("UrlNav: 非白名单网站使用RSS摘要{$rssContentLength}字符),不抓取全文");
                        }
                    }

                    // ----- page fetch (whitelist only) -----
                    if ($needPageFetch && !empty($articleLink)) {
                        // Pause 300-800 ms before every fetch after the first one.
                        if ($fullTextCount > 1) {
                            usleep(rand(300000, 800000));
                        }
                        $pageContent = self::fetchFullContentWithSelector($articleLink, $selector, $pageFetchTimeout);
                        if (!empty($pageContent)) {
                            $pageLength = strlen($pageContent);
                            if ($pageLength > $rssContentLength + 300) {
                                // Clearly longer than the feed content: replace it.
                                $fullContent = $pageContent;
                                error_log("UrlNav: ✓ 页面抓取成功,获得 {$pageLength} 字符内容");
                            } else {
                                // Similar size: keep both.
                                $fullContent = $fullContent . "\n\n[页面补充内容]\n" . $pageContent;
                                error_log("UrlNav: ✓ 合并页面内容,总长度: " . strlen($fullContent));
                            }
                        } else {
                            error_log("UrlNav: ✗ 页面抓取失败");
                        }
                    }

                    // ----- full_content post-processing -----
                    if (!$isInWhitelist) {
                        $descriptionLength = strlen($description);
                        $encodedContentLength = strlen($encodedContent);
                        // Keep full content only when the feed carries a long text.
                        if ($descriptionLength >= 500 || $encodedContentLength >= 500) {
                            $fullContent = preg_replace('/\s+/', ' ', $fullContent);
                            if (strlen($fullContent) > 10000) {
                                $fullContent = $clip($fullContent);
                                error_log("UrlNav: 内容过长已截断至10000字符");
                            }
                            error_log("UrlNav: 非白名单网站description({$descriptionLength})或content:encoded({$encodedContentLength})长度≥500字存入全文字段");
                        } else {
                            $fullContent = '';
                            error_log("UrlNav: 非白名单网站description({$descriptionLength})和content:encoded({$encodedContentLength})都小于500字全文字段留空");
                        }
                    } else {
                        if (!empty($fullContent)) {
                            // Collapse whitespace runs, then cap the length.
                            $fullContent = preg_replace('/\s+/', ' ', $fullContent);
                            if (strlen($fullContent) > 10000) {
                                $fullContent = $clip($fullContent);
                                error_log("UrlNav: 内容过长已截断至10000字符");
                            }
                        } else {
                            error_log("UrlNav: 警告:未找到任何内容");
                            $fullContent = $description;
                        }
                    }

                    // Publication date and guid. When the feed gives neither a
                    // usable date nor a guid, derive the guid from link and title
                    // so it stays the same across refreshes.
                    $pubDate = $toDate($item->pubDate);
                    $guid = (string)$item->guid;
                    if ($pubDate === null) {
                        $pubDate = date('Y-m-d H:i:s');
                        if ($guid === '') {
                            $guid = md5($articleLink . '|' . (string)$item->title);
                        }
                    }

                    $feeds[] = array(
                        'title' => $articleTitle,
                        'link' => $articleLink,
                        'description' => $description,
                        'full_content' => $fullContent, // may be empty for non-whitelisted feeds
                        'pubDate' => $pubDate,
                        'guid' => $guid
                    );
                    error_log("UrlNav: ✓ 文章解析完成: {$articleTitle}");
                } catch (Exception $e) {
                    error_log("UrlNav: 文章处理失败,跳过: " . $e->getMessage());
                    continue;
                }
            }
        }
        // ========== Atom: entry elements ==========
        elseif (isset($xml->entry) || ($xml->getName() == 'feed' && isset($xml->children('http://www.w3.org/2005/Atom')->entry))) {
            error_log("UrlNav: 检测到Atom格式");
            $entries = isset($xml->entry) ? $xml->entry : $xml->children('http://www.w3.org/2005/Atom')->entry;
            foreach ($entries as $entry) {
                $link = '';
                $title = '';
                $description = '';
                $fullContent = '';
                $guid = '';

                // Link: first link whose rel is "alternate" or absent.
                if (isset($entry->link)) {
                    foreach ($entry->link as $linkElem) {
                        $attributes = $linkElem->attributes();
                        if ((string)$attributes['rel'] == 'alternate' || empty((string)$attributes['rel'])) {
                            $link = (string)$attributes['href'];
                            break;
                        }
                    }
                }
                // Fall back to the entry id.
                if (empty($link) && isset($entry->id)) {
                    $link = (string)$entry->id;
                }
                if (isset($entry->title)) {
                    $title = (string)$entry->title;
                }
                if (isset($entry->summary)) {
                    $description = (string)$entry->summary;
                }

                // 1. Prefer the content element, honouring its type attribute.
                $atomContent = '';
                if (isset($entry->content)) {
                    $contentElem = $entry->content;
                    $attributes = $contentElem->attributes();
                    $type = (string)($attributes['type'] ?? '');
                    if ($type === 'html' || $type === 'xhtml' || empty($type)) {
                        $atomContent = (string)$contentElem;
                        $fullContent = $atomContent;
                        error_log("UrlNav: 找到Atom content完整内容类型: {$type},长度: " . strlen($fullContent));
                    } elseif ($type === 'text') {
                        $atomContent = htmlspecialchars((string)$contentElem);
                        $fullContent = $atomContent;
                        error_log("UrlNav: 找到Atom text内容长度: " . strlen($fullContent));
                    }
                }

                // 2. Otherwise use the summary.
                if (empty($fullContent) && isset($entry->summary)) {
                    $fullContent = $description;
                    error_log("UrlNav: 使用Atom summary作为内容长度: " . strlen($fullContent));
                }

                // 3. Unwrap a literal CDATA wrapper left in the text.
                if (!empty($fullContent) && strpos($fullContent, '<![CDATA[') !== false) {
                    if (preg_match('/<!\[CDATA\[(.*?)\]\]>/s', $fullContent, $matches)) {
                        $fullContent = $matches[1];
                        error_log("UrlNav: 从CDATA提取Atom内容");
                    }
                }

                // ----- page fetch decision -----
                $pageContent = null;
                $atomContentLength = strlen($fullContent);
                if ($isInWhitelist && $fullTextCount < $fullTextPerSite) {
                    $needPageFetch = true;
                    $fullTextCount++;
                    error_log("UrlNav Atom: 白名单抓取全文 #{$fullTextCount}/{$fullTextPerSite} - {$title}");
                } else {
                    $needPageFetch = false;
                    error_log("UrlNav Atom: " . ($isInWhitelist ? "已达限制" : "非白名单") . "使用Atom内容{$atomContentLength}字符)");
                }

                // ----- page fetch (whitelist only) -----
                if ($needPageFetch && !empty($link)) {
                    // Pause before every fetch after the first one.
                    if ($fullTextCount > 1) {
                        usleep(rand(300000, 800000));
                    }
                    $pageContent = self::fetchFullContentWithSelector($link, $selector, $pageFetchTimeout);
                    if (!empty($pageContent) && strlen($pageContent) > $atomContentLength + 300) {
                        $fullContent = $pageContent;
                        error_log("UrlNav: ✓ Atom页面抓取成功");
                    }
                }

                // ----- full_content post-processing -----
                if (!$isInWhitelist) {
                    $descriptionLength = strlen($description);
                    $atomContentLength = strlen($atomContent);
                    if ($descriptionLength >= 500 || $atomContentLength >= 500) {
                        if (strlen($fullContent) > 10000) {
                            $fullContent = $clip($fullContent);
                            error_log("UrlNav: Atom内容过长已截断");
                        }
                        error_log("UrlNav Atom: 非白名单网站summary({$descriptionLength})或content({$atomContentLength})长度≥500字存入全文字段");
                    } else {
                        $fullContent = '';
                        error_log("UrlNav Atom: 非白名单网站summary({$descriptionLength})和content({$atomContentLength})都小于500字全文字段留空");
                    }
                } else {
                    if (!empty($fullContent) && strlen($fullContent) > 10000) {
                        $fullContent = $clip($fullContent);
                        error_log("UrlNav: Atom内容过长已截断");
                    }
                }

                // Publication date: updated first, then published, then now.
                $pubDate = $toDate($entry->updated) ?? $toDate($entry->published);
                $dateKnown = ($pubDate !== null);
                if (!$dateKnown) {
                    $pubDate = date('Y-m-d H:i:s');
                }

                // guid: entry id, else a hash that does not depend on "now".
                if (isset($entry->id)) {
                    $guid = (string)$entry->id;
                } else {
                    $guid = md5($link . ($dateKnown ? $pubDate : '|' . $title));
                }

                $feeds[] = array(
                    'title' => $title,
                    'link' => $link,
                    'description' => $description,
                    'full_content' => $fullContent, // may be empty for non-whitelisted feeds
                    'pubDate' => $pubDate,
                    'guid' => $guid
                );
            }
        }
        // ========== RSS: item elements directly under the root ==========
        elseif (isset($xml->item)) {
            error_log("UrlNav: 检测到RSS格式 (直接item)");
            foreach ($xml->item as $item) {
                $fullContent = '';
                $description = isset($item->description) ? (string)$item->description : '';
                $articleTitle = (string)$item->title;
                $articleLink = (string)$item->link;

                // Prefer content:encoded.
                $encodedContent = '';
                $namespaces = $item->getNamespaces(true);
                if (isset($namespaces['content'])) {
                    $contentNs = $item->children($namespaces['content']);
                    if (isset($contentNs->encoded)) {
                        $encodedContent = (string)$contentNs->encoded;
                        $fullContent = $encodedContent;
                        error_log("UrlNav: 找到content:encoded完整内容");
                    }
                }
                // Otherwise use the description.
                if (empty($fullContent) && !empty($description)) {
                    $fullContent = $description;
                    error_log("UrlNav: 使用description作为内容");
                }

                // ----- page fetch decision -----
                $pageContent = null;
                if ($isInWhitelist && $fullTextCount < $fullTextPerSite) {
                    $needPageFetch = true;
                    $fullTextCount++;
                    error_log("UrlNav Other: 白名单抓取全文 #{$fullTextCount}/{$fullTextPerSite} - {$articleTitle}");
                } else {
                    $needPageFetch = false;
                }

                // ----- page fetch (whitelist only) -----
                if ($needPageFetch && !empty($articleLink)) {
                    // Pause before every fetch after the first one.
                    if ($fullTextCount > 1) {
                        usleep(rand(300000, 800000));
                    }
                    $pageContent = self::fetchFullContentWithSelector($articleLink, $selector, $pageFetchTimeout);
                    if (!empty($pageContent) && strlen($pageContent) > strlen($fullContent) + 300) {
                        $fullContent = $pageContent;
                    }
                }

                // ----- full_content post-processing -----
                if (!$isInWhitelist) {
                    $descriptionLength = strlen($description);
                    $encodedContentLength = strlen($encodedContent);
                    if ($descriptionLength >= 500 || $encodedContentLength >= 500) {
                        if (!empty($fullContent) && strlen($fullContent) > 10000) {
                            $fullContent = $clip($fullContent);
                        }
                        error_log("UrlNav Other: 非白名单网站description({$descriptionLength})或content:encoded({$encodedContentLength})长度≥500字存入全文字段");
                    } else {
                        $fullContent = '';
                        error_log("UrlNav Other: 非白名单网站description({$descriptionLength})和content:encoded({$encodedContentLength})都小于500字全文字段留空");
                    }
                } else {
                    if (!empty($fullContent) && strlen($fullContent) > 10000) {
                        $fullContent = $clip($fullContent);
                    }
                }

                // Same date / guid fallback as the channel/item branch.
                $pubDate = $toDate($item->pubDate);
                $guid = (string)$item->guid;
                if ($pubDate === null) {
                    $pubDate = date('Y-m-d H:i:s');
                    if ($guid === '') {
                        $guid = md5($articleLink . '|' . $articleTitle);
                    }
                }

                $feeds[] = array(
                    'title' => $articleTitle,
                    'link' => $articleLink,
                    'description' => $description,
                    'full_content' => $fullContent, // may be empty for non-whitelisted feeds
                    'pubDate' => $pubDate,
                    'guid' => $guid
                );
            }
        }
        // ========== Last resort: look for an Atom namespace ==========
        else {
            $namespaces = $xml->getNamespaces(true);
            foreach ($namespaces as $ns) {
                if (strpos($ns, 'www.w3.org/2005/Atom') !== false) {
                    $atom = $xml->children($ns);
                    if (isset($atom->entry)) {
                        error_log("UrlNav: 检测到Atom命名空间格式");
                        foreach ($atom->entry as $entry) {
                            // Work on the entry's children inside the Atom namespace.
                            $entry = $entry->children($ns);

                            $fullContent = '';
                            $atomContent = '';
                            $entryDescription = '';
                            if (isset($entry->content)) {
                                $atomContent = (string)$entry->content;
                                $fullContent = $atomContent;
                            }
                            if (isset($entry->summary)) {
                                $entryDescription = (string)$entry->summary;
                                if (empty($fullContent)) {
                                    $fullContent = $entryDescription;
                                }
                            }

                            $entryTitle = isset($entry->title) ? (string)$entry->title : '';

                            // Atom links carry the URL in the href attribute; the
                            // element text is normally empty, so consult href when
                            // the text yields nothing.
                            $entryLink = isset($entry->link) ? (string)$entry->link : '';
                            if ($entryLink === '' && isset($entry->link)) {
                                foreach ($entry->link as $linkElem) {
                                    $linkAttrs = $linkElem->attributes();
                                    $rel = (string)$linkAttrs['rel'];
                                    if ($rel === 'alternate' || $rel === '') {
                                        $entryLink = (string)$linkAttrs['href'];
                                        break;
                                    }
                                }
                            }

                            // ----- page fetch decision -----
                            $needPageFetch = false;
                            if ($isInWhitelist && $fullTextCount < $fullTextPerSite) {
                                $needPageFetch = true;
                                $fullTextCount++;
                            }

                            // ----- page fetch (whitelist only) -----
                            if ($needPageFetch && !empty($entryLink)) {
                                // Pause before every fetch after the first one.
                                if ($fullTextCount > 1) {
                                    usleep(rand(300000, 800000));
                                }
                                $pageContent = self::fetchFullContentWithSelector($entryLink, $selector, $pageFetchTimeout);
                                if (!empty($pageContent)) {
                                    $fullContent = $pageContent;
                                }
                            }

                            // ----- full_content post-processing -----
                            if (!$isInWhitelist) {
                                $descriptionLength = strlen($entryDescription);
                                $contentLength = strlen($atomContent);
                                if ($descriptionLength >= 500 || $contentLength >= 500) {
                                    if (!empty($fullContent) && strlen($fullContent) > 10000) {
                                        $fullContent = $clip($fullContent);
                                    }
                                    error_log("UrlNav Namespace: 非白名单网站summary({$descriptionLength})或content({$contentLength})长度≥500字存入全文字段");
                                } else {
                                    $fullContent = '';
                                    error_log("UrlNav Namespace: 非白名单网站summary({$descriptionLength})和content({$contentLength})都小于500字全文字段留空");
                                }
                            } else {
                                if (!empty($fullContent) && strlen($fullContent) > 10000) {
                                    $fullContent = $clip($fullContent);
                                }
                            }

                            // Date / guid with a refresh-stable guid fallback.
                            $pubDate = $toDate($entry->updated);
                            $guid = isset($entry->id) ? (string)$entry->id : '';
                            if ($pubDate === null) {
                                $pubDate = date('Y-m-d H:i:s');
                                if ($guid === '') {
                                    $guid = md5($entryLink . '|' . $entryTitle);
                                }
                            }

                            $feeds[] = array(
                                'title' => $entryTitle,
                                'link' => $entryLink,
                                'description' => $entryDescription,
                                'full_content' => $fullContent, // may be empty for non-whitelisted feeds
                                'pubDate' => $pubDate,
                                'guid' => $guid
                            );
                        }
                        break;
                    }
                }
            }
            if (empty($feeds)) {
                error_log("UrlNav: 无法识别的RSS格式");
                throw new Exception('无法识别的RSS格式');
            }
        }

        if (empty($feeds)) {
            error_log("UrlNav: RSS中没有找到文章内容");
            throw new Exception('RSS中没有找到文章内容');
        }

        error_log("UrlNav: 找到 " . count($feeds) . " 篇文章");
        error_log("UrlNav: <<< RSS解析成功");
        return $feeds;
    } catch (Exception $e) {
        error_log("UrlNav: <<< RSS解析失败: " . $e->getMessage());
        throw new Exception("解析RSS失败 [{$rssUrl}]: " . $e->getMessage(), 0, $e);
    }
}
/**
 * Unwrap CDATA sections found literally inside a text value.
 *
 * Up to five passes are made. A pass keeps the first CDATA section's inner
 * text when that text is longer than 50 bytes and shorter than 90% of the
 * current content; otherwise the CDATA markers are simply stripped and
 * processing stops.
 *
 * @param string $content Raw content.
 * @param string $source  Label used in log messages only.
 * @return string The processed content (unchanged when it holds no CDATA marker).
 */
private static function extractCdataContent($content, $source = '')
{
    // Empty input or no CDATA opener: nothing to do.
    if (empty($content) || strpos($content, '<![CDATA[') === false) {
        return $content;
    }

    $lengthBefore = strlen($content);

    for ($pass = 1; $pass <= 5 && strpos($content, '<![CDATA[') !== false; $pass++) {
        // Inner text of the first CDATA section, or null when none matches.
        $inner = null;
        if (preg_match('/<!\[CDATA\[(.*?)\]\]>/s', $content, $found)) {
            $inner = $found[1];
        }

        $wellFormed = $inner !== null
            && strlen($inner) < strlen($content) * 0.9
            && strlen($inner) > 50;

        if (!$wellFormed) {
            // Nested or malformed CDATA: drop the markers and stop.
            $content = str_replace(array('<![CDATA[', ']]>'), '', $content);
            error_log("UrlNav: 清理不规范的CDATA标记");
            break;
        }

        $content = $inner;
        error_log("UrlNav: 从CDATA提取 {$source} 内容 (第{$pass}次)");
    }

    $lengthAfter = strlen($content);
    if ($lengthBefore != $lengthAfter) {
        error_log("UrlNav: CDATA处理完成 {$source},从 {$lengthBefore}{$lengthAfter} 字符");
    }

    return $content;
}
/**
 * Fetch an article page and heuristically extract its main text.
 *
 * Extraction strategies, each tried only while the result is still empty or
 * shorter than 500 bytes (strategy 2 only while it is empty):
 *   1. og:description meta tag
 *   2. description meta tag
 *   3. common article container elements
 *   4. concatenation of all paragraphs longer than 50 bytes
 *   5. the stripped text of the whole body
 *
 * @param string $articleUrl Article URL.
 * @param string $title      Article title (currently unused).
 * @param int    $timeout    HTTP timeout in seconds.
 * @return string|null Extracted text (capped at 15000 bytes plus a marker),
 *                     or null when the page cannot be fetched or nothing
 *                     usable is found.
 */
private static function fetchFullContentFromPage($articleUrl, $title = '', $timeout = 10)
{
    error_log("UrlNav: 尝试从页面抓取完整内容: {$articleUrl}");
    try {
        // Browser-like request headers. No Accept-Encoding header is sent:
        // file_get_contents() does not decompress responses, so advertising
        // gzip would hand compressed bytes to the regex extraction below.
        $context = stream_context_create([
            'http' => [
                'timeout' => $timeout,
                'ignore_errors' => true,
                'header' => "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36\r\n" .
                    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\n" .
                    "Accept-Language: zh-CN,zh;q=0.9,en;q=0.8\r\n" .
                    "Connection: close\r\n" .
                    "Upgrade-Insecure-Requests: 1",
                'method' => 'GET'
            ],
            'ssl' => [
                'verify_peer' => false,
                'verify_peer_name' => false,
                'allow_self_signed' => true
            ]
        ]);

        $html = @file_get_contents($articleUrl, false, $context);
        if ($html === false) {
            $error = error_get_last();
            error_log("UrlNav: 无法访问文章页面: " . ($error['message'] ?? '未知错误'));
            return null;
        }

        // Some servers compress regardless of the request headers; detect the
        // gzip magic bytes and decode when zlib is available.
        if (strncmp($html, "\x1f\x8b", 2) === 0 && function_exists('gzdecode')) {
            $decoded = @gzdecode($html);
            if ($decoded !== false) {
                $html = $decoded;
            }
        }

        if (empty($html)) {
            error_log("UrlNav: 文章页面内容为空");
            return null;
        }
        $htmlLength = strlen($html);
        error_log("UrlNav: 获取页面成功,长度: {$htmlLength} 字节");

        // Convert to UTF-8 when the page declares another charset.
        $encoding = 'UTF-8';
        if (preg_match('/<meta[^>]*charset=["\']?([a-zA-Z0-9\-_]+)["\']?/i', $html, $matches)) {
            $encoding = strtoupper($matches[1]);
            if ($encoding !== 'UTF-8') {
                // The charset name comes from the remote page. An unknown name
                // makes mb_convert_encoding() throw ValueError on PHP 8 (not an
                // Exception) and return false on PHP 7, so guard both cases and
                // keep the original bytes when the conversion is not possible.
                try {
                    $converted = @mb_convert_encoding($html, 'UTF-8', $encoding);
                } catch (Throwable $convertError) {
                    $converted = false;
                }
                if (is_string($converted) && $converted !== '') {
                    $html = $converted;
                    error_log("UrlNav: 检测到编码 {$encoding}已转换为UTF-8");
                } else {
                    error_log("UrlNav: 无法识别的页面编码 {$encoding},跳过编码转换");
                }
            }
        }

        $fullContent = '';

        // Strategy 1: Open Graph description.
        if (preg_match('/<meta\s+property="og:description"\s+content="([^"]+)"/i', $html, $matches)) {
            $fullContent = html_entity_decode($matches[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
            error_log("UrlNav: 从og:description提取内容长度: " . strlen($fullContent));
        }

        // Strategy 2: meta description.
        if (empty($fullContent) && preg_match('/<meta\s+name="description"\s+content="([^"]+)"/i', $html, $matches)) {
            $fullContent = html_entity_decode($matches[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
            error_log("UrlNav: 从meta description提取内容长度: " . strlen($fullContent));
        }

        // Strategy 3: well-known article containers.
        if (empty($fullContent) || strlen($fullContent) < 500) {
            $contentPatterns = [
                // WordPress themes
                '/<article[^>]*>(.*?)<\/article>/is',
                '/<div\s+class="[^"]*post-content[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+class="[^"]*entry-content[^"]*"[^>]*>(.*?)<\/div>/is',
                // Generic content areas
                '/<div\s+class="[^"]*content[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+id="content"[^>]*>(.*?)<\/div>/is',
                // Typecho themes
                '/<div\s+class="[^"]*post[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+class="[^"]*post-body[^"]*"[^>]*>(.*?)<\/div>/is',
                // Other common patterns
                '/<div\s+class="[^"]*article-content[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+class="[^"]*blog-content[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+class="[^"]*post-entry[^"]*"[^>]*>(.*?)<\/div>/is'
            ];
            foreach ($contentPatterns as $pattern) {
                if (preg_match($pattern, $html, $matches) && isset($matches[1])) {
                    $extracted = $matches[1];
                    // Remove scripts, styles and comments.
                    $extracted = preg_replace('/<script[^>]*>.*?<\/script>/is', '', $extracted);
                    $extracted = preg_replace('/<style[^>]*>.*?<\/style>/is', '', $extracted);
                    $extracted = preg_replace('/<!--.*?-->/s', '', $extracted);
                    // Keep only structural tags, then collapse whitespace.
                    $extracted = strip_tags($extracted, '<p><br><div><h1><h2><h3><h4><h5><h6><ul><ol><li>');
                    $extracted = preg_replace('/\s+/', ' ', $extracted);
                    $extracted = trim($extracted);
                    if (strlen($extracted) > 300) {
                        $fullContent = $extracted;
                        error_log("UrlNav: 从页面提取主要内容,长度: " . strlen($fullContent));
                        break;
                    }
                }
            }
        }

        // Strategy 4: all meaningful paragraphs.
        if (empty($fullContent) || strlen($fullContent) < 500) {
            if (preg_match_all('/<p[^>]*>(.*?)<\/p>/is', $html, $paragraphs)) {
                $allText = '';
                foreach ($paragraphs[1] as $para) {
                    $paraText = strip_tags($para);
                    $paraText = preg_replace('/\s+/', ' ', $paraText);
                    $paraText = trim($paraText);
                    // Ignore very short paragraphs.
                    if (strlen($paraText) > 50) {
                        $allText .= $paraText . "\n\n";
                    }
                }
                if (strlen($allText) > 500) {
                    $fullContent = $allText;
                    error_log("UrlNav: 从段落提取文本,长度: " . strlen($fullContent));
                }
            }
        }

        // Strategy 5: the whole body text.
        if (empty($fullContent) || strlen($fullContent) < 500) {
            if (preg_match('/<body[^>]*>(.*?)<\/body>/is', $html, $matches)) {
                $bodyText = $matches[1];
                // Remove non-content regions.
                $bodyText = preg_replace('/<script[^>]*>.*?<\/script>/is', '', $bodyText);
                $bodyText = preg_replace('/<style[^>]*>.*?<\/style>/is', '', $bodyText);
                $bodyText = preg_replace('/<!--.*?-->/s', '', $bodyText);
                $bodyText = preg_replace('/<header[^>]*>.*?<\/header>/is', '', $bodyText);
                $bodyText = preg_replace('/<footer[^>]*>.*?<\/footer>/is', '', $bodyText);
                $bodyText = preg_replace('/<nav[^>]*>.*?<\/nav>/is', '', $bodyText);
                $bodyText = strip_tags($bodyText);
                $bodyText = preg_replace('/\s+/', ' ', $bodyText);
                $bodyText = trim($bodyText);
                if (strlen($bodyText) > 500) {
                    // Drop common navigation / footer words.
                    $bodyText = preg_replace('/首页|关于|联系我们|版权|©|Copyright|all rights reserved/i', '', $bodyText);
                    $fullContent = $bodyText;
                    error_log("UrlNav: 从body提取文本长度: " . strlen($fullContent));
                }
            }
        }

        // Final clean-up and length cap.
        if (!empty($fullContent)) {
            $fullContent = preg_replace('/\s+/', ' ', $fullContent);
            $fullContent = trim($fullContent);

            // Strip common boilerplate phrases.
            $noisePatterns = [
                '/分享到.*?(?:微信|微博|QQ|Twitter|Facebook)/',
                '/阅读\s*\d+\s*次/',
                '/点赞\s*\d+\s*次/',
                '/本文由.*?创作/',
                '/版权声明.*/',
                '/转载请注明出处.*/',
                '/相关文章推荐.*/',
                '/你可能也喜欢.*/'
            ];
            foreach ($noisePatterns as $pattern) {
                $fullContent = preg_replace($pattern, '', $fullContent);
            }

            // Cap at 15000 bytes without splitting a multibyte UTF-8 character.
            if (strlen($fullContent) > 15000) {
                $head = function_exists('mb_strcut')
                    ? mb_strcut($fullContent, 0, 15000, 'UTF-8')
                    : substr($fullContent, 0, 15000);
                $fullContent = $head . '... [页面内容已截断]';
            }

            $finalLength = strlen($fullContent);
            error_log("UrlNav: 页面抓取成功,获得 {$finalLength} 字符内容");
            return $fullContent;
        }

        error_log("UrlNav: 无法从页面提取有效内容");
        return null;
    } catch (Exception $e) {
        error_log("UrlNav: 页面抓取异常: " . $e->getMessage());
        return null;
    }
}
/**
 * Fetch an article page and extract its main content with a configured selector.
 *
 * The page HTML is downloaded first. The selector-based extractor is tried,
 * and when it yields nothing the generic fallback extractor is used instead.
 * Every step is reported through error_log().
 *
 * @param string $articleUrl Article URL to download
 * @param string $selector   Selector passed to extractContentBySelector()
 * @param int    $timeout    Request timeout in seconds
 * @return string|null Extracted content, or null when every strategy failed
 */
public static function fetchFullContentWithSelector($articleUrl, $selector, $timeout = 10)
{
    error_log("UrlNav: 使用选择器抓取全文: {$articleUrl} [选择器: {$selector}]");

    try {
        // Download the page; an empty result means there is nothing to parse.
        $pageHtml = self::fetchHtmlContent($articleUrl, $timeout);
        if (empty($pageHtml)) {
            error_log("UrlNav: 无法获取页面HTML");
            return null;
        }

        $pageBytes = strlen($pageHtml);
        error_log("UrlNav: 获取页面成功，长度: {$pageBytes} 字节");

        // Preferred path: the selector configured for this feed.
        $selected = self::extractContentBySelector($pageHtml, $selector);
        if (!empty($selected)) {
            $selectedBytes = strlen($selected);
            error_log("UrlNav: ✓ 使用选择器 {$selector} 抓取成功，长度: {$selectedBytes}");
            return $selected;
        }

        // Secondary path: heuristic extraction that ignores the selector.
        error_log("UrlNav: 选择器 {$selector} 未找到内容，尝试备用方法");
        $heuristic = self::extractContentFallback($pageHtml);
        if (empty($heuristic)) {
            error_log("UrlNav: ✗ 所有提取方法都失败");
            return null;
        }

        $heuristicBytes = strlen($heuristic);
        error_log("UrlNav: ✓ 备用方法抓取成功，长度: {$heuristicBytes}");
        return $heuristic;
    } catch (Exception $e) {
        error_log("UrlNav: 智能抓取失败: " . $e->getMessage());
        return null;
    }
}
/**
 * Download the raw HTML of a page, reading at most the first 100 KB.
 *
 * The request is sent with browser-like headers. No Accept-Encoding header is
 * sent: the HTTP stream wrapper does not decompress responses, so advertising
 * gzip would hand compressed bytes to the regex-based extractors.
 *
 * @param string $url     Page URL
 * @param int    $timeout Request timeout in seconds
 * @return string|null Page body (possibly truncated), or null when the request failed
 */
private static function fetchHtmlContent($url, $timeout)
{
    $requestHeaders = array(
        "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language: zh-CN,zh;q=0.9,en;q=0.8",
        "Connection: close",
        "Upgrade-Insecure-Requests: 1",
    );

    $context = stream_context_create([
        'http' => [
            'timeout' => $timeout,
            // Return the body even for 4xx/5xx responses instead of failing.
            'ignore_errors' => true,
            'header' => implode("\r\n", $requestHeaders),
            'method' => 'GET'
        ],
        // NOTE(review): certificate verification is disabled here, so HTTPS
        // responses can be tampered with in transit. Kept as-is because sites
        // with broken certificates may depend on it - confirm before tightening.
        'ssl' => [
            'verify_peer' => false,
            'verify_peer_name' => false,
            'allow_self_signed' => true
        ]
    ]);

    // Read only the first 100 KB to bound memory usage.
    $content = @file_get_contents($url, false, $context, 0, 102400);
    if ($content === false) {
        $error = error_get_last();
        error_log("UrlNav: file_get_contents失败: " . ($error['message'] ?? '未知错误'));
        return null;
    }

    // Some servers compress even when not asked to. Detect the gzip magic
    // bytes and inflate incrementally, which tolerates a truncated stream.
    if (strncmp($content, "\x1f\x8b", 2) === 0 && function_exists('inflate_init')) {
        $inflater = @inflate_init(ZLIB_ENCODING_GZIP);
        if ($inflater !== false) {
            $inflated = @inflate_add($inflater, $content, ZLIB_SYNC_FLUSH);
            if (is_string($inflated) && $inflated !== '') {
                $content = $inflated;
            }
        }
    }

    return $content;
}
/**
 * Extract the inner HTML of the first element matching a simple selector.
 *
 * Supported selector forms are ".class", "#id" and a bare tag name. Matching
 * is regex based, not a real CSS engine:
 *  - ".class" first looks for the class as a whole whitespace-separated token
 *    (single or double quoted attribute) and only then falls back to the
 *    legacy substring match, so existing configurations keep working;
 *  - the tag selector requires a tag-name boundary, so "p" no longer matches "<pre>";
 *  - the element body is delimited by counting nested tags of the same name
 *    rather than stopping at the first closing tag.
 *
 * The result has scripts, styles and comments removed, is reduced to a small
 * set of formatting tags and has whitespace collapsed.
 *
 * @param string $html     HTML document
 * @param string $selector Selector (.class, #id or tag name)
 * @return string|null Cleaned content, or null when nothing matched or the
 *                     result is shorter than 100 bytes
 */
private static function extractContentBySelector($html, $selector)
{
    $selector = trim($selector);
    if ($selector === '' || $selector === '.' || $selector === '#') {
        error_log("UrlNav: 选择器为空，无法提取内容");
        return null;
    }

    $tagNamePattern = '[a-zA-Z][a-zA-Z0-9:-]*';

    // Build the list of opening-tag patterns to try, most precise first.
    if ($selector[0] === '.') {
        $label = '类选择器';
        $class = preg_quote(substr($selector, 1), '/');
        $openTagPatterns = array(
            // Whole class token, either quote style.
            '/<(' . $tagNamePattern . ')[^>]*\sclass\s*=\s*(["\'])(?:[^"\']*\s)?' . $class . '(?:\s[^"\']*)?\2[^>]*>/is',
            // Legacy behavior: class name appears anywhere inside the attribute.
            '/<(' . $tagNamePattern . ')[^>]*\sclass="[^"]*' . $class . '[^"]*"[^>]*>/is',
        );
    } elseif ($selector[0] === '#') {
        $label = 'ID选择器';
        $id = preg_quote(substr($selector, 1), '/');
        $openTagPatterns = array(
            '/<(' . $tagNamePattern . ')[^>]*\sid\s*=\s*(["\'])' . $id . '\2[^>]*>/is',
        );
    } else {
        $label = '标签选择器';
        $openTagPatterns = array(
            // The lookahead enforces the end of the tag name.
            '/<(' . preg_quote($selector, '/') . ')(?=[\s>\/])[^>]*>/is',
        );
    }

    $content = null;
    foreach ($openTagPatterns as $index => $openTagPattern) {
        if (!preg_match_all($openTagPattern, $html, $candidates, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
            continue;
        }
        // Take the first candidate whose closing tag can be located.
        foreach ($candidates as $candidate) {
            $tagName = $candidate[1][0];
            $bodyStart = $candidate[0][1] + strlen($candidate[0][0]);
            $body = self::sliceElementBody($html, $tagName, $bodyStart);
            if ($body !== null) {
                $content = $body;
                error_log("UrlNav: 使用{$label}模式" . ($index + 1) . "找到内容，标签: {$tagName}");
                break 2;
            }
        }
    }

    if ($content === null) {
        error_log("UrlNav: {$label} {$selector} 未匹配到内容");
        return null;
    }

    // Drop non-content markup.
    $content = preg_replace('/<script[^>]*>.*?<\/script>/is', '', $content);
    $content = preg_replace('/<style[^>]*>.*?<\/style>/is', '', $content);
    $content = preg_replace('/<!--.*?-->/s', '', $content);
    // Keep only basic formatting tags.
    $content = strip_tags($content, '<p><br><div><span><h1><h2><h3><h4><h5><h6><ul><ol><li><strong><em><b><i><a><img>');
    // Collapse whitespace.
    $content = preg_replace('/\s+/', ' ', $content);
    $content = trim($content);

    // Very short results are unlikely to be the article body.
    if (strlen($content) < 100) {
        error_log("UrlNav: 选择器提取的内容太短(" . strlen($content) . "字符)，可能不是文章内容");
        return null;
    }
    return $content;
}

/**
 * Return the inner HTML of an element, honoring nested tags of the same name.
 *
 * Scanning starts right after the opening tag. When the document ends before
 * the element is balanced (for example because the download was truncated),
 * everything up to the last closing tag seen is returned instead.
 *
 * @param string $html      HTML document
 * @param string $tagName   Tag name of the element that was opened
 * @param int    $bodyStart Byte offset just after the opening tag
 * @return string|null Inner HTML, or null when no closing tag follows at all
 */
private static function sliceElementBody($html, $tagName, $bodyStart)
{
    $tagPattern = '/<(\/?)' . preg_quote($tagName, '/') . '(?=[\s>\/])[^>]*>/i';
    $depth = 1;
    $cursor = $bodyStart;
    $lastCloseAt = null;

    while (preg_match($tagPattern, $html, $tag, PREG_OFFSET_CAPTURE, $cursor)) {
        $tagText = $tag[0][0];
        $tagAt = $tag[0][1];

        if ($tag[1][0] === '/') {
            $lastCloseAt = $tagAt;
            $depth--;
            if ($depth === 0) {
                return substr($html, $bodyStart, $tagAt - $bodyStart);
            }
        } elseif (substr($tagText, -2) !== '/>') {
            // A nested, non-self-closing element of the same name.
            $depth++;
        }
        $cursor = $tagAt + strlen($tagText);
    }

    if ($lastCloseAt === null) {
        return null;
    }
    return substr($html, $bodyStart, $lastCloseAt - $bodyStart);
}
/**
 * Heuristic content extraction used when the configured selector finds nothing.
 *
 * Strategies are tried in order, each one only when the content gathered so
 * far is empty or shorter than 500 bytes:
 *  1. the og:description meta tag;
 *  2. the description meta tag (only when step 1 produced nothing);
 *  3. common article containers (article, known content div classes, main, section);
 *  4. the text of all paragraphs longer than 50 bytes;
 *  5. the text of the whole body with header/footer/nav removed.
 *
 * The result then has whitespace collapsed and known boilerplate phrases
 * removed, and is truncated to 15000 bytes. Truncation respects UTF-8
 * character boundaries so that multi-byte text is never cut mid-character.
 *
 * @param string $html HTML document
 * @return string|null Extracted content, or null when nothing usable was found
 */
private static function extractContentFallback($html)
{
    $content = '';

    // Step 1: Open Graph description.
    if (preg_match('/<meta\s+property="og:description"\s+content="([^"]+)"/i', $html, $matches)) {
        $content = html_entity_decode($matches[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
        error_log("UrlNav 备用方法: 从og:description提取内容，长度: " . strlen($content));
    }

    // Step 2: plain meta description.
    if (empty($content) && preg_match('/<meta\s+name="description"\s+content="([^"]+)"/i', $html, $matches)) {
        $content = html_entity_decode($matches[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
        error_log("UrlNav 备用方法: 从meta description提取内容，长度: " . strlen($content));
    }

    // Step 3: well-known content containers.
    if (empty($content) || strlen($content) < 500) {
        $contentPatterns = [
            '/<article[^>]*>(.*?)<\/article>/is',
            '/<div\s+class="[^"]*post-content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*entry-content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+id="content"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*post[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*post-body[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*article-content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*blog-content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*post-entry[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*markdown-body[^"]*"[^>]*>(.*?)<\/div>/is', // GitHub-style markup
            '/<div\s+class="[^"]*article-body[^"]*"[^>]*>(.*?)<\/div>/is',  // generic article body
            '/<main[^>]*>(.*?)<\/main>/is',
            '/<section[^>]*>(.*?)<\/section>/is'
        ];
        foreach ($contentPatterns as $pattern) {
            if (!preg_match($pattern, $html, $matches) || !isset($matches[1])) {
                continue;
            }
            $extracted = $matches[1];
            $extracted = preg_replace('/<script[^>]*>.*?<\/script>/is', '', $extracted);
            $extracted = preg_replace('/<style[^>]*>.*?<\/style>/is', '', $extracted);
            $extracted = preg_replace('/<!--.*?-->/s', '', $extracted);
            // Keep block-level structure, drop everything else.
            $extracted = strip_tags($extracted, '<p><br><div><h1><h2><h3><h4><h5><h6><ul><ol><li>');
            $extracted = preg_replace('/\s+/', ' ', $extracted);
            $extracted = trim($extracted);
            if (strlen($extracted) > 300) {
                $content = $extracted;
                error_log("UrlNav 备用方法: 从页面提取主要内容，模式: " . substr($pattern, 0, 50) . "...，长度: " . strlen($content));
                break;
            }
        }
    }

    // Step 4: concatenate all meaningful paragraphs.
    if (empty($content) || strlen($content) < 500) {
        if (preg_match_all('/<p[^>]*>(.*?)<\/p>/is', $html, $paragraphs)) {
            $allText = '';
            foreach ($paragraphs[1] as $para) {
                $paraText = strip_tags($para);
                $paraText = preg_replace('/\s+/', ' ', $paraText);
                $paraText = trim($paraText);
                if (strlen($paraText) > 50) {
                    $allText .= $paraText . "\n\n";
                }
            }
            if (strlen($allText) > 500) {
                $content = $allText;
                error_log("UrlNav 备用方法: 从段落提取文本，长度: " . strlen($content));
            }
        }
    }

    // Step 5: whole body text as a last resort.
    if (empty($content) || strlen($content) < 500) {
        if (preg_match('/<body[^>]*>(.*?)<\/body>/is', $html, $matches)) {
            $bodyText = $matches[1];
            $bodyText = preg_replace('/<script[^>]*>.*?<\/script>/is', '', $bodyText);
            $bodyText = preg_replace('/<style[^>]*>.*?<\/style>/is', '', $bodyText);
            $bodyText = preg_replace('/<!--.*?-->/s', '', $bodyText);
            $bodyText = preg_replace('/<header[^>]*>.*?<\/header>/is', '', $bodyText);
            $bodyText = preg_replace('/<footer[^>]*>.*?<\/footer>/is', '', $bodyText);
            $bodyText = preg_replace('/<nav[^>]*>.*?<\/nav>/is', '', $bodyText);
            $bodyText = strip_tags($bodyText);
            $bodyText = preg_replace('/\s+/', ' ', $bodyText);
            $bodyText = trim($bodyText);
            if (strlen($bodyText) > 500) {
                // Strip common navigation / footer words.
                $bodyText = preg_replace('/首页|关于|联系我们|版权|©|Copyright|all rights reserved/i', '', $bodyText);
                $content = $bodyText;
                error_log("UrlNav 备用方法: 从body提取文本，长度: " . strlen($content));
            }
        }
    }

    if (empty($content)) {
        error_log("UrlNav 备用方法: 无法从页面提取有效内容");
        return null;
    }

    // Final cleanup: collapse whitespace.
    $content = preg_replace('/\s+/', ' ', $content);
    $content = trim($content);

    // Remove common boilerplate. Because whitespace was collapsed to a single
    // line above, the patterns ending in ".*" remove everything that follows
    // the marker phrase.
    $noisePatterns = [
        '/分享到.*?(?:微信|微博|QQ|Twitter|Facebook)/',
        '/阅读\s*\d+\s*次/',
        '/点赞\s*\d+\s*次/',
        '/本文由.*?创作/',
        '/版权声明.*/',
        '/转载请注明出处.*/',
        '/相关文章推荐.*/',
        '/你可能也喜欢.*/'
    ];
    foreach ($noisePatterns as $pattern) {
        $content = preg_replace($pattern, '', $content);
    }

    // Truncate to a bounded size without splitting a multi-byte character.
    $maxBytes = 15000;
    if (strlen($content) > $maxBytes) {
        if (function_exists('mb_strcut')) {
            $head = mb_strcut($content, 0, $maxBytes, 'UTF-8');
        } else {
            $head = substr($content, 0, $maxBytes);
            // Drop at most three trailing bytes of an incomplete UTF-8 sequence.
            for ($i = 0; $i < 3 && preg_match('//u', $head) !== 1; $i++) {
                $head = substr($head, 0, -1);
            }
        }
        $content = $head . '... [页面内容已截断]';
    }

    $finalLength = strlen($content);
    error_log("UrlNav 备用方法: 提取成功，获得 {$finalLength} 字符内容");
    return $content;
}
/**
 * Parse the full-text whitelist from the plugin configuration.
 *
 * The setting holds one "rss_url|selector" entry per line. Lines without a
 * "|" and entries whose URL or selector is empty are ignored. The parsed
 * entries are also written to the error log for debugging.
 *
 * @return array Map of RSS URL (or wildcard pattern) => selector
 */
public static function parseFullTextWhitelist()
{
    $config = self::getConfig();
    if (empty($config->fullTextWhitelist)) {
        return [];
    }

    $entries = [];
    foreach (explode("\n", trim($config->fullTextWhitelist)) as $rawLine) {
        $entry = trim($rawLine);
        // An empty line has no separator either, so one check covers both cases.
        if (strpos($entry, '|') === false) {
            continue;
        }

        // Split on the first "|" only; the selector itself may contain more.
        $parts = explode('|', $entry, 2);
        $feedUrl = trim($parts[0]);
        $cssSelector = trim($parts[1]);
        if (empty($feedUrl) || empty($cssSelector)) {
            continue;
        }
        $entries[$feedUrl] = $cssSelector;
    }

    // Debug output: entry count followed by every entry.
    error_log("UrlNav: 解析到 " . count($entries) . " 个白名单条目");
    foreach ($entries as $url => $selector) {
        error_log("UrlNav: 白名单 - {$url} => {$selector}");
    }

    return $entries;
}
/**
 * Look up the selector configured for an RSS feed in the full-text whitelist.
 *
 * The parsed whitelist is cached in a static variable for the rest of the
 * request. An exact URL match wins; otherwise entries containing "*" are
 * treated as patterns in which "*" matches any run of characters.
 *
 * @param string $rssUrl RSS feed URL
 * @return string|false The selector, or false when the feed is not whitelisted
 */
public static function isInFullTextWhitelist($rssUrl)
{
    static $whitelist = null;
    if (null === $whitelist) {
        $whitelist = self::parseFullTextWhitelist();
    }

    // Exact match first.
    if (isset($whitelist[$rssUrl])) {
        $exactSelector = $whitelist[$rssUrl];
        error_log("UrlNav: {$rssUrl} 在白名单中，选择器: " . $exactSelector);
        return $exactSelector;
    }

    // Then wildcard entries, in configuration order.
    foreach ($whitelist as $pattern => $selector) {
        if (strpos($pattern, '*') === false) {
            continue;
        }
        // Quote the whole pattern, then turn each quoted "*" into ".*".
        $quoted = preg_quote($pattern, '/');
        $regex = '/^' . str_replace('\*', '.*', $quoted) . '$/';
        if (preg_match($regex, $rssUrl)) {
            error_log("UrlNav: {$rssUrl} 匹配通配符模式 {$pattern}，选择器: {$selector}");
            return $selector;
        }
    }

    return false;
}
/**
 * Repair common well-formedness problems in feed XML before parsing.
 *
 * Applied in order:
 *  1. remove control characters that are illegal in XML;
 *  2. self-close bare <br> and <hr> tags;
 *  3. escape "&" characters that do not start one of the five predefined XML
 *     entities or a numeric character reference (decimal "&#123;" or
 *     hexadecimal "&#x7B;");
 *  4. remove whitespace between adjacent tags.
 *
 * @param string $content Raw XML text
 * @return string Repaired XML text
 */
private static function fixCommonXmlIssues($content)
{
    // 1. Strip illegal control characters (tab, LF and CR are kept).
    $content = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', '', $content);

    // 2. Simple fix for unclosed void tags.
    $content = str_replace('<br>', '<br/>', $content);
    $content = str_replace('<hr>', '<hr/>', $content);

    // 3. Escape stray ampersands. Hexadecimal references are recognised as
    //    well, otherwise "&#x4E2D;" would be turned into "&amp;#x4E2D;".
    $content = preg_replace('/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/', '&amp;', $content);

    // 4. Drop whitespace between tags.
    $content = preg_replace('/>\s+</', '><', $content);

    return $content;
}
/**
 * Refresh all RSS feeds on demand (entry point for the admin UI).
 *
 * @return mixed Whatever refreshAllRssFeeds(false) returns
 */
public static function manualRefreshRss()
{
    // false is forwarded as the only argument of the refresh routine.
    $refreshResult = self::refreshAllRssFeeds(false);

    return $refreshResult;
}
/**
 * Write one row describing a refresh run to urlnav_refresh_log.
 *
 * The insert adapts to the table layout: the columns reported by
 * PRAGMA table_info decide whether the "message" and "details" columns are
 * written. When an insert fails with a "has no column named" error the
 * database migration is run and progressively smaller inserts are retried.
 * A summary is always written to the error log; if the database cannot be
 * used at all, the error log is the only record. This method never throws.
 *
 * @param string      $type           Refresh type, stored in refresh_type
 * @param int         $successCount   Number of successful refreshes
 * @param int         $totalFeeds     Stored in total_feeds
 * @param int         $urlCount       Number of URLs processed; failures are reported as $urlCount - $successCount
 * @param int         $newArticles    Stored in new_articles
 * @param string|null $errorMessage   Stored in error_message
 * @param int|float   $duration       Stored in duration
 * @param string      $cronType       Stored in cron_type
 * @param array|null  $successRssUrls RSS URLs that refreshed successfully
 * @param array|null  $failedRssUrls  RSS URLs that failed to refresh
 * @return void
 */
private static function logRefresh($type, $successCount, $totalFeeds, $urlCount, $newArticles,
$errorMessage = null, $duration = 0, $cronType = 'rss',
$successRssUrls = null, $failedRssUrls = null)
{
try {
$db = self::getDbConnection();
// Structured details, later stored as JSON in the "details" column
$details = array();
if ($successRssUrls !== null) {
$details['success_rss_urls'] = $successRssUrls;
}
if ($failedRssUrls !== null) {
$details['failed_rss_urls'] = $failedRssUrls;
}
// Human-readable summary, later stored in the "message" column
$message = "刷新完成：成功 {$successCount} 个，失败 " . ($urlCount - $successCount) . "";
// Append the successful / failed RSS URL lists when they are provided
if ($successRssUrls !== null && !empty($successRssUrls)) {
// Shorten each URL to 50 bytes so the message stays compact
$shortSuccessUrls = array_map(function($url) {
if (strlen($url) > 50) {
return substr($url, 0, 50) . '...';
}
return $url;
}, array_slice($successRssUrls, 0, 5)); // show at most 5
$successList = implode('; ', $shortSuccessUrls);
if (count($successRssUrls) > 5) {
$successList .= '... (共' . count($successRssUrls) . '个)';
}
$message .= "\n成功的RSS: " . $successList;
}
if ($failedRssUrls !== null && !empty($failedRssUrls)) {
// Shorten each URL to 50 bytes so the message stays compact
$shortFailedUrls = array_map(function($url) {
if (strlen($url) > 50) {
return substr($url, 0, 50) . '...';
}
return $url;
}, array_slice($failedRssUrls, 0, 5)); // show at most 5
$failedList = implode('; ', $shortFailedUrls);
if (count($failedRssUrls) > 5) {
$failedList .= '... (共' . count($failedRssUrls) . '个)';
}
$message .= "\n失败的RSS: " . $failedList;
}
$detailsJson = !empty($details) ? json_encode($details, JSON_UNESCAPED_UNICODE) : null;
// Primary path: insert using as many columns as the table currently has
try {
// Inspect the table to see which optional columns exist
$tableInfo = $db->query("PRAGMA table_info(urlnav_refresh_log)");
$columns = $tableInfo->fetchAll(PDO::FETCH_ASSOC);
$hasMessage = false;
$hasDetails = false;
foreach ($columns as $column) {
if ($column['name'] === 'message') {
$hasMessage = true;
}
if ($column['name'] === 'details') {
$hasDetails = true;
}
}
if ($hasMessage && $hasDetails) {
// Both optional columns exist: full insert
$stmt = $db->prepare("
INSERT INTO urlnav_refresh_log
(refresh_type, success_count, total_feeds, url_count, new_articles, error_message,
refresh_time, duration, cron_type, message, details)
VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?, ?, ?, ?)
");
$stmt->execute(array(
$type,
$successCount,
$totalFeeds,
$urlCount,
$newArticles,
$errorMessage,
$duration,
$cronType,
$message,
$detailsJson
));
error_log("UrlNav: 刷新日志记录成功（使用完整字段）");
} elseif ($hasMessage) {
// Only the "message" column exists
$stmt = $db->prepare("
INSERT INTO urlnav_refresh_log
(refresh_type, success_count, total_feeds, url_count, new_articles, error_message,
refresh_time, duration, cron_type, message)
VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?, ?, ?)
");
$stmt->execute(array(
$type,
$successCount,
$totalFeeds,
$urlCount,
$newArticles,
$errorMessage,
$duration,
$cronType,
$message
));
error_log("UrlNav: 刷新日志记录成功使用message字段");
} else {
// No "message" column (a lone "details" column is ignored): legacy insert
$stmt = $db->prepare("
INSERT INTO urlnav_refresh_log
(refresh_type, success_count, total_feeds, url_count, new_articles, error_message,
refresh_time, duration, cron_type)
VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?, ?)
");
$stmt->execute(array(
$type,
$successCount,
$totalFeeds,
$urlCount,
$newArticles,
$errorMessage,
$duration,
$cronType
));
error_log("UrlNav: 刷新日志记录成功（旧方式）");
}
} catch (Exception $e) {
// A missing column is repairable: migrate, then retry with fewer columns
if (strpos($e->getMessage(), 'has no column named') !== false) {
error_log("UrlNav: 检测到字段缺失，尝试修复: " . $e->getMessage());
// Run the database migration
self::migrateDatabase();
// Retry with a simpler statement (note: new_articles is not written here)
try {
$stmt = $db->prepare("
INSERT INTO urlnav_refresh_log
(refresh_type, success_count, total_feeds, url_count, error_message, refresh_time, duration, cron_type)
VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?, ?)
");
$stmt->execute(array(
$type,
$successCount,
$totalFeeds,
$urlCount,
$errorMessage,
$duration,
$cronType
));
error_log("UrlNav: 刷新日志记录成功（修复后）");
} catch (Exception $retryError) {
// Still failing: fall back to the minimal set of columns
try {
$stmt = $db->prepare("
INSERT INTO urlnav_refresh_log
(refresh_type, success_count, refresh_time)
VALUES (?, ?, CURRENT_TIMESTAMP)
");
$stmt->execute(array($type, $successCount));
error_log("UrlNav: 刷新日志记录成功（最小字段）");
} catch (Exception $minimalError) {
error_log("UrlNav: 最小字段插入也失败: " . $minimalError->getMessage());
}
}
} else {
// Any other error is handled by the outer catch
throw $e;
}
}
// Mirror the statistics to the error log (only the first 3 URLs of each list)
error_log("UrlNav: 刷新统计 - 类型: {$type}, 成功: {$successCount}, 总文章: {$totalFeeds}, 网址数: {$urlCount}, 新文章: {$newArticles}, 耗时: {$duration}");
if ($successRssUrls !== null && !empty($successRssUrls)) {
error_log("UrlNav: 成功的RSS地址: " . implode(', ', array_slice($successRssUrls, 0, 3)));
}
if ($failedRssUrls !== null && !empty($failedRssUrls)) {
error_log("UrlNav: 失败的RSS地址: " . implode(', ', array_slice($failedRssUrls, 0, 3)));
}
} catch (Exception $e) {
error_log('UrlNav: 记录刷新日志失败: ' . $e->getMessage());
// Last resort: the error log is the only record of this run
error_log("UrlNav: 刷新统计（无法写入数据库） - 类型: {$type}, 成功: {$successCount}, 总文章: {$totalFeeds}, 网址数: {$urlCount}, 新文章: {$newArticles}, 耗时: {$duration}");
}
}
/**
 * Write one row describing a cron run to urlnav_cron_log.
 *
 * An array result is stored JSON-encoded, and its "error" entry (when set)
 * is copied into the error_message column. Failures are only logged.
 *
 * @param string $type   Cron type, stored in cron_type
 * @param mixed  $result Result of the run (an array or a value stored as-is)
 * @return void
 */
private static function logCron($type, $result)
{
    try {
        $db = self::getDbConnection();

        $failure = null;
        $payload = $result;
        if (is_array($result)) {
            $failure = isset($result['error']) ? $result['error'] : null;
            $payload = json_encode($result);
        }

        // The SQL text is kept flush-left so the statement is unchanged.
        $insert = $db->prepare("
INSERT INTO urlnav_cron_log
(cron_type, result, error_message, executed_time)
VALUES (?, ?, ?, CURRENT_TIMESTAMP)
");
        $insert->execute(array($type, $payload, $failure));
    } catch (Exception $e) {
        error_log('UrlNav: 记录定时任务日志失败: ' . $e->getMessage());
    }
}
/**
 * Check a cron access secret against the configured one.
 *
 * The comparison is constant-time. Validation fails when the supplied value
 * is not a string, when the type is unknown, or when no (non-empty) secret
 * is configured for the type - an unset secret never validates.
 *
 * @param string $secret Secret supplied by the caller
 * @param string $type   'rss' or 'status'
 * @return bool True when the secret matches the configured secret for $type
 */
public static function validateCronSecret($secret, $type = 'rss')
{
    $config = self::getConfig();

    if ($type === 'rss') {
        $expected = $config->rssCronSecret;
    } elseif ($type === 'status') {
        $expected = $config->statusCronSecret;
    } else {
        return false;
    }

    if (!is_string($secret) || !is_string($expected) || $expected === '') {
        return false;
    }

    // hash_equals avoids leaking the secret through comparison timing.
    return hash_equals($expected, $secret);
}
/**
 * Public entry point for the RSS cron job.
 *
 * Response headers and a small "processing" JSON fragment are emitted
 * immediately so that a reverse proxy sees output before the long-running
 * refresh starts. The secret is then validated and the refresh executed.
 *
 * NOTE(review): validation only runs when $secret is truthy, so a request
 * without a secret skips the check entirely. Confirm that the caller enforces
 * the secret before relying on this for access control.
 *
 * NOTE(review): once the early fragment has been flushed, headers_sent() is
 * true and the final result is not echoed here; it is only returned. If an
 * outer output buffer delays the flush, both fragments are echoed. Confirm
 * which output the caller expects.
 *
 * @param string|null $secret Access secret supplied with the request
 * @return array Result of the run, or an error array with success = false
 */
public static function executePublicRssCron($secret = null)
{
    // Send headers and a first chunk of output right away.
    if (!headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
        header('Cache-Control: no-cache, no-store, must-revalidate');
        header('Pragma: no-cache');
        header('Expires: 0');
        echo '{"status":"processing","message":"任务开始执行...","timestamp":' . time() . '}';
        // Push PHP's own buffer first (only when one is active, otherwise
        // ob_flush() raises a notice), then the system write buffers.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }
    error_log('UrlNav RSS Cron: 请求开始 ' . date('Y-m-d H:i:s'));

    try {
        // Validate the secret when one was supplied.
        if ($secret && !self::validateCronSecret($secret, 'rss')) {
            error_log('UrlNav RSS Cron: 无效的Cron密钥');
            return array(
                'success' => false,
                'message' => '无效的访问密钥',
                'timestamp' => time()
            );
        }

        // Run the actual refresh task.
        $result = self::executeRssCronTask();

        // Echo the full result only if nothing has been sent yet.
        if (!headers_sent()) {
            echo json_encode($result);
        }
        return $result;
    } catch (Exception $e) {
        error_log('UrlNav RSS Cron: 异常: ' . $e->getMessage());
        $errorResult = array(
            'success' => false,
            'error' => $e->getMessage(),
            'timestamp' => time(),
            'message' => 'RSS定时任务执行失败'
        );
        if (!headers_sent()) {
            echo json_encode($errorResult);
        }
        return $errorResult;
    }
}
/**
 * Public entry point for the site status-check cron job (no locking).
 *
 * JSON and no-cache headers are set when possible, a supplied secret is
 * validated, and the status-check task is executed directly.
 *
 * @param string|null $secret Access secret supplied with the request
 * @return array Result of the run, or an error array with success = false
 */
public static function executePublicStatusCron($secret = null)
{
    // Set the response headers up front.
    if (!headers_sent()) {
        $responseHeaders = array(
            'Content-Type: application/json; charset=utf-8',
            'Cache-Control: no-cache, no-store, must-revalidate',
            'Pragma: no-cache',
            'Expires: 0',
        );
        foreach ($responseHeaders as $headerLine) {
            header($headerLine);
        }
    }
    error_log('UrlNav Status Cron: 请求 ' . date('Y-m-d H:i:s'));

    try {
        // A supplied secret must be valid; a missing one is not checked.
        $rejected = $secret && !self::validateCronSecret($secret, 'status');
        if ($rejected) {
            error_log('UrlNav Status Cron: 无效的Cron密钥');
            $denied = array();
            $denied['success'] = false;
            $denied['message'] = '无效的访问密钥';
            $denied['timestamp'] = time();
            return $denied;
        }

        // Run the task directly, without any lock check.
        return self::executeStatusCronTask();
    } catch (Exception $e) {
        error_log('UrlNav Status Cron: 异常: ' . $e->getMessage());
        $failure = array();
        $failure['success'] = false;
        $failure['error'] = $e->getMessage();
        $failure['timestamp'] = time();
        $failure['message'] = '状态检查定时任务执行失败';
        return $failure;
    }
}
/**
 * Build the URL of the "unlockCron" action for a cron type.
 *
 * @param string $secret Cron secret, URL-encoded into the query string
 * @param string $type   Cron type, appended as-is
 * @return string Absolute URL based on the configured site URL
 */
private static function getUnlockUrl($secret, $type = 'rss')
{
    $siteUrl = Typecho_Widget::widget('Widget_Options')->siteUrl;

    // Remove a single trailing slash so the path below is joined cleanly.
    $root = (substr($siteUrl, -1) == '/') ? substr($siteUrl, 0, -1) : $siteUrl;

    $query = 'do=unlockCron&type=' . $type . '&secret=' . urlencode($secret);

    return $root . '/action/urlnav?' . $query;
}
/**
 * Mark cached RSS articles older than the configured keep time as not fresh.
 *
 * The cutoff is "now - rssKeepTime seconds" and is compared against the
 * article publication date. Articles referenced by urlnav_favorites are never
 * touched. A keep time of 0 (the default) disables the cleanup.
 *
 * @return int Number of articles marked as not fresh (0 on error or when disabled)
 */
private static function cleanExpiredCache()
{
    try {
        $config = self::getConfig();
        $keepTime = intval($config->rssKeepTime ?? 0);
        // 0 or less means "never expire".
        if ($keepTime <= 0) {
            return 0;
        }

        $db = self::getDbConnection();
        $cutoffTime = date('Y-m-d H:i:s', time() - $keepTime);

        // A single UPDATE is enough: rowCount() reports how many rows matched,
        // so no separate COUNT query is needed. NULL feed_id values are
        // excluded from the subquery because "x NOT IN (..., NULL)" is never
        // true and would silently protect every article.
        $stmt = $db->prepare("
            UPDATE urlnav_rss_cache
            SET is_fresh = 0
            WHERE pub_date < ?
            AND is_fresh = 1
            AND id NOT IN (
                SELECT DISTINCT feed_id FROM urlnav_favorites WHERE feed_id IS NOT NULL
            )
        ");
        $stmt->execute([$cutoffTime]);
        $markedCount = $stmt->rowCount();

        if ($markedCount > 0) {
            $days = round($keepTime / 86400, 1);
            error_log("UrlNav: 自动清理 - 标记了 {$markedCount} 条超过{$days}天的文章为非新鲜状态");
        }
        return $markedCount;
    } catch (Exception $e) {
        error_log('UrlNav: 清理过期文章失败: ' . $e->getMessage());
        return 0;
    }
}
/**
 * Delete all cached RSS articles that are marked as not fresh.
 *
 * Articles referenced by urlnav_favorites are kept. Afterwards the refresh
 * statistics of every row in urlnav_urls are reset, regardless of how many
 * articles were deleted.
 *
 * @return int Number of deleted articles
 * @throws Exception Re-thrown after logging when a database operation fails
 */
public static function cleanAllRssCache()
{
    try {
        $db = self::getDbConnection();

        // NULL feed_id values are excluded from the subquery because
        // "x NOT IN (..., NULL)" is never true and would block every delete.
        $stmt = $db->prepare("
            DELETE FROM urlnav_rss_cache
            WHERE is_fresh = 0
            AND id NOT IN (
                SELECT DISTINCT feed_id FROM urlnav_favorites WHERE feed_id IS NOT NULL
            )
        ");
        $stmt->execute();
        $deletedCount = $stmt->rowCount();

        // The current keep-time setting is only used for the log message.
        $config = self::getConfig();
        $keepTime = intval($config->rssKeepTime ?? 0);
        if ($keepTime > 0) {
            $days = round($keepTime / 86400, 1);
            $logMsg = "清理了 {$deletedCount} 条非新鲜RSS文章当前保留策略{$days}天内的文章保持新鲜）";
        } else {
            $logMsg = "清理了 {$deletedCount} 条非新鲜RSS文章当前保留策略不自动清理仅清理已标记文章";
        }

        // Reset the refresh statistics of all URLs.
        $db->exec("UPDATE urlnav_urls SET
            refresh_count = 0,
            success_count = 0,
            failure_count = 0,
            last_error = NULL,
            last_refresh = NULL
        ");

        if ($deletedCount > 0) {
            error_log("UrlNav: {$logMsg}");
        }
        return $deletedCount;
    } catch (Exception $e) {
        error_log('UrlNav: 清理所有缓存失败: ' . $e->getMessage());
        throw $e;
    }
}
/**
 * Fetch the most recent cron log rows, newest first.
 *
 * For rows whose cron_type contains "rss" or "refresh" and whose result is
 * valid JSON, the result is re-encoded so that the keys "successRssUrls" and
 * "failedRssUrls" are always present (as empty arrays when missing). Results
 * that are not valid JSON are returned unchanged.
 *
 * @param string|null $type  Only rows whose cron_type starts with this prefix; null for all
 * @param int         $limit Maximum number of rows
 * @return array List of log rows (associative arrays); empty on error
 */
public static function getCronLogs($type = null, $limit = 20)
{
    try {
        $db = self::getDbConnection();

        $sql = "SELECT * FROM urlnav_cron_log";
        if ($type !== null) {
            $sql .= " WHERE cron_type LIKE ?";
        }
        $sql .= " ORDER BY executed_time DESC LIMIT ?";

        $stmt = $db->prepare($sql);
        $position = 1;
        if ($type !== null) {
            $stmt->bindValue($position++, $type . '%', PDO::PARAM_STR);
        }
        // Bind LIMIT as an integer rather than as text.
        $stmt->bindValue($position, (int) $limit, PDO::PARAM_INT);
        $stmt->execute();
        $logs = $stmt->fetchAll(PDO::FETCH_ASSOC);

        foreach ($logs as $index => $log) {
            if (empty($log['result'])) {
                continue;
            }
            // json_decode returns null for invalid JSON; such rows are left alone.
            $result = json_decode($log['result'], true);
            if (!$result) {
                continue;
            }

            $isRssLog = $log['cron_type'] && (
                strpos($log['cron_type'], 'rss') !== false ||
                strpos($log['cron_type'], 'refresh') !== false
            );
            if (!$isRssLog) {
                continue;
            }

            // Guarantee the RSS URL lists exist for consumers of the log.
            if (!isset($result['successRssUrls'])) {
                $result['successRssUrls'] = [];
            }
            if (!isset($result['failedRssUrls'])) {
                $result['failedRssUrls'] = [];
            }
            // Writing through the index avoids leaving a dangling reference.
            $logs[$index]['result'] = json_encode($result, JSON_UNESCAPED_UNICODE);
        }

        return $logs;
    } catch (Exception $e) {
        error_log('UrlNav: 获取定时任务日志失败: ' . $e->getMessage());
        return array();
    }
}
/**
 * Compute execution statistics from urlnav_cron_log.
 *
 * A run counts as successful when its error_message is NULL or empty. The
 * total, success, failed and last_executed values honor the optional type
 * prefix; type_stats always covers every cron type in the table.
 *
 * @param string|null $type Only rows whose cron_type starts with this prefix; null for all
 * @return array {total, success, failed, last_executed, type_stats, success_rate};
 *               all-zero values on error
 */
public static function getCronStats($type = null)
{
    try {
        $db = self::getDbConnection();

        // The WHERE clause is only added when a type filter is given, so the
        // statement stays valid for the unfiltered case.
        $whereClause = "";
        $params = [];
        if ($type !== null) {
            $whereClause = " WHERE cron_type LIKE ?";
            $params[] = $type . '%';
        }

        // One pass yields the total, the success count and the last run.
        $stmt = $db->prepare("
            SELECT COUNT(*) AS total,
                   SUM(CASE WHEN error_message IS NULL OR error_message = '' THEN 1 ELSE 0 END) AS success,
                   MAX(executed_time) AS last_executed
            FROM urlnav_cron_log" . $whereClause);
        $stmt->execute($params);
        $summary = $stmt->fetch(PDO::FETCH_ASSOC);

        // SUM() is NULL for an empty table; the casts turn that into 0.
        $total = (int) ($summary['total'] ?? 0);
        $success = (int) ($summary['success'] ?? 0);
        $failed = $total - $success;
        $lastExecuted = $summary['last_executed'] ?? null;

        // Per-type counts over the whole table.
        $typeStmt = $db->query("SELECT cron_type, COUNT(*) as count FROM urlnav_cron_log GROUP BY cron_type ORDER BY cron_type");
        $typeStats = array();
        foreach ($typeStmt->fetchAll(PDO::FETCH_ASSOC) as $row) {
            $typeStats[$row['cron_type']] = $row['count'];
        }

        return array(
            'total' => $total,
            'success' => $success,
            'failed' => $failed,
            'last_executed' => $lastExecuted,
            'type_stats' => $typeStats,
            'success_rate' => $total > 0 ? round(($success / $total) * 100, 2) : 0
        );
    } catch (Exception $e) {
        error_log('UrlNav: 获取定时任务统计失败: ' . $e->getMessage());
        return array(
            'total' => 0,
            'success' => 0,
            'failed' => 0,
            'last_executed' => null,
            'type_stats' => array(),
            'success_rate' => 0
        );
    }
}
/**
 * Statistics for cron runs whose type starts with "rss".
 *
 * @return array Same structure as getCronStats()
 */
public static function getRssCronStats()
{
    $typePrefix = 'rss';

    return self::getCronStats($typePrefix);
}
/**
 * Statistics for cron runs whose type starts with "status".
 *
 * @return array Same structure as getCronStats()
 */
public static function getStatusCronStats()
{
    $typePrefix = 'status';

    return self::getCronStats($typePrefix);
}
/**
 * Count the active URLs that have a non-blank RSS address.
 *
 * @return int|string The count as returned by the database driver; 0 on error
 */
public static function getTotalUrlsWithRss()
{
    try {
        // The SQL text is kept flush-left so the statement is unchanged.
        $countQuery = "
SELECT COUNT(*) as total FROM urlnav_urls
WHERE is_active = 1
AND rss_url IS NOT NULL
AND TRIM(rss_url) != ''
";
        $row = self::getDbConnection()->query($countQuery)->fetch(PDO::FETCH_ASSOC);

        return isset($row['total']) ? $row['total'] : 0;
    } catch (Exception $e) {
        error_log('UrlNav: 获取有RSS地址的网址总数失败: ' . $e->getMessage());
        return 0;
    }
}
/**
 * Count the active URLs with an RSS address that have never been refreshed.
 *
 * @return int|string The count as returned by the database driver; 0 on error
 */
public static function getUnrefreshedUrlsCount()
{
    try {
        // The SQL text is kept flush-left so the statement is unchanged.
        $countQuery = "
SELECT COUNT(*) as total FROM urlnav_urls
WHERE is_active = 1
AND rss_url IS NOT NULL
AND TRIM(rss_url) != ''
AND last_refresh IS NULL
";
        $row = self::getDbConnection()->query($countQuery)->fetch(PDO::FETCH_ASSOC);

        return isset($row['total']) ? $row['total'] : 0;
    } catch (Exception $e) {
        error_log('UrlNav: 获取从未刷新过的网址数量失败: ' . $e->getMessage());
        return 0;
    }
}
/**
 * Find the oldest last_refresh value among active URLs with an RSS address.
 *
 * URLs that have never been refreshed are not considered.
 *
 * @return string|null The oldest refresh time, or null when there is none or on error
 */
public static function getOldestRefreshTime()
{
    try {
        // The SQL text is kept flush-left so the statement is unchanged.
        $oldestQuery = "
SELECT MIN(last_refresh) as oldest FROM urlnav_urls
WHERE is_active = 1
AND rss_url IS NOT NULL
AND TRIM(rss_url) != ''
AND last_refresh IS NOT NULL
";
        $row = self::getDbConnection()->query($oldestQuery)->fetch(PDO::FETCH_ASSOC);

        return isset($row['oldest']) ? $row['oldest'] : null;
    } catch (Exception $e) {
        error_log('UrlNav: 获取最久的刷新时间失败: ' . $e->getMessage());
        return null;
    }
}
/**
 * List all active categories, ordered by sort_order and then newest first.
 *
 * @return array List of category rows (associative arrays); empty on error
 */
public static function getAllCategories()
{
    try {
        $listQuery = "SELECT * FROM urlnav_categories WHERE is_active = 1 ORDER BY sort_order, created_at DESC";

        return self::getDbConnection()->query($listQuery)->fetchAll(PDO::FETCH_ASSOC);
    } catch (Exception $e) {
        error_log('UrlNav: 获取分类失败: ' . $e->getMessage());
        return array();
    }
}
/**
 * List active URLs with pagination and optional filters.
 *
 * Paging input is normalized before use: a non-numeric page or a page below 1
 * becomes 1, and a non-numeric or non-positive page size becomes 20. Unknown
 * filter values are ignored. The search term is matched with LIKE against
 * title, URL, description and RSS address.
 *
 * @param int|string|null $categoryId Category id; null or '' for all categories
 * @param int             $page       1-based page number
 * @param int             $pageSize   Rows per page
 * @param string          $search     Search term; '' for none
 * @param string          $status     'online', 'offline', 'unchecked' or '' for any
 * @param string          $hasRss     'yes', 'no' or '' for any
 * @param string|int|null $starRating '0'..'3', 'starred' (rating > 0), or null/'' for any
 * @return array {total, totalPages, currentPage, pageSize, data}; empty data on error
 */
public static function getAllUrls($categoryId = null, $page = 1, $pageSize = 20, $search = '', $status = '', $hasRss = '', $starRating = null)
{
    // Normalize paging first so that neither the OFFSET can go negative nor
    // the page count divide by zero.
    $page = is_numeric($page) ? max(1, (int) $page) : 1;
    $pageSize = (is_numeric($pageSize) && (int) $pageSize > 0) ? (int) $pageSize : 20;

    // Filter value => SQL fragment. Lookups are by exact string key, which
    // avoids the loose comparisons of a switch statement.
    $statusFilters = array(
        'online' => " AND u.is_online = 1 AND u.last_status_check IS NOT NULL",
        'offline' => " AND u.is_online = 0 AND u.last_status_check IS NOT NULL",
        'unchecked' => " AND u.last_status_check IS NULL",
    );
    $rssFilters = array(
        'yes' => " AND u.rss_url IS NOT NULL AND u.rss_url != ''",
        'no' => " AND (u.rss_url IS NULL OR u.rss_url = '')",
    );
    $starFilters = array(
        '0' => " AND u.star_rating = 0",
        '1' => " AND u.star_rating = 1",
        '2' => " AND u.star_rating = 2",
        '3' => " AND u.star_rating = 3",
        'starred' => " AND u.star_rating > 0",
    );
    $pickFilter = static function ($value, array $filters) {
        $key = is_scalar($value) ? (string) $value : '';
        return isset($filters[$key]) ? $filters[$key] : '';
    };

    try {
        $db = self::getDbConnection();

        $whereClause = "WHERE u.is_active = 1";
        $params = array();

        if ($categoryId !== null && $categoryId !== '') {
            $whereClause .= " AND u.category_id = ?";
            $params[] = $categoryId;
        }

        $whereClause .= $pickFilter($status, $statusFilters);
        $whereClause .= $pickFilter($hasRss, $rssFilters);
        $whereClause .= $pickFilter($starRating, $starFilters);

        // Compare against '' explicitly so that a search for "0" is honored.
        if ($search !== null && (string) $search !== '') {
            $whereClause .= " AND (u.title LIKE ? OR u.url LIKE ? OR u.description LIKE ? OR u.rss_url LIKE ?)";
            $searchTerm = "%{$search}%";
            array_push($params, $searchTerm, $searchTerm, $searchTerm, $searchTerm);
        }

        // Total number of matching rows.
        $countStmt = $db->prepare("SELECT COUNT(*) as total FROM urlnav_urls u $whereClause");
        $countStmt->execute($params);
        $countResult = $countStmt->fetch(PDO::FETCH_ASSOC);
        $total = $countResult['total'];

        $offset = ($page - 1) * $pageSize;

        // Page of rows, including the category name.
        $sql = "SELECT u.*, c.name as category_name
                FROM urlnav_urls u
                LEFT JOIN urlnav_categories c ON u.category_id = c.id
                $whereClause
                ORDER BY u.sort_order, u.created_at DESC
                LIMIT ? OFFSET ?";
        $stmt = $db->prepare($sql);
        $stmt->execute(array_merge($params, array($pageSize, $offset)));
        $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);

        return array(
            'total' => $total,
            'totalPages' => ceil($total / $pageSize),
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => $urls
        );
    } catch (Exception $e) {
        error_log('UrlNav: 获取网址失败: ' . $e->getMessage());
        return array(
            'total' => 0,
            'totalPages' => 0,
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => array()
        );
    }
}
/**
 * Load a single category row by its id.
 *
 * @param int|string $id Category id
 * @return array|false|null The row, false when no row matches, or null for an
 *                          empty id or on error
 */
public static function getCategoryById($id)
{
    if (empty($id)) {
        return null;
    }

    try {
        $lookup = self::getDbConnection()->prepare("SELECT * FROM urlnav_categories WHERE id = ?");
        $lookup->execute(array($id));

        return $lookup->fetch(PDO::FETCH_ASSOC);
    } catch (Exception $e) {
        error_log('UrlNav: 获取分类信息失败: ' . $e->getMessage());
        return null;
    }
}
/**
 * Load a single URL row by its id.
 *
 * @param int|string $id URL id
 * @return array|false|null The row, false when no row matches, or null for an
 *                          empty id or on error
 */
public static function getUrlById($id)
{
    if (empty($id)) {
        return null;
    }

    try {
        $lookup = self::getDbConnection()->prepare("SELECT * FROM urlnav_urls WHERE id = ?");
        $lookup->execute(array($id));

        return $lookup->fetch(PDO::FETCH_ASSOC);
    } catch (Exception $e) {
        error_log('UrlNav: 获取网址信息失败: ' . $e->getMessage());
        return null;
    }
}
/**
 * Check that a value is a syntactically valid http(s) URL.
 *
 * The scheme is compared case-insensitively, so "HTTPS://example.com" is
 * accepted just like "https://example.com".
 *
 * @param string $url URL to validate
 * @return bool True for a well-formed URL with the http or https scheme
 */
public static function validateUrl($url)
{
    if (empty($url)) {
        return false;
    }

    // General URL syntax.
    if (!filter_var($url, FILTER_VALIDATE_URL)) {
        return false;
    }

    // Only web URLs are allowed; URL schemes are case-insensitive.
    $scheme = parse_url($url, PHP_URL_SCHEME);
    if (!is_string($scheme)) {
        return false;
    }

    return in_array(strtolower($scheme), array('http', 'https'), true);
}
/**
 * List fresh cached RSS articles with pagination and optional filters.
 *
 * Paging input is normalized before use: a non-numeric page or a page below 1
 * becomes 1, and a non-numeric or non-positive page size becomes 20. The star
 * rating is compared as a string, so integer and string inputs behave the
 * same. The search term is matched with LIKE against the article title,
 * description, full content and the site title.
 *
 * @param int|string|null $categoryId Category id; null, '' or 'all' for all categories
 * @param int             $page       1-based page number
 * @param int             $pageSize   Rows per page
 * @param string          $search     Search term; '' for none
 * @param string|int      $starRating '0'..'3', 'starred' (rating > 0), or '' for any
 * @return array {total, totalPages, currentPage, pageSize, data}; empty data on error
 */
public static function getAllRssFeeds($categoryId = null, $page = 1, $pageSize = 20, $search = '', $starRating = '')
{
    // Normalize paging up front so that the success and error paths report
    // the same values and the OFFSET can never be negative.
    $page = is_numeric($page) ? max(1, (int) $page) : 1;
    $pageSize = (is_numeric($pageSize) && (int) $pageSize > 0) ? (int) $pageSize : 20;

    $starFilters = array(
        '0' => " AND u.star_rating = 0",
        '1' => " AND u.star_rating = 1",
        '2' => " AND u.star_rating = 2",
        '3' => " AND u.star_rating = 3",
        'starred' => " AND u.star_rating > 0",
    );

    try {
        $db = self::getDbConnection();

        // Only fresh articles are listed.
        $whereClause = "WHERE c.is_fresh = 1";
        $params = array();

        if ($categoryId !== null && $categoryId !== '' && $categoryId !== 'all') {
            $whereClause .= " AND u.category_id = ?";
            $params[] = $categoryId;
        }

        // Star filter; unknown values are ignored.
        $starKey = is_scalar($starRating) ? (string) $starRating : '';
        if (isset($starFilters[$starKey])) {
            $whereClause .= $starFilters[$starKey];
        }

        // Compare against '' explicitly so that a search for "0" is honored.
        if ($search !== null && (string) $search !== '') {
            $whereClause .= " AND (c.feed_title LIKE ? OR c.feed_description LIKE ? OR c.full_content LIKE ? OR u.title LIKE ?)";
            $searchTerm = "%{$search}%";
            array_push($params, $searchTerm, $searchTerm, $searchTerm, $searchTerm);
        }

        // Total number of matching articles.
        $countSql = "SELECT COUNT(*) as total
                     FROM urlnav_rss_cache c
                     LEFT JOIN urlnav_urls u ON c.url_id = u.id
                     $whereClause";
        $countStmt = $db->prepare($countSql);
        $countStmt->execute($params);
        $countResult = $countStmt->fetch(PDO::FETCH_ASSOC);
        $total = $countResult['total'];

        $offset = ($page - 1) * $pageSize;

        // Page of articles with site and category information, newest first.
        $sql = "SELECT c.*, u.title as site_title, u.url as site_url, u.category_id, u.star_rating,
                       cat.name as category_name
                FROM urlnav_rss_cache c
                LEFT JOIN urlnav_urls u ON c.url_id = u.id
                LEFT JOIN urlnav_categories cat ON u.category_id = cat.id
                $whereClause
                ORDER BY c.pub_date DESC
                LIMIT ? OFFSET ?";
        $stmt = $db->prepare($sql);
        $stmt->execute(array_merge($params, array($pageSize, $offset)));
        $feeds = $stmt->fetchAll(PDO::FETCH_ASSOC);

        return array(
            'total' => $total,
            'totalPages' => ceil($total / $pageSize),
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => $feeds
        );
    } catch (Exception $e) {
        error_log('UrlNav: 获取RSS信息失败: ' . $e->getMessage());
        return array(
            'total' => 0,
            'totalPages' => 0,
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => array()
        );
    }
}
/**
 * Build the table of selectable star ratings.
 *
 * @return array Display labels keyed by rating value 0-3; 0 is the "no rating" entry.
 */
public static function getStarRatingOptions()
{
    // Rating 0 has a textual label; 1-3 are rendered as that many stars.
    $labels = [];
    $labels['0'] = '无星级';
    for ($stars = 1; $stars <= 3; $stars++) {
        $labels[$stars] = str_repeat('★', $stars);
    }

    return $labels;
}
/**
 * Translate a star-rating value into its display label.
 *
 * @param mixed $rating Rating value to look up in getStarRatingOptions().
 * @return string The matching label, or the "no rating" label for unknown values.
 */
public static function getStarRatingText($rating)
{
    $labels = self::getStarRatingOptions();

    if (isset($labels[$rating])) {
        return $labels[$rating];
    }

    return '无星级';
}
/**
 * Collect summary statistics about the RSS cache table.
 *
 * @return array Keys:
 *   - total, fresh, stale: row counts in urlnav_rss_cache (all / is_fresh = 1 / is_fresh = 0)
 *   - last_cached, first_cached: MAX / MIN of cached_at, or null when the table is empty
 *   - last_auto_refresh: latest refresh_time logged with cron_type 'rss', or null
 *   - url_stats: url_count (distinct non-null url_id values), avg_feeds_per_url and
 *     max_feeds_per_url (average / maximum number of cached rows per URL)
 *   On a database error every count is 0 and every timestamp is null.
 */
public static function getCacheStats()
{
    try {
        $db = self::getDbConnection();

        // One pass over the cache table instead of five separate scans.
        $stmt = $db->query("
            SELECT
                COUNT(*) AS total,
                SUM(CASE WHEN is_fresh = 1 THEN 1 ELSE 0 END) AS fresh,
                SUM(CASE WHEN is_fresh = 0 THEN 1 ELSE 0 END) AS stale,
                MAX(cached_at) AS last_cached,
                MIN(cached_at) AS first_cached
            FROM urlnav_rss_cache
        ");
        $cache = $stmt->fetch(PDO::FETCH_ASSOC);

        // Most recent RSS refresh recorded in the refresh log.
        $stmt = $db->query("SELECT MAX(refresh_time) as last_auto_refresh FROM urlnav_refresh_log WHERE cron_type = 'rss'");
        $refreshResult = $stmt->fetch(PDO::FETCH_ASSOC);

        // Per-URL statistics. Aggregating over one row per URL gives a true
        // per-URL average; averaging over cache rows would weight each URL by
        // its own item count and overstate the figure.
        $stmt = $db->query("
            SELECT
                COUNT(*) AS url_count,
                AVG(feed_count) AS avg_feeds_per_url,
                MAX(feed_count) AS max_feeds_per_url
            FROM (
                SELECT COUNT(*) AS feed_count
                FROM urlnav_rss_cache
                WHERE url_id IS NOT NULL
                GROUP BY url_id
            ) AS per_url
        ");
        $urlStats = $stmt->fetch(PDO::FETCH_ASSOC);

        return [
            // SUM() yields NULL on an empty table, hence the ?? 0 fallbacks.
            'total' => $cache['total'] ?? 0,
            'fresh' => $cache['fresh'] ?? 0,
            'stale' => $cache['stale'] ?? 0,
            'last_cached' => $cache['last_cached'] ?? null,
            'first_cached' => $cache['first_cached'] ?? null,
            'last_auto_refresh' => $refreshResult['last_auto_refresh'] ?? null,
            'url_stats' => $urlStats
        ];
    } catch (Exception $e) {
        error_log('UrlNav: 获取缓存统计失败: ' . $e->getMessage());
        return [
            'total' => 0,
            'fresh' => 0,
            'stale' => 0,
            'last_cached' => null,
            'first_cached' => null,
            'last_auto_refresh' => null,
            'url_stats' => ['url_count' => 0, 'avg_feeds_per_url' => 0, 'max_feeds_per_url' => 0]
        ];
    }
}
/**
 * Return recent refresh-log entries plus overall totals for one cron type.
 *
 * @param string $cronType Value matched against urlnav_refresh_log.cron_type.
 * @param mixed  $limit    Maximum number of log rows to return; non-numeric or < 1 falls back to 10.
 * @return array Keys:
 *   - logs: newest-first rows from urlnav_refresh_log
 *   - total_stats: total_refreshes, total_success, total_new_articles, avg_duration
 *   On a database error logs is empty and every total is 0.
 */
public static function getRefreshStats($cronType = 'rss', $limit = 10)
{
    // A negative LIMIT means "no limit" in SQLite, so never pass one through.
    $limit = is_numeric($limit) ? (int)$limit : 10;
    if ($limit < 1) {
        $limit = 10;
    }

    try {
        $db = self::getDbConnection();

        $stmt = $db->prepare("
            SELECT * FROM urlnav_refresh_log
            WHERE cron_type = ?
            ORDER BY refresh_time DESC
            LIMIT ?
        ");
        $stmt->bindValue(1, $cronType, PDO::PARAM_STR);
        $stmt->bindValue(2, $limit, PDO::PARAM_INT);
        $stmt->execute();
        $logs = $stmt->fetchAll(PDO::FETCH_ASSOC);

        // COALESCE keeps the totals numeric when no log rows exist, matching
        // the zero values used by the error fallback below.
        $totalStmt = $db->prepare("
            SELECT
                COUNT(*) as total_refreshes,
                COALESCE(SUM(success_count), 0) as total_success,
                COALESCE(SUM(new_articles), 0) as total_new_articles,
                COALESCE(AVG(duration), 0) as avg_duration
            FROM urlnav_refresh_log
            WHERE cron_type = ?
        ");
        $totalStmt->execute([$cronType]);
        $totalStats = $totalStmt->fetch(PDO::FETCH_ASSOC);

        return [
            'logs' => $logs,
            'total_stats' => $totalStats
        ];
    } catch (Exception $e) {
        error_log('UrlNav: 获取刷新统计失败: ' . $e->getMessage());
        return [
            'logs' => [],
            'total_stats' => [
                'total_refreshes' => 0,
                'total_success' => 0,
                'total_new_articles' => 0,
                'avg_duration' => 0
            ]
        ];
    }
}
/**
 * Probe stored URLs for reachability, logging every probe and updating each URL row.
 *
 * The mode is selected from the arguments:
 *  - Automatic run: $urlIds === null, $isBatchCheck is false and $batchInfo === null.
 *    Probes up to the configured `statusCheckMax` active URLs (never-checked first,
 *    then least recently checked) and also returns `successUrls` / `failedUrls`.
 *  - Explicit IDs: $urlIds is an array, a comma-separated string or a single id.
 *    Without usable batch info every given id is probed in one pass; with batch
 *    info the id list is paged by batch number and size.
 *  - Stored selection: $urlIds === null and the batch info carries `selected_ids`.
 *  - Everything: otherwise all active URLs are paged (ordered by id) by batch number and size.
 *
 * @param array|string|int|null $urlIds       IDs to check. An empty string is treated as "not specified".
 * @param bool                  $isBatchCheck True when called as part of a batched run.
 * @param string|array|null     $batchInfo    JSON string (or decoded array) with the keys
 *                                            `batch`, `total`, `size` and `selected_ids`.
 * @return array Always contains `success` and `message`. Successful runs add `total`,
 *               `success_count`, `failed_count` and, when URLs were probed,
 *               `avg_response_time` and `results` (probe result keyed by URL id).
 *               Batched runs add `has_more`, `batch_number`, `total_batches`,
 *               `checked_ids`, `next_batch_info` (JSON), `processed_count` and `total_to_process`.
 */
public static function manualCheckStatus($urlIds = null, $isBatchCheck = false, $batchInfo = null)
{
    // Probing many sites is slow; allow up to five minutes.
    @set_time_limit(300);

    // Declared before the try block so the error path can always report them.
    $batchNumber = 1;
    $totalBatches = 1;
    $batchSize = 10;

    try {
        $db = self::getDbConnection();

        // ---- Decode batch information -------------------------------------
        $selectedIds = [];
        $batchData = null;
        if (is_array($batchInfo)) {
            $batchData = $batchInfo;
        } elseif (is_string($batchInfo) && $batchInfo !== '') {
            $batchData = json_decode($batchInfo, true);
        }
        $hasBatchData = is_array($batchData) && !empty($batchData);
        if ($hasBatchData) {
            $batchNumber = max(1, intval($batchData['batch'] ?? 1));
            $totalBatches = max(1, intval($batchData['total'] ?? 1));
            $batchSize = intval($batchData['size'] ?? 10);
            if ($batchSize < 1) {
                // A zero/negative size would divide by zero below.
                $batchSize = 10;
            }
            if (!empty($batchData['selected_ids'])) {
                $selectedIds = self::manualCheckNormalizeIds($batchData['selected_ids']);
            }
        }

        // ---- Automatic run ------------------------------------------------
        if ($urlIds === null && !$isBatchCheck && $batchInfo === null) {
            $config = self::getConfig();
            $maxCheck = intval($config->statusCheckMax ?? 80);
            // Safety limits on the number of probes per run.
            if ($maxCheck > 200) {
                $maxCheck = 200;
            }
            if ($maxCheck < 1) {
                $maxCheck = 20;
            }

            // Never-checked URLs first, then oldest check first. The CASE
            // expression makes the NULL ordering explicit and portable.
            $stmt = $db->prepare("
                SELECT * FROM urlnav_urls WHERE is_active = 1
                ORDER BY
                    CASE WHEN last_status_check IS NULL THEN 0 ELSE 1 END,
                    last_status_check ASC
                LIMIT ?
            ");
            $stmt->bindValue(1, $maxCheck, PDO::PARAM_INT);
            $stmt->execute();
            $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);

            if (empty($urls)) {
                return [
                    'success' => true,
                    'message' => '没有需要检查的网址',
                    'total' => 0,
                    'success_count' => 0,
                    'failed_count' => 0
                ];
            }

            $run = self::manualCheckRunBatch($db, $urls, 50000);
            $avgResponseTime = $run['success_count'] > 0
                ? round($run['total_response_time'] / $run['success_count'], 2)
                : 0;

            return [
                'success' => true,
                'message' => sprintf("自动检查完成: %d成功, %d失败", $run['success_count'], $run['failed_count']),
                'total' => count($urls),
                'success_count' => $run['success_count'],
                'failed_count' => $run['failed_count'],
                'avg_response_time' => $avgResponseTime,
                'results' => $run['results'],
                'successUrls' => $run['success_urls'],
                'failedUrls' => $run['failed_urls']
            ];
        }

        // ---- Work out which IDs (if any) were requested ---------------------
        $idsToCheck = [];
        if ($urlIds !== null && $urlIds !== '') {
            $idsToCheck = self::manualCheckNormalizeIds($urlIds);
            if (empty($idsToCheck)) {
                return [
                    'success' => true,
                    'message' => '没有选择要检查的网址',
                    'total' => 0,
                    'success_count' => 0,
                    'failed_count' => 0,
                    'has_more' => false,
                    'batch_number' => $batchNumber,
                    'total_batches' => $totalBatches
                ];
            }
            if (!$hasBatchData) {
                // The caller already chose exactly which IDs to probe.
                $batchNumber = 1;
                $batchSize = count($idsToCheck);
            }
        } elseif ($urlIds === null && !empty($selectedIds)) {
            $idsToCheck = $selectedIds;
        }

        // ---- Select the rows of the current batch ---------------------------
        if (!empty($idsToCheck)) {
            $totalUrls = count($idsToCheck);
            $totalBatches = (int)ceil($totalUrls / $batchSize);

            if ($batchNumber > $totalBatches) {
                return [
                    'success' => true,
                    'message' => '所有选中的网址已检查完成',
                    'total' => 0,
                    'success_count' => 0,
                    'failed_count' => 0,
                    'has_more' => false,
                    'batch_number' => $batchNumber,
                    'total_batches' => $totalBatches,
                    'completed' => true
                ];
            }

            $currentBatchIds = array_slice($idsToCheck, ($batchNumber - 1) * $batchSize, $batchSize);
            $placeholders = implode(',', array_fill(0, count($currentBatchIds), '?'));
            $stmt = $db->prepare(
                "SELECT * FROM urlnav_urls WHERE is_active = 1 AND id IN ($placeholders) ORDER BY id"
            );
            foreach ($currentBatchIds as $position => $id) {
                $stmt->bindValue($position + 1, $id, PDO::PARAM_INT);
            }
        } else {
            $totalStmt = $db->query("SELECT COUNT(*) as total FROM urlnav_urls WHERE is_active = 1");
            $totalResult = $totalStmt->fetch(PDO::FETCH_ASSOC);
            $totalUrls = (int)($totalResult['total'] ?? 0);
            $totalBatches = (int)ceil($totalUrls / $batchSize);

            if ($batchNumber > $totalBatches) {
                return [
                    'success' => true,
                    'message' => '所有网址已检查完成',
                    'total' => 0,
                    'success_count' => 0,
                    'failed_count' => 0,
                    'has_more' => false,
                    'batch_number' => $batchNumber,
                    'total_batches' => $totalBatches,
                    'completed' => true
                ];
            }

            // Page by id, not by last_status_check: every batch rewrites
            // last_status_check, so ordering by it would reshuffle the rows
            // between batches and OFFSET would skip URLs.
            $stmt = $db->prepare(
                "SELECT * FROM urlnav_urls WHERE is_active = 1 ORDER BY id ASC LIMIT ? OFFSET ?"
            );
            $stmt->bindValue(1, $batchSize, PDO::PARAM_INT);
            $stmt->bindValue(2, ($batchNumber - 1) * $batchSize, PDO::PARAM_INT);
        }

        $stmt->execute();
        $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);

        if (empty($urls)) {
            return [
                'success' => true,
                'message' => '没有需要检查的网址',
                'total' => 0,
                'success_count' => 0,
                'failed_count' => 0,
                'has_more' => false,
                'batch_number' => $batchNumber,
                'total_batches' => $totalBatches
            ];
        }

        // ---- Probe and report ------------------------------------------------
        $run = self::manualCheckRunBatch($db, $urls, 100000);
        $checkedIds = $run['checked_ids'];
        $processedCount = ($batchNumber - 1) * $batchSize + count($checkedIds);
        $avgResponseTime = $run['success_count'] > 0
            ? round($run['total_response_time'] / $run['success_count'], 2)
            : 0;

        // Handed back by the client to request the following batch.
        $nextBatchInfo = [
            'batch' => $batchNumber + 1,
            'total' => $totalBatches,
            'size' => $batchSize,
            'selected_ids' => !empty($idsToCheck) ? implode(',', $idsToCheck) : ''
        ];

        return [
            'success' => true,
            'message' => sprintf("第%d批检查完成: %d成功, %d失败", $batchNumber, $run['success_count'], $run['failed_count']),
            'total' => count($urls),
            'success_count' => $run['success_count'],
            'failed_count' => $run['failed_count'],
            'avg_response_time' => $avgResponseTime,
            'results' => $run['results'],
            // Based on batch position so inactive/missing IDs cannot cause an extra round trip.
            'has_more' => $batchNumber < $totalBatches,
            'checked_ids' => $checkedIds,
            'batch_number' => $batchNumber,
            'total_batches' => $totalBatches,
            'next_batch_info' => json_encode($nextBatchInfo),
            'processed_count' => $processedCount,
            'total_to_process' => $totalUrls
        ];
    } catch (Exception $e) {
        error_log('UrlNav: 检查网站状态失败: ' . $e->getMessage());
        return [
            'success' => false,
            'message' => '检查失败: ' . $e->getMessage(),
            'has_more' => false,
            'batch_number' => $batchNumber,
            'total_batches' => $totalBatches
        ];
    }
}

/**
 * Turn an id list in any accepted form into a de-duplicated list of positive integers.
 *
 * @param mixed $rawIds Array of ids, comma-separated string, or a single integer.
 * @return int[] Valid ids in first-seen order; empty when nothing usable was given.
 */
private static function manualCheckNormalizeIds($rawIds)
{
    if (is_string($rawIds)) {
        $rawIds = explode(',', $rawIds);
    } elseif (is_int($rawIds)) {
        $rawIds = [$rawIds];
    } elseif (!is_array($rawIds)) {
        return [];
    }

    $ids = [];
    foreach ($rawIds as $rawId) {
        // FILTER_VALIDATE_INT tolerates surrounding whitespace and rejects "1.5", "abc", arrays.
        $id = filter_var($rawId, FILTER_VALIDATE_INT);
        if ($id !== false && $id > 0) {
            $ids[$id] = $id;
        }
    }

    return array_values($ids);
}

/**
 * Probe each URL row, append a status-log row and update the URL's status columns.
 *
 * @param PDO   $db                Open database connection.
 * @param array $urls              Rows from urlnav_urls (needs `id`, `url`, optionally `title`).
 * @param int   $pauseMicroseconds Pause inserted after every fifth probe to limit load.
 * @return array Keys: results, checked_ids, success_count, failed_count,
 *               total_response_time, success_urls, failed_urls.
 */
private static function manualCheckRunBatch($db, array $urls, $pauseMicroseconds)
{
    $logStmt = $db->prepare("
        INSERT INTO urlnav_status_log
        (url_id, is_online, status_code, response_time, check_time, error_message)
        VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, ?)
    ");
    $updateStmt = $db->prepare("
        UPDATE urlnav_urls SET
            is_online = ?,
            last_status_check = CURRENT_TIMESTAMP,
            status_check_count = status_check_count + 1,
            last_status_code = ?
        WHERE id = ?
    ");

    $run = [
        'results' => [],
        'checked_ids' => [],
        'success_count' => 0,
        'failed_count' => 0,
        'total_response_time' => 0,
        'success_urls' => [],
        'failed_urls' => []
    ];

    $probed = 0;
    foreach ($urls as $url) {
        $status = self::checkSingleWebsite($url['url']);
        $urlId = $url['id'];
        $run['results'][$urlId] = $status;
        $run['checked_ids'][] = $urlId;

        if ($status['success']) {
            $run['total_response_time'] += $status['response_time'];
            $run['success_count']++;
            $run['success_urls'][] = [
                'id' => $urlId,
                'url' => $url['url'],
                'title' => $url['title'] ?? $url['url'],
                'response_time' => $status['response_time']
            ];
        } else {
            $run['failed_count']++;
            $run['failed_urls'][] = [
                'id' => $urlId,
                'url' => $url['url'],
                'title' => $url['title'] ?? $url['url'],
                'error' => $status['message'],
                'status_code' => $status['status_code']
            ];
        }

        // History row for this probe.
        $logStmt->execute([
            $urlId,
            $status['success'] ? 1 : 0,
            $status['status_code'],
            $status['response_time'],
            $status['message']
        ]);

        // Current status on the URL itself.
        $updateStmt->execute([
            $status['success'] ? 1 : 0,
            $status['status_code'],
            $urlId
        ]);

        // Brief pause every five probes to avoid hammering the server.
        $probed++;
        if ($probed % 5 === 0) {
            usleep($pauseMicroseconds);
        }
    }

    return $run;
}
/**
 * Probe a single URL with an HTTP GET request and classify the outcome.
 *
 * GET is used instead of HEAD (with a Range header asking for roughly the first
 * kilobyte) because the original author found HEAD unreliable on some servers.
 * Any response with a status from 200 to 499 counts as "reachable"; 5xx responses
 * and transport errors count as failures.
 *
 * @param string $url URL to probe.
 * @return array Always contains success (bool), status_code (int, 0 on transport
 *               failure), message (string) and response_time (milliseconds, wall clock).
 *               When an HTTP response was received it also contains response_time_curl,
 *               redirect_count, effective_url, method and timing (total/connect/dns, seconds).
 */
private static function checkSingleWebsite($url)
{
    $startTime = microtime(true);
    try {
        // Configured timeout in seconds. cURL treats 0 as "never time out",
        // which would let one dead site stall a whole batch, so fall back to 15.
        $config = self::getConfig();
        $timeout = intval($config->statusCheckTimeout ?? 15);
        if ($timeout < 1) {
            $timeout = 15;
        }

        error_log("UrlNav: 开始检查网站状态: {$url} [使用GET方法]");

        $ch = function_exists('curl_init') ? curl_init() : false;
        if ($ch === false) {
            throw new Exception('cURL不可用');
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true, // keep the body out of the output buffer
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 5,
            CURLOPT_TIMEOUT => $timeout,
            CURLOPT_CONNECTTIMEOUT => $timeout,
            // Certificate checks are disabled: a site with a bad certificate is
            // still considered reachable by this checker.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
            // Only ever speak HTTP(S), both for the initial URL and for redirects.
            CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            CURLOPT_ENCODING => '',
            CURLOPT_IPRESOLVE => CURL_IPRESOLVE_V4,
            CURLOPT_FAILONERROR => false,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_NOBODY => false, // GET rather than HEAD
            CURLOPT_HEADER => true,
            CURLOPT_RANGE => '0-1024', // ask for the first ~1KB only to save bandwidth
        ]);

        // The body is not inspected; only the transfer metadata matters.
        curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $totalTime = curl_getinfo($ch, CURLINFO_TOTAL_TIME);
        $connectTime = curl_getinfo($ch, CURLINFO_CONNECT_TIME);
        $namelookupTime = curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME);
        $redirectCount = curl_getinfo($ch, CURLINFO_REDIRECT_COUNT);
        $effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        $error = curl_error($ch);
        $errorNo = curl_errno($ch);
        curl_close($ch);

        $responseTime = round((microtime(true) - $startTime) * 1000, 2);

        error_log("UrlNav: GET检查结果 - URL: {$url}, HTTP: {$httpCode}, 耗时: {$totalTime}s");

        if ($errorNo !== 0) {
            // Transport-level failure: map common cURL codes to readable messages.
            $errorMessages = [
                CURLE_COULDNT_CONNECT => '无法连接服务器',
                CURLE_COULDNT_RESOLVE_HOST => '无法解析主机名',
                CURLE_OPERATION_TIMEDOUT => '请求超时',
                CURLE_SSL_CONNECT_ERROR => 'SSL连接错误',
                CURLE_GOT_NOTHING => '服务器无响应',
            ];
            $errorMessage = $errorMessages[$errorNo] ?? '连接失败: ' . $error;
            error_log("UrlNav: cURL错误 - {$errorMessage}");
            return [
                'success' => false,
                'status_code' => 0,
                'message' => $errorMessage,
                'response_time' => $responseTime
            ];
        }

        // Deliberately lenient: 2xx, 3xx and 4xx all prove the site answered.
        $isSuccess = false;
        if ($httpCode >= 200 && $httpCode < 500) {
            $isSuccess = true;
            if ($httpCode < 300) {
                $statusMessage = "正常 (HTTP {$httpCode})";
            } elseif ($httpCode < 400) {
                $statusMessage = "重定向 (HTTP {$httpCode})";
            } else {
                $statusMessage = "客户端错误但网站可访问 (HTTP {$httpCode})";
            }
            if ($redirectCount > 0 && $url != $effectiveUrl) {
                $statusMessage .= ",经过 {$redirectCount} 次重定向";
            }
        } elseif ($httpCode >= 500) {
            $statusMessage = "服务器错误 (HTTP {$httpCode})";
        } elseif ($httpCode == 0) {
            $statusMessage = "未获取到HTTP状态码";
        } else {
            // 1xx or otherwise unexpected final status: report it instead of an empty message.
            $statusMessage = "未知状态 (HTTP {$httpCode})";
        }

        // Reachable but slower than three seconds: flag it in the message.
        if ($isSuccess && $totalTime > 3) {
            $statusMessage = "慢速但可访问 ({$totalTime}s, HTTP {$httpCode})";
        }

        return [
            'success' => $isSuccess,
            'status_code' => $httpCode,
            'message' => $statusMessage,
            'response_time' => $responseTime,
            'response_time_curl' => round($totalTime * 1000, 2),
            'redirect_count' => $redirectCount,
            'effective_url' => $effectiveUrl,
            'method' => 'GET',
            'timing' => [
                'total' => $totalTime,
                'connect' => $connectTime,
                'dns' => $namelookupTime
            ]
        ];
    } catch (Exception $e) {
        error_log("UrlNav: 检查异常 - {$url}: " . $e->getMessage());
        return [
            'success' => false,
            'status_code' => 0,
            'message' => '检查异常: ' . $e->getMessage(),
            'response_time' => round((microtime(true) - $startTime) * 1000, 2)
        ];
    }
}
/**
 * Request handler: run one batch of website status checks and emit the result as JSON.
 *
 * Reads `url_ids` (or `urlIds`) and `batch_info` from the request. When IDs are
 * given, only those URLs are checked, paged by the batch number and size in
 * `batch_info` (batch 1, size 10 when `batch_info` is absent). Without IDs, all
 * URLs are checked in batches via manualCheckStatus().
 *
 * NOTE(review): this method uses $this->request and $this->response, which are not
 * declared in the visible part of this class — confirm it is only invoked on an
 * object that provides them.
 *
 * @return void The JSON response is sent through $this->response->throwJson().
 */
public function checkStatus()
{
    try {
        // Accept both spellings of the id parameter.
        $urlIds = $this->request->get('url_ids');
        if (empty($urlIds)) {
            $urlIds = $this->request->get('urlIds');
        }
        $batchInfo = $this->request->get('batch_info');

        // Normalise the ids (array, comma-separated string or single value)
        // into a list of positive integers.
        $rawIds = [];
        if (is_array($urlIds)) {
            $rawIds = $urlIds;
        } elseif (is_string($urlIds) && $urlIds !== '') {
            $rawIds = explode(',', $urlIds);
        } elseif (is_int($urlIds)) {
            $rawIds = [$urlIds];
        }
        $idArray = [];
        foreach ($rawIds as $rawId) {
            $id = filter_var($rawId, FILTER_VALIDATE_INT);
            if ($id !== false && $id > 0) {
                $idArray[] = $id;
            }
        }

        // Decode the batch information (JSON string or already an array).
        $batchData = null;
        if (is_string($batchInfo) && $batchInfo !== '') {
            $batchData = json_decode($batchInfo, true);
        } elseif (is_array($batchInfo)) {
            $batchData = $batchInfo;
        }
        if (!is_array($batchData)) {
            $batchData = [];
        }

        if (!empty($idArray)) {
            // Selected URLs: batches are computed over the selection, not over
            // all URLs. Missing batch info means "first batch, default size" —
            // it must not fall through to checking every URL.
            $batchSize = intval($batchData['size'] ?? 10);
            if ($batchSize < 1) {
                $batchSize = 10; // guards the division below
            }
            $batchNumber = max(1, intval($batchData['batch'] ?? 1));
            $totalSelected = count($idArray);
            $totalBatches = (int)ceil($totalSelected / $batchSize);

            if ($batchNumber > $totalBatches) {
                return $this->response->throwJson([
                    'success' => true,
                    'message' => '所有选中的网址已检查完成',
                    'total' => 0,
                    'success_count' => 0,
                    'failed_count' => 0,
                    'has_more' => false
                ]);
            }

            $batchIds = array_slice($idArray, ($batchNumber - 1) * $batchSize, $batchSize);

            error_log("UrlNav Action: 选中检查,批次: $batchNumber/$totalBatches, 本批ID: " . implode(',', $batchIds));

            // Paging is done here, so only this batch's ids are passed on and
            // no batch info is forwarded.
            $result = UrlNav_Plugin::manualCheckStatus($batchIds, true, null);

            // Report batch progress relative to the whole selection.
            $result['batch_number'] = $batchNumber;
            $result['total_batches'] = $totalBatches;
            $result['has_more'] = ($batchNumber < $totalBatches);
            $result['batch_info'] = json_encode([
                'batch' => $batchNumber,
                'total' => $totalBatches,
                'size' => $batchSize,
                'selected_ids' => implode(',', $idArray)
            ]);
            $this->response->throwJson($result);
            return;
        }

        // No selection: check all URLs, letting manualCheckStatus() page them.
        $result = UrlNav_Plugin::manualCheckStatus(null, true, $batchInfo);
        $this->response->throwJson($result);
    } catch (Exception $e) {
        $this->response->throwJson([
            'success' => false,
            'message' => '检查失败: ' . $e->getMessage()
        ]);
    }
}
/**
 * Count the active rows in urlnav_urls.
 *
 * @return mixed The count as returned by the database driver, or 0 when the
 *               query fails or yields no value.
 */
private static function getTotalUrlCount()
{
    try {
        $row = self::getDbConnection()
            ->query("SELECT COUNT(*) as total FROM urlnav_urls WHERE is_active = 1")
            ->fetch(PDO::FETCH_ASSOC);

        if (isset($row['total'])) {
            return $row['total'];
        }

        return 0;
    } catch (Exception $failure) {
        error_log('UrlNav: 获取网站总数失败: ' . $failure->getMessage());
        return 0;
    }
}
/**
 * Fold the outcome of one status-check run into the cumulative statistics row.
 *
 * Updates the first row of urlnav_status_stats, or inserts one if the table is
 * empty. Errors are logged and otherwise ignored.
 *
 * @param array $checkResult Result of a check run; reads `total`, `success_count`,
 *                           `failed_count` and `avg_response_time` (each treated as 0
 *                           when missing, e.g. for a failed or empty run).
 * @return void
 */
private static function updateStatusStats($checkResult)
{
    try {
        $batchTotal = (int)($checkResult['total'] ?? 0);
        $batchSuccess = (int)($checkResult['success_count'] ?? 0);
        $batchFailed = (int)($checkResult['failed_count'] ?? 0);
        $batchAvg = (float)($checkResult['avg_response_time'] ?? 0);

        $db = self::getDbConnection();

        $stmt = $db->query("SELECT * FROM urlnav_status_stats LIMIT 1");
        $currentStats = $stmt->fetch(PDO::FETCH_ASSOC);

        if ($currentStats) {
            $previousSuccess = (int)$currentStats['success_checks'];
            $totalChecks = (int)$currentStats['total_checks'] + $batchTotal;
            $successChecks = $previousSuccess + $batchSuccess;
            $failedChecks = (int)$currentStats['failed_checks'] + $batchFailed;

            // avg_response_time from manualCheckStatus() is a mean over the
            // successful probes only, so the running mean is weighted by success
            // counts; weighting by total checks would let failures dilute it.
            $avgResponseTime = $successChecks > 0
                ? ((float)$currentStats['avg_response_time'] * $previousSuccess + $batchAvg * $batchSuccess) / $successChecks
                : 0;

            $stmt = $db->prepare("
                UPDATE urlnav_status_stats SET
                    total_checks = ?,
                    success_checks = ?,
                    failed_checks = ?,
                    avg_response_time = ?,
                    last_check_time = CURRENT_TIMESTAMP,
                    updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            ");
            $stmt->execute([
                $totalChecks,
                $successChecks,
                $failedChecks,
                round($avgResponseTime, 2),
                $currentStats['id']
            ]);
        } else {
            // First run ever: seed the statistics row.
            $stmt = $db->prepare("
                INSERT INTO urlnav_status_stats
                (total_checks, success_checks, failed_checks, avg_response_time, last_check_time)
                VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
            ");
            $stmt->execute([
                $batchTotal,
                $batchSuccess,
                $batchFailed,
                round($batchAvg, 2)
            ]);
        }
    } catch (Exception $e) {
        error_log('UrlNav: 更新状态检查统计失败: ' . $e->getMessage());
    }
}
/**
 * Summarise the online status and RSS coverage of all active URLs.
 *
 * All figures come from a single query, so they describe one consistent
 * snapshot of the table.
 *
 * @return array Keys: total, online, offline (checked and not online), unchecked
 *               (never checked), online_rate (percentage of total, one decimal),
 *               has_rss, no_rss. All zeros on a database error.
 */
public static function getStatusStats()
{
    try {
        $db = self::getDbConnection();

        $stmt = $db->query("
            SELECT
                COUNT(*) AS total,
                SUM(CASE WHEN is_online = 1 THEN 1 ELSE 0 END) AS online,
                SUM(CASE WHEN is_online = 0 AND last_status_check IS NOT NULL THEN 1 ELSE 0 END) AS offline,
                SUM(CASE WHEN last_status_check IS NULL THEN 1 ELSE 0 END) AS unchecked,
                SUM(CASE WHEN rss_url IS NOT NULL AND rss_url != '' THEN 1 ELSE 0 END) AS has_rss,
                SUM(CASE WHEN rss_url IS NULL OR rss_url = '' THEN 1 ELSE 0 END) AS no_rss
            FROM urlnav_urls
            WHERE is_active = 1
        ");
        $row = $stmt->fetch(PDO::FETCH_ASSOC);

        // SUM() is NULL when there are no active rows; the int casts turn that into 0.
        $total = (int)($row['total'] ?? 0);
        $online = (int)($row['online'] ?? 0);
        $online_rate = $total > 0 ? round(($online / $total) * 100, 1) : 0;

        return [
            'total' => $total,
            'online' => $online,
            'offline' => (int)($row['offline'] ?? 0),
            'unchecked' => (int)($row['unchecked'] ?? 0),
            'online_rate' => (float)$online_rate,
            'has_rss' => (int)($row['has_rss'] ?? 0),
            'no_rss' => (int)($row['no_rss'] ?? 0)
        ];
    } catch (Exception $e) {
        error_log('UrlNav getStatusStats error: ' . $e->getMessage());
        return [
            'total' => 0,
            'online' => 0,
            'offline' => 0,
            'unchecked' => 0,
            'online_rate' => 0,
            'has_rss' => 0,
            'no_rss' => 0
        ];
    }
}
/**
 * Summarise how far RSS refreshing has progressed across all URLs with a feed.
 *
 * @return array Keys:
 *   - total_with_rss: value of getTotalUrlsWithRss()
 *   - never_refreshed: value of getUnrefreshedUrlsCount()
 *   - oldest_refresh: value of getOldestRefreshTime() (null in the error fallback)
 *   - refreshed_percentage: share of feeds refreshed at least once, one decimal
 *   - refresh_limit: configured `rssRefreshLimit` (default 20)
 *   - estimated_tasks: runs needed to cover the never-refreshed feeds at that limit
 *   - oldest_hours_ago: hours since oldest_refresh; only present when that
 *     timestamp is set and parseable
 */
public static function getRssRefreshStats()
{
    try {
        $stats = [];
        $stats['total_with_rss'] = self::getTotalUrlsWithRss();
        $stats['never_refreshed'] = self::getUnrefreshedUrlsCount();
        $stats['oldest_refresh'] = self::getOldestRefreshTime();

        // Progress: share of feeds that have been refreshed at least once.
        if ($stats['total_with_rss'] > 0) {
            $stats['refreshed_percentage'] = round(
                ($stats['total_with_rss'] - $stats['never_refreshed']) / $stats['total_with_rss'] * 100,
                1
            );
        } else {
            $stats['refreshed_percentage'] = 0;
        }

        // Configured number of feeds processed per run.
        $config = self::getConfig();
        $stats['refresh_limit'] = intval($config->rssRefreshLimit ?? 20);

        // Runs still needed to reach every never-refreshed feed.
        if ($stats['refresh_limit'] > 0 && $stats['never_refreshed'] > 0) {
            $stats['estimated_tasks'] = ceil($stats['never_refreshed'] / $stats['refresh_limit']);
        } else {
            $stats['estimated_tasks'] = 0;
        }

        // Age of the stalest refresh. strtotime() returns false for text it
        // cannot parse; skip the figure then instead of reporting a bogus age.
        // NOTE(review): assumes the stored timestamp is in PHP's default timezone — confirm.
        if ($stats['oldest_refresh']) {
            $oldestTimestamp = strtotime($stats['oldest_refresh']);
            if ($oldestTimestamp !== false) {
                $stats['oldest_hours_ago'] = round((time() - $oldestTimestamp) / 3600, 1);
            }
        }

        return $stats;
    } catch (Exception $e) {
        error_log('UrlNav: 获取RSS刷新统计失败: ' . $e->getMessage());
        return [
            'total_with_rss' => 0,
            'never_refreshed' => 0,
            'oldest_refresh' => null,
            'refreshed_percentage' => 0,
            'refresh_limit' => 20,
            'estimated_tasks' => 0
        ];
    }
}
// ============ 收藏功能相关方法 ============
/**
 * Save a cached RSS item to a user's favorites.
 *
 * A full copy of the item (title, link, description, content, publication date,
 * site and category labels) is stored in urlnav_favorites, and the cache row is
 * flagged fresh so it is not cleaned up.
 *
 * @param mixed $feedId Id of the row in urlnav_rss_cache.
 * @param mixed $userId Owner of the favorite (defaults to 0).
 * @return array ['success' => bool, 'message' => string]; fails when the item does
 *               not exist, is already a favorite, or a database error occurs.
 */
public static function addFavorite($feedId, $userId = 0)
{
    try {
        $pdo = self::getDbConnection();

        // Load the item together with its site and category labels.
        $lookup = $pdo->prepare("
SELECT c.*, u.title as site_title, u.url as site_url, cat.name as category_name
FROM urlnav_rss_cache c
LEFT JOIN urlnav_urls u ON c.url_id = u.id
LEFT JOIN urlnav_categories cat ON u.category_id = cat.id
WHERE c.id = ?
");
        $lookup->execute([$feedId]);
        $item = $lookup->fetch(PDO::FETCH_ASSOC);
        if (!$item) {
            return ['success' => false, 'message' => '文章不存在'];
        }

        // Refuse duplicates for the same user.
        $duplicate = $pdo->prepare("SELECT id FROM urlnav_favorites WHERE user_id = ? AND feed_id = ?");
        $duplicate->execute([$userId, $feedId]);
        if ($duplicate->fetch()) {
            return ['success' => false, 'message' => '已收藏'];
        }

        // Snapshot of the item as it is stored with the favorite.
        $snapshot = [
            $userId,
            $feedId,
            $item['feed_title'],
            $item['feed_link'],
            $item['feed_description'] ?? '',
            $item['full_content'] ?? '',
            $item['pub_date'],
            $item['site_title'] ?? '',
            $item['site_url'] ?? '',
            $item['category_name'] ?? ''
        ];
        $insert = $pdo->prepare("
INSERT INTO urlnav_favorites
(user_id, feed_id, feed_title, feed_link, feed_description, full_content, pub_date,
site_title, site_url, category_name)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
");
        $insert->execute($snapshot);

        // Keep the cached item flagged as fresh so cleanup leaves it alone.
        $keepFresh = $pdo->prepare("UPDATE urlnav_rss_cache SET is_fresh = 1 WHERE id = ?");
        $keepFresh->execute([$feedId]);

        return ['success' => true, 'message' => '收藏成功'];
    } catch (Exception $failure) {
        error_log('UrlNav: 添加收藏失败: ' . $failure->getMessage());
        return ['success' => false, 'message' => '收藏失败: ' . $failure->getMessage()];
    }
}
/**
 * Delete a user's favorite for the given RSS item.
 *
 * @param mixed $feedId Id of the favorited cache item.
 * @param mixed $userId Owner of the favorite (defaults to 0).
 * @return array ['success' => bool, 'message' => string]; success is false when
 *               no matching favorite existed or a database error occurred.
 */
public static function removeFavorite($feedId, $userId = 0)
{
    try {
        $delete = self::getDbConnection()
            ->prepare("DELETE FROM urlnav_favorites WHERE user_id = ? AND feed_id = ?");
        $delete->execute([$userId, $feedId]);

        // Nothing deleted means the item was never a favorite.
        if ($delete->rowCount() <= 0) {
            return ['success' => false, 'message' => '未收藏'];
        }

        return ['success' => true, 'message' => '已取消收藏'];
    } catch (Exception $failure) {
        error_log('UrlNav: 取消收藏失败: ' . $failure->getMessage());
        return ['success' => false, 'message' => '取消收藏失败: ' . $failure->getMessage()];
    }
}
/**
 * Tell whether a user has favorited the given RSS item.
 *
 * @param mixed $feedId Id of the cache item.
 * @param mixed $userId Owner to check for (defaults to 0).
 * @return bool True when a matching favorite row exists; false otherwise or on a database error.
 */
public static function isFavorite($feedId, $userId = 0)
{
    try {
        $lookup = self::getDbConnection()
            ->prepare("SELECT id FROM urlnav_favorites WHERE user_id = ? AND feed_id = ?");
        $lookup->execute([$userId, $feedId]);

        // fetch() yields the row, or false when there is none.
        return (bool)$lookup->fetch();
    } catch (Exception $failure) {
        error_log('UrlNav: 检查收藏失败: ' . $failure->getMessage());
        return false;
    }
}
/**
 * Fetch one page of a user's favorites, newest first, with optional search.
 *
 * @param mixed  $userId   Owner of the favorites (defaults to 0).
 * @param mixed  $page     1-based page number; non-numeric or < 1 falls back to 1.
 * @param mixed  $pageSize Items per page; non-numeric or < 1 falls back to 20.
 * @param string $search   Substring matched (SQL LIKE) against feed title, feed description,
 *                         full content and site title; '' disables the filter.
 * @return array Keys: success (bool), total (int), totalPages (int), currentPage (int),
 *               pageSize (int), data (rows). On a database error success is false,
 *               a `message` is added and data is empty.
 */
public static function getFavorites($userId = 0, $page = 1, $pageSize = 20, $search = '')
{
    // Normalise paging first: a zero page size would divide by zero below and
    // a negative page or size would produce an invalid OFFSET/LIMIT.
    $page = is_numeric($page) ? max(1, (int)$page) : 1;
    $pageSize = (is_numeric($pageSize) && (int)$pageSize > 0) ? (int)$pageSize : 20;

    try {
        $db = self::getDbConnection();

        $whereClause = "WHERE f.user_id = ?";
        $params = [$userId];
        if (is_scalar($search) && (string)$search !== '') {
            $whereClause .= " AND (f.feed_title LIKE ? OR f.feed_description LIKE ? OR f.full_content LIKE ? OR f.site_title LIKE ?)";
            $searchTerm = '%' . $search . '%';
            array_push($params, $searchTerm, $searchTerm, $searchTerm, $searchTerm);
        }

        // Total number of matching favorites.
        $countStmt = $db->prepare("SELECT COUNT(*) as total FROM urlnav_favorites f $whereClause");
        $countStmt->execute($params);
        $countResult = $countStmt->fetch(PDO::FETCH_ASSOC);
        $total = (int)($countResult['total'] ?? 0);

        // Requested page. Values are bound one by one so LIMIT/OFFSET are sent as integers.
        $stmt = $db->prepare(
            "SELECT
                f.id,
                f.feed_id as original_feed_id,
                f.feed_title,
                f.feed_link,
                f.feed_description,
                f.full_content,
                f.pub_date,
                f.site_title,
                f.site_url,
                f.category_name,
                f.favorited_at
             FROM urlnav_favorites f
             $whereClause
             ORDER BY f.favorited_at DESC
             LIMIT ? OFFSET ?"
        );
        $position = 1;
        foreach ($params as $value) {
            $stmt->bindValue($position++, $value);
        }
        $stmt->bindValue($position++, $pageSize, PDO::PARAM_INT);
        $stmt->bindValue($position, ($page - 1) * $pageSize, PDO::PARAM_INT);
        $stmt->execute();
        $feeds = $stmt->fetchAll(PDO::FETCH_ASSOC);

        return [
            'success' => true,
            'total' => $total,
            'totalPages' => (int)ceil($total / $pageSize),
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => $feeds
        ];
    } catch (Exception $e) {
        error_log('UrlNav: 获取收藏列表失败: ' . $e->getMessage());
        return [
            'success' => false,
            'message' => '获取收藏失败',
            'total' => 0,
            'totalPages' => 0,
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => []
        ];
    }
}
/**
 * Count how many favorites a user has.
 *
 * @param mixed $userId Owner of the favorites (defaults to 0).
 * @return array ['total' => count]; the count is 0 when unavailable or on a database error.
 */
public static function getFavoriteStats($userId = 0)
{
    try {
        $counter = self::getDbConnection()
            ->prepare("SELECT COUNT(*) as total FROM urlnav_favorites WHERE user_id = ?");
        $counter->execute([$userId]);
        $row = $counter->fetch(PDO::FETCH_ASSOC);

        $total = isset($row['total']) ? $row['total'] : 0;

        return ['total' => $total];
    } catch (Exception $failure) {
        error_log('UrlNav: 获取收藏统计失败: ' . $failure->getMessage());
        return ['total' => 0];
    }
}
}
/**
 * RSS manager: wraps per-URL RSS refresh decisions.
 */
class UrlNav_RssManager
{
    /** Database connection obtained from UrlNav_Plugin::getDbConnection(). */
    private $db;

    public function __construct()
    {
        $this->db = UrlNav_Plugin::getDbConnection();
    }

    /**
     * Refresh one URL's feed unless it was refreshed recently enough.
     *
     * @param mixed $urlId Id of the row in urlnav_urls.
     * @param bool  $force True to refresh regardless of the last refresh time.
     * @return array ['skipped' => true, 'reason' => ...] when no refresh is due,
     *               otherwise the result of the refresh.
     * @throws Exception When no URL with that id exists.
     */
    public function smartRefresh($urlId, $force = false)
    {
        $url = $this->getUrlInfo($urlId);
        if (!$url) {
            throw new Exception('URL不存在');
        }

        if (!$force && !$this->shouldRefresh($url)) {
            return ['skipped' => true, 'reason' => '未到刷新时间'];
        }

        return $this->refreshUrl($url);
    }

    /**
     * Load the urlnav_urls row for an id.
     *
     * @param mixed $urlId Row id.
     * @return array|false The row, or false when it does not exist.
     */
    private function getUrlInfo($urlId)
    {
        $stmt = $this->db->prepare("SELECT * FROM urlnav_urls WHERE id = ?");
        $stmt->execute([$urlId]);
        return $stmt->fetch(PDO::FETCH_ASSOC);
    }

    /**
     * Decide whether a URL's feed is due for a refresh.
     *
     * The base interval is the configured `rssRefresh` (seconds, default 3600).
     * Feeds that failed in more than half of their refresh attempts use three
     * times that interval so unreliable sources are polled less often.
     *
     * @param array $url Row from urlnav_urls; reads `last_refresh`, `failure_count`, `refresh_count`.
     * @return bool True when the feed should be refreshed now.
     */
    private function shouldRefresh($url)
    {
        $config = UrlNav_Plugin::getConfig();
        $refreshInterval = intval($config->rssRefresh ?? 3600);

        // Never refreshed (or an unparseable timestamp): refresh now.
        if (empty($url['last_refresh'])) {
            return true;
        }
        $lastRefresh = strtotime($url['last_refresh']);
        if ($lastRefresh === false) {
            return true;
        }

        // The back-off must be applied BEFORE comparing against the interval;
        // checking the normal interval first would always win and the longer
        // interval for failing feeds could never take effect.
        $failures = (int)($url['failure_count'] ?? 0);
        $attempts = max(1, (int)($url['refresh_count'] ?? 0));
        if ($failures / $attempts > 0.5) {
            $refreshInterval *= 3;
        }

        return (time() - $lastRefresh) >= $refreshInterval;
    }

    /**
     * Refresh a single URL's feed.
     *
     * Placeholder: no refresh is performed yet; it always reports success. The
     * original author's note suggests delegating to UrlNav_Plugin::parseRssFeed
     * or similar.
     *
     * @param array $url Row from urlnav_urls.
     * @return array ['success' => true]
     */
    private function refreshUrl($url)
    {
        return ['success' => true];
    }
}
?>