Files
UrlNav/Plugin.php
2026-02-23 20:15:55 +08:00

5080 lines
205 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
if (!defined('__TYPECHO_ROOT_DIR__')) exit;
/**
* 网址导航、RSS信息阅读
*
* @package UrlNav
* @author 石头厝
* @version 1.5.0
* @link https://www.shitoucuo.com
*/
class UrlNav_Plugin implements Typecho_Plugin_Interface
{
/**
* 数据库文件路径
*/
private static $dbPath;
/**
* RSS刷新管理器实例
*/
private static $rssManager = null;
/**
 * Activate the plugin.
 *
 * Prepares the SQLite database (path, schema, migration) and then registers
 * the two admin panels, the action handler and the three routes.
 *
 * @return string Translated confirmation message shown by Typecho.
 */
public static function activate()
{
    // Database bootstrap: resolve the file path first, then create and migrate the schema.
    self::initDbPath();
    self::initDatabase();
    self::migrateDatabase();

    // Admin panels: file, menu title, sub title.
    $panels = array(
        array('UrlNav/Manage.php', '网址导航', '网址导航管理'),
        array('UrlNav/Rss.php', 'RSS信息', 'RSS信息阅读'),
    );
    foreach ($panels as $panel) {
        Helper::addPanel(3, $panel[0], $panel[1], $panel[2], 'administrator');
    }

    // Action handler.
    Helper::addAction('urlnav', 'UrlNav_Action');

    // Routes: name => (URL pattern, UrlNav_Action method). The two cron routes are
    // standalone endpoints for the status check and the RSS refresh.
    $routes = array(
        'urlnav_action'      => array('/action/urlnav', 'action'),
        'urlnav_status_cron' => array('/urlnav-status-cron', 'statusCron'),
        'urlnav_rss_cron'    => array('/urlnav-rss-cron', 'rssCron'),
    );
    foreach ($routes as $routeName => $route) {
        Helper::addRoute($routeName, $route[0], 'UrlNav_Action', $route[1]);
    }

    return _t('网址导航插件已激活');
}
/**
 * Deactivate the plugin.
 *
 * Unregisters everything that activate() registered: both admin panels,
 * the action handler and the three routes. The database file is left in place.
 *
 * @return string Translated confirmation message shown by Typecho.
 */
public static function deactivate()
{
    foreach (array('UrlNav/Manage.php', 'UrlNav/Rss.php') as $panelFile) {
        Helper::removePanel(3, $panelFile);
    }

    Helper::removeAction('urlnav');

    foreach (array('urlnav_action', 'urlnav_status_cron', 'urlnav_rss_cron') as $routeName) {
        Helper::removeRoute($routeName);
    }

    return _t('网址导航插件已禁用');
}
/**
 * Build the plugin configuration panel.
 *
 * Adds, in order: URL management, full-text fetching, RSS, site status check
 * and cron settings. Field names and defaults are what the rest of the plugin
 * reads through getConfig().
 *
 * @param Typecho_Widget_Helper_Form $form Form to populate.
 * @return void
 */
public static function config(Typecho_Widget_Helper_Form $form)
{
    // Section heading.
    // NOTE(review): the heading HTML is passed as a second argument to addItem(), exactly
    // as the original code did — confirm that Typecho actually renders it.
    $addSection = function ($headingHtml) use ($form) {
        $form->addItem(new Typecho_Widget_Helper_Layout('div', array('class' => 'typecho-page-title')), $headingHtml);
    };

    // Short text input rendered with the "mini" CSS class.
    $addMiniText = function ($name, $default, $label, $description) use ($form) {
        $element = new Typecho_Widget_Helper_Form_Element_Text($name, null, $default, $label, $description);
        $element->input->setAttribute('class', 'mini');
        $form->addInput($element);
    };

    // On/off radio group.
    $addSwitch = function ($name, $default, $label, $description) use ($form) {
        $element = new Typecho_Widget_Helper_Form_Element_Radio($name, array(
            '1' => _t('开启'),
            '0' => _t('关闭')
        ), $default, $label, $description);
        $form->addInput($element);
    };

    // ================== URL management ==================
    $addSection('<h2>网址管理配置</h2>');
    $addMiniText('pageSize', '20', _t('每页显示数量'), _t('后台管理中每页显示的网址数量'));
    $addSwitch('validateUrl', '1', _t('网址验证'), _t('新增网址时是否验证网址有效性'));

    // ================== Full-text fetching ==================
    $addSection('<h2>全文抓取配置</h2>');
    $addSwitch('enableFullText', '0', _t('开启全文抓取'), _t('开启后会对白名单中的网站自动抓取全文'));

    // Whitelist: one "feed URL|CSS selector" pair per line.
    $fullTextWhitelist = new Typecho_Widget_Helper_Form_Element_Textarea('fullTextWhitelist', null,
        "https://wiki.eryajf.net/learning-weekly.xml|.markdown-body\nhttps://example.com/rss|#content",
        _t('全文抓取白名单'),
        _t('每行一个格式RSS地址|内容选择器CSS选择器<br>示例https://wiki.eryajf.net/learning-weekly.xml|.post-content'));
    $form->addInput($fullTextWhitelist);

    $addMiniText('fullTextPerSite', '3', _t('每站抓取全文篇数'), _t('每个RSS源最多抓取几篇的全文建议1-5'));
    $addMiniText('pageFetchTimeout', '8', _t('页面抓取超时时间(秒)'), _t('抓取单篇文章页面时的超时时间建议8-15秒'));

    // ================== RSS ==================
    $addSection('<h2>RSS配置</h2>');
    $addMiniText('rssPageSize', '30', _t('RSS页面每页显示数量'), _t('RSS信息页面每页显示的文章数量'));
    $addMiniText('rssRefresh', '3600', _t('RSS刷新间隔'), _t('建议的RSS刷新间隔时间实际执行时间由宝塔计划任务决定'));
    // NOTE(review): the help text recommends 10-50, but refreshAllRssFeeds() clamps the value to 1-30.
    $addMiniText('rssRefreshLimit', '20', _t('每次自动刷新网址数量'), _t('每次定时任务最多刷新的RSS网址数量建议10-50根据服务器性能调整'));
    $addMiniText('maxFeedsPerSite', '5', _t('每个站点最大文章数'), _t('每个RSS源最多显示的文章数量'));

    // Retention period in seconds. The default is '0' (never clean up), matching both the
    // option label and the help text; it used to be '259200', which contradicted them.
    $rssKeepTime = new Typecho_Widget_Helper_Form_Element_Select('rssKeepTime',
        array(
            '0' => _t('不自动清理(默认)'),
            '86400' => _t('一天之前24小时前'),
            '259200' => _t('三天之前72小时前'),
            '604800' => _t('一周之前7天前'),
            '1296000' => _t('半个月之前15天前'),
            '2592000' => _t('一个月之前30天前'),
            '7776000' => _t('三个月之前90天前'),
            '15552000' => _t('半年之前180天前')
        ),
        '0',
        _t('RSS文章保留时间'),
        _t('自动清理超过此时间的RSS文章按照文章发布时间判断默认不自动清理'));
    $form->addInput($rssKeepTime);

    $addMiniText('maxCachePerSite', '5', _t('每个站点最大缓存条数'), _t('每个RSS源最多缓存的文章数量0表示不限制'));
    $addMiniText('fetchTimeout', '5', _t('RSS抓取超时时间'), _t('抓取RSS源时的超时时间'));
    $addMiniText('retryTimes', '2', _t('失败重试次数'), _t('RSS抓取失败时的重试次数'));

    // ================== Site status check ==================
    $addSection('<h2>网站状态检查配置</h2>');
    $addMiniText('statusCheckTimeout', '8', _t('状态检查超时时间(秒)'), _t('检查网站状态时的超时时间'));
    $addMiniText('statusCheckMax', '80', _t('每次检查最大数量'), _t('每次自动检查时最多检查的网址数量'));

    // ================== Cron ==================
    $addSection('<h2>定时任务配置</h2>');

    // Secrets for the two cron endpoints. The default is a freshly generated value each
    // time the form is built; the stored value takes over once the settings are saved.
    $rssCronSecret = new Typecho_Widget_Helper_Form_Element_Text('rssCronSecret', null, self::generateSecret(),
        _t('RSS定时任务密钥'), _t('用于RSS定时任务访问的密钥请妥善保管'));
    $form->addInput($rssCronSecret);

    $statusCronSecret = new Typecho_Widget_Helper_Form_Element_Text('statusCronSecret', null, self::generateSecret(),
        _t('状态检查定时任务密钥'), _t('用于状态检查定时任务访问的密钥,请妥善保管'));
    $form->addInput($statusCronSecret);
}
/**
 * Per-user configuration panel.
 *
 * This plugin has no per-user settings, so the form is left untouched.
 *
 * @param Typecho_Widget_Helper_Form $form Form supplied by Typecho (unused).
 * @return void
 */
public static function personalConfig(Typecho_Widget_Helper_Form $form)
{
    // Intentionally empty.
}
/**
 * Resolve the SQLite database file path and store it in self::$dbPath.
 *
 * The database lives in the plugin's "db" directory. An existing
 * "urlnav_*.db" file is reused; otherwise a new path with an unpredictable
 * suffix is chosen (the file itself is created later, when it is first opened).
 *
 * @return void
 */
private static function initDbPath()
{
    $dbDir = __DIR__ . '/db';

    // Make sure the directory exists; report a failure instead of ignoring it silently.
    if (!is_dir($dbDir) && !@mkdir($dbDir, 0755, true) && !is_dir($dbDir)) {
        error_log('UrlNav: 无法创建数据库目录: ' . $dbDir);
    }

    // glob() returns false on error; empty() covers both false and an empty list.
    $dbFiles = glob($dbDir . '/urlnav_*.db');
    if (!empty($dbFiles)) {
        self::$dbPath = $dbFiles[0];
        return;
    }

    // The random suffix is the only thing that keeps the file name from being guessed,
    // so it comes from a CSPRNG (10 hex characters, same length as before).
    self::$dbPath = $dbDir . '/urlnav_' . bin2hex(random_bytes(5)) . '.db';
}
/**
 * Generate a random secret for the cron endpoints.
 *
 * Uses a cryptographically secure source rather than rand()/uniqid()/time(),
 * whose output can be predicted.
 *
 * @return string 16 lowercase hexadecimal characters.
 */
private static function generateSecret()
{
    return bin2hex(random_bytes(8));
}
/**
 * Count the active URLs of a category.
 *
 * @param mixed $categoryId Category id, bound as a query parameter.
 * @return array 'url_count' (all active URLs) and 'rss_count' (active URLs
 *               with a non-empty rss_url), both as integers.
 */
public static function getCategoryStats($categoryId) {
    $pdo = self::getDbConnection();

    // One COUNT query per statistic, keyed by the name used in the result.
    $countQueries = array(
        'url_count' => "SELECT COUNT(*) as url_count FROM urlnav_urls WHERE category_id = ? AND is_active = 1",
        'rss_count' => "SELECT COUNT(*) as rss_count FROM urlnav_urls WHERE category_id = ? AND rss_url IS NOT NULL AND rss_url != '' AND is_active = 1",
    );

    $stats = array();
    foreach ($countQueries as $key => $sql) {
        $query = $pdo->prepare($sql);
        $query->execute(array($categoryId));
        $stats[$key] = (int)$query->fetchColumn();
    }

    return $stats;
}
/**
 * Create the database schema if it does not exist yet.
 *
 * Creates each table (with its indexes and seed rows) only when the table is
 * missing, then creates the two "updated_at" triggers. A PDOException is
 * logged and swallowed, so callers are not interrupted.
 *
 * Fixes compared with the previous version:
 * - The cron_type index on urlnav_cron_log used the name idx_cron_type, which
 *   is already taken by the index on urlnav_refresh_log. SQLite index names
 *   are unique per database, so the second CREATE INDEX threw and everything
 *   after it (status tables, triggers) was skipped on the first run. The index
 *   is now named idx_cron_log_cron_type and is created with IF NOT EXISTS
 *   outside the table check, so databases hit by the old failure get it too.
 * - urlnav_refresh_log is created with the message and details columns that
 *   migrateDatabase() otherwise has to add afterwards.
 *
 * @return void
 */
private static function initDatabase()
{
    if (empty(self::$dbPath)) {
        self::initDbPath();
    }
    try {
        $db = new PDO('sqlite:' . self::$dbPath);
        $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

        // True when a table with the given name is listed in sqlite_master.
        $tableExists = function ($table) use ($db) {
            $stmt = $db->prepare("SELECT name FROM sqlite_master WHERE type='table' AND name = ?");
            $stmt->execute(array($table));
            return $stmt->fetch() !== false;
        };

        // Categories, seeded with four default rows.
        if (!$tableExists('urlnav_categories')) {
            $db->exec("CREATE TABLE urlnav_categories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                description TEXT,
                sort_order INTEGER DEFAULT 0,
                is_active INTEGER DEFAULT 1,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
            )");
            $db->exec("INSERT INTO urlnav_categories (name, description, sort_order) VALUES
                ('常用工具', '日常使用的在线工具', 1),
                ('设计资源', '设计相关的素材和资源', 2),
                ('开发资源', '程序开发相关资源', 3),
                ('技术社区', '技术交流和学习社区', 4)");
        }

        // URLs.
        if (!$tableExists('urlnav_urls')) {
            $db->exec("CREATE TABLE urlnav_urls (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT NOT NULL,
                url TEXT NOT NULL,
                description TEXT,
                rss_url TEXT,
                category_id INTEGER,
                star_rating INTEGER DEFAULT 0, -- 新增星级评分0-3表示0-3颗星
                sort_order INTEGER DEFAULT 0,
                is_active INTEGER DEFAULT 1,
                is_online INTEGER DEFAULT 1,
                last_status_check DATETIME,
                status_check_count INTEGER DEFAULT 0,
                last_status_code INTEGER,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                last_refresh DATETIME,
                refresh_count INTEGER DEFAULT 0,
                success_count INTEGER DEFAULT 0,
                failure_count INTEGER DEFAULT 0,
                last_error TEXT,
                FOREIGN KEY (category_id) REFERENCES urlnav_categories(id) ON DELETE SET NULL
            )");
            $db->exec("CREATE INDEX idx_category_id ON urlnav_urls(category_id)");
            $db->exec("CREATE INDEX idx_is_active ON urlnav_urls(is_active)");
            $db->exec("CREATE INDEX idx_rss_url ON urlnav_urls(rss_url)");
            $db->exec("CREATE INDEX idx_last_refresh ON urlnav_urls(last_refresh)");
            $db->exec("CREATE INDEX idx_is_online ON urlnav_urls(is_online)");
            $db->exec("CREATE INDEX idx_last_status_check ON urlnav_urls(last_status_check)");
        }

        // RSS article cache.
        if (!$tableExists('urlnav_rss_cache')) {
            $db->exec("CREATE TABLE urlnav_rss_cache (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                url_id INTEGER NOT NULL,
                feed_title TEXT NOT NULL,
                feed_link TEXT NOT NULL,
                feed_description TEXT,
                full_content TEXT, -- 新增:完整内容字段
                pub_date DATETIME NOT NULL,
                guid TEXT NOT NULL,
                cached_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                is_fresh INTEGER DEFAULT 1,
                FOREIGN KEY (url_id) REFERENCES urlnav_urls(id) ON DELETE CASCADE,
                UNIQUE(url_id, guid)
            )");
            $db->exec("CREATE INDEX idx_url_id ON urlnav_rss_cache(url_id)");
            $db->exec("CREATE INDEX idx_pub_date ON urlnav_rss_cache(pub_date)");
            $db->exec("CREATE INDEX idx_cached_at ON urlnav_rss_cache(cached_at)");
            $db->exec("CREATE INDEX idx_is_fresh ON urlnav_rss_cache(is_fresh)");
            $db->exec("CREATE UNIQUE INDEX idx_url_guid ON urlnav_rss_cache(url_id, guid)");
        }

        // Favorites.
        if (!$tableExists('urlnav_favorites')) {
            $db->exec("CREATE TABLE urlnav_favorites (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id INTEGER DEFAULT 0,
                feed_id INTEGER NOT NULL,
                feed_title TEXT NOT NULL,
                feed_link TEXT NOT NULL,
                feed_description TEXT,
                full_content TEXT, -- 新增:完整内容字段
                pub_date DATETIME NOT NULL,
                site_title TEXT,
                site_url TEXT,
                category_name TEXT,
                favorited_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(user_id, feed_id)
            )");
            $db->exec("CREATE INDEX idx_favorite_user_id ON urlnav_favorites(user_id)");
            $db->exec("CREATE INDEX idx_favorite_feed_id ON urlnav_favorites(feed_id)");
            $db->exec("CREATE INDEX idx_favorite_created_at ON urlnav_favorites(favorited_at)");
        }

        // RSS refresh log.
        if (!$tableExists('urlnav_refresh_log')) {
            $db->exec("CREATE TABLE urlnav_refresh_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                refresh_type TEXT NOT NULL,
                success_count INTEGER DEFAULT 0,
                total_feeds INTEGER DEFAULT 0,
                url_count INTEGER DEFAULT 0,
                new_articles INTEGER DEFAULT 0,
                error_message TEXT,
                refresh_time DATETIME DEFAULT CURRENT_TIMESTAMP,
                duration INTEGER DEFAULT 0,
                cron_type TEXT DEFAULT 'rss', -- 新增区分RSS和状态检查
                message TEXT,
                details TEXT
            )");
            $db->exec("CREATE INDEX idx_refresh_time ON urlnav_refresh_log(refresh_time)");
            $db->exec("CREATE INDEX idx_refresh_type ON urlnav_refresh_log(refresh_type)");
            $db->exec("CREATE INDEX idx_cron_type ON urlnav_refresh_log(cron_type)");
        }

        // Cron execution log.
        if (!$tableExists('urlnav_cron_log')) {
            $db->exec("CREATE TABLE urlnav_cron_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                cron_type TEXT NOT NULL,
                executed_time DATETIME DEFAULT CURRENT_TIMESTAMP,
                result TEXT,
                error_message TEXT
            )");
        }
        // Created unconditionally (IF NOT EXISTS) so that a database left half-initialised
        // by the former index-name collision still ends up with both indexes.
        $db->exec("CREATE INDEX IF NOT EXISTS idx_executed_time ON urlnav_cron_log(executed_time)");
        $db->exec("CREATE INDEX IF NOT EXISTS idx_cron_log_cron_type ON urlnav_cron_log(cron_type)");

        // Per-check status log.
        if (!$tableExists('urlnav_status_log')) {
            $db->exec("CREATE TABLE urlnav_status_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                url_id INTEGER NOT NULL,
                is_online INTEGER DEFAULT 0,
                status_code INTEGER,
                response_time INTEGER,
                check_time DATETIME DEFAULT CURRENT_TIMESTAMP,
                error_message TEXT,
                FOREIGN KEY (url_id) REFERENCES urlnav_urls(id) ON DELETE CASCADE
            )");
            $db->exec("CREATE INDEX idx_url_id_status ON urlnav_status_log(url_id)");
            $db->exec("CREATE INDEX idx_check_time ON urlnav_status_log(check_time)");
            $db->exec("CREATE INDEX idx_is_online_status ON urlnav_status_log(is_online)");
        }

        // Aggregated status statistics, seeded with a single all-zero row.
        if (!$tableExists('urlnav_status_stats')) {
            $db->exec("CREATE TABLE urlnav_status_stats (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                total_checks INTEGER DEFAULT 0,
                success_checks INTEGER DEFAULT 0,
                failed_checks INTEGER DEFAULT 0,
                avg_response_time REAL DEFAULT 0,
                last_check_time DATETIME,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
            )");
            $db->exec("INSERT INTO urlnav_status_stats (total_checks, success_checks, failed_checks, avg_response_time) VALUES (0, 0, 0, 0)");
        }

        // Triggers that refresh updated_at after every UPDATE.
        $db->exec("CREATE TRIGGER IF NOT EXISTS update_category_time
            AFTER UPDATE ON urlnav_categories
            BEGIN
                UPDATE urlnav_categories SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
            END");
        $db->exec("CREATE TRIGGER IF NOT EXISTS update_url_time
            AFTER UPDATE ON urlnav_urls
            BEGIN
                UPDATE urlnav_urls SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
            END");

        $db = null;
    } catch (PDOException $e) {
        error_log('UrlNav: 数据库初始化失败: ' . $e->getMessage());
    }
}
/**
 * Bring an existing database up to the current schema.
 *
 * Adds every column that is missing from urlnav_urls, urlnav_rss_cache,
 * urlnav_favorites and urlnav_refresh_log. Each table is inspected with
 * PRAGMA table_info and each column is checked on its own table, so the
 * migration can be run repeatedly. A PDOException is logged and swallowed.
 *
 * Fixes compared with the previous version:
 * - full_content on urlnav_favorites is checked against urlnav_favorites
 *   itself. Previously it was added whenever urlnav_rss_cache lacked the
 *   column, which either raised "duplicate column name" (aborting every later
 *   migration step) or never added the column at all.
 * - The database path is resolved first if it has not been initialised.
 *
 * @return void
 */
private static function migrateDatabase()
{
    if (empty(self::$dbPath)) {
        self::initDbPath();
    }
    try {
        $db = new PDO('sqlite:' . self::$dbPath);
        $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

        // Column names of a table as a name => true map (empty when the table does not exist).
        $columnsOf = function ($table) use ($db) {
            $names = array();
            foreach ($db->query("PRAGMA table_info({$table})")->fetchAll(PDO::FETCH_ASSOC) as $row) {
                $names[$row['name']] = true;
            }
            return $names;
        };

        // ----- urlnav_urls -----
        $urlColumns = $columnsOf('urlnav_urls');
        $newUrlColumns = array(
            'rss_url' => "ALTER TABLE urlnav_urls ADD COLUMN rss_url TEXT",
            'last_refresh' => "ALTER TABLE urlnav_urls ADD COLUMN last_refresh DATETIME",
            'refresh_count' => "ALTER TABLE urlnav_urls ADD COLUMN refresh_count INTEGER DEFAULT 0",
            'success_count' => "ALTER TABLE urlnav_urls ADD COLUMN success_count INTEGER DEFAULT 0",
            'failure_count' => "ALTER TABLE urlnav_urls ADD COLUMN failure_count INTEGER DEFAULT 0",
            'last_error' => "ALTER TABLE urlnav_urls ADD COLUMN last_error TEXT",
            'is_online' => "ALTER TABLE urlnav_urls ADD COLUMN is_online INTEGER DEFAULT 1",
            'last_status_check' => "ALTER TABLE urlnav_urls ADD COLUMN last_status_check DATETIME",
            'status_check_count' => "ALTER TABLE urlnav_urls ADD COLUMN status_check_count INTEGER DEFAULT 0",
            'last_status_code' => "ALTER TABLE urlnav_urls ADD COLUMN last_status_code INTEGER"
        );
        foreach ($newUrlColumns as $columnName => $sql) {
            if (!isset($urlColumns[$columnName])) {
                $db->exec($sql);
            }
        }

        // ----- urlnav_rss_cache: is_fresh -----
        $cacheColumns = $columnsOf('urlnav_rss_cache');
        if (!isset($cacheColumns['is_fresh'])) {
            $db->exec("ALTER TABLE urlnav_rss_cache ADD COLUMN is_fresh INTEGER DEFAULT 1");
            $db->exec("CREATE INDEX IF NOT EXISTS idx_is_fresh ON urlnav_rss_cache(is_fresh)");
        }

        // ----- urlnav_urls: star_rating -----
        if (!isset($urlColumns['star_rating'])) {
            $db->exec("ALTER TABLE urlnav_urls ADD COLUMN star_rating INTEGER DEFAULT 0");
            error_log("UrlNav: 已添加star_rating字段到urlnav_urls表");
        }

        // ----- full_content, checked independently per table -----
        if (!isset($cacheColumns['full_content'])) {
            $db->exec("ALTER TABLE urlnav_rss_cache ADD COLUMN full_content TEXT");
        }
        $favoriteColumns = $columnsOf('urlnav_favorites');
        // An empty column list means the table does not exist; there is nothing to alter then.
        if (!empty($favoriteColumns) && !isset($favoriteColumns['full_content'])) {
            $db->exec("ALTER TABLE urlnav_favorites ADD COLUMN full_content TEXT");
        }

        // ----- urlnav_refresh_log -----
        $refreshLogColumns = $columnsOf('urlnav_refresh_log');
        if (!isset($refreshLogColumns['cron_type'])) {
            $db->exec("ALTER TABLE urlnav_refresh_log ADD COLUMN cron_type TEXT DEFAULT 'rss'");
            $db->exec("CREATE INDEX IF NOT EXISTS idx_cron_type ON urlnav_refresh_log(cron_type)");
        }
        if (!isset($refreshLogColumns['new_articles'])) {
            $db->exec("ALTER TABLE urlnav_refresh_log ADD COLUMN new_articles INTEGER DEFAULT 0");
            error_log("UrlNav: 已添加new_articles字段到urlnav_refresh_log表");
        }
        if (!isset($refreshLogColumns['message'])) {
            $db->exec("ALTER TABLE urlnav_refresh_log ADD COLUMN message TEXT");
            error_log("UrlNav: 已添加message字段到urlnav_refresh_log表");
        }
        if (!isset($refreshLogColumns['details'])) {
            $db->exec("ALTER TABLE urlnav_refresh_log ADD COLUMN details TEXT");
            error_log("UrlNav: 已添加details字段到urlnav_refresh_log表");
        }

        $db = null;
    } catch (PDOException $e) {
        error_log('UrlNav数据库迁移失败: ' . $e->getMessage());
    }
}
/**
 * Open a PDO connection to the plugin's SQLite database.
 *
 * Resolves the path and creates the schema on demand, then connects with
 * foreign keys enabled, a 3 second busy timeout and WAL journaling. A
 * "database is locked" failure is retried (3 attempts in total, 1 second
 * apart); any other failure is rethrown immediately.
 *
 * @return PDO
 * @throws Exception When the connection cannot be established. The original
 *                   PDOException is attached as the previous exception.
 */
public static function getDbConnection()
{
    if (empty(self::$dbPath)) {
        self::initDbPath();
    }
    if (!file_exists(self::$dbPath)) {
        self::initDatabase();
    }
    $maxRetries = 3;
    $retryDelay = 1; // seconds
    for ($retry = 0; $retry < $maxRetries; $retry++) {
        if ($retry > 0) {
            error_log("UrlNav: 数据库连接重试 {$retry},等待 {$retryDelay} 秒...");
            sleep($retryDelay);
        }
        try {
            $db = new PDO('sqlite:' . self::$dbPath);
            $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
            $db->exec('PRAGMA foreign_keys = ON');
            $db->exec('PRAGMA busy_timeout = 3000'); // wait up to 3 seconds on a busy database
            $db->exec('PRAGMA journal_mode = WAL');  // WAL for better concurrency
            return $db;
        } catch (PDOException $e) {
            // Only a lock conflict is worth retrying, and only while attempts remain.
            if (strpos($e->getMessage(), 'database is locked') !== false && $retry < $maxRetries - 1) {
                continue;
            }
            // Chain the PDOException so its trace is not lost. The code is fixed to 0
            // because PDO error codes can be strings, which Exception does not accept.
            throw new Exception('数据库连接失败: ' . $e->getMessage(), 0, $e);
        }
    }
    // Defensive fallback: the loop above always returns or throws.
    throw new Exception('数据库连接失败:重试' . $maxRetries . '次后仍被锁定');
}
/**
 * Return the plugin's stored configuration.
 *
 * The value is fetched from Widget_Options on first use and kept in a
 * function-level static for later calls.
 *
 * @return mixed Whatever Widget_Options::plugin('UrlNav') returns.
 */
public static function getConfig()
{
    static $config = null;

    if ($config !== null) {
        return $config;
    }

    $config = Typecho_Widget::widget('Widget_Options')->plugin('UrlNav');
    return $config;
}
/**
 * Return the shared UrlNav_RssManager, creating it on first use.
 *
 * @return UrlNav_RssManager
 */
private static function getRssManager()
{
    if (self::$rssManager !== null) {
        return self::$rssManager;
    }

    self::$rssManager = new UrlNav_RssManager();
    return self::$rssManager;
}
/**
 * Entry point of the RSS refresh cron job.
 *
 * Sends an early JSON chunk so the web server sees activity, takes a lock
 * file to avoid overlapping runs (a lock older than 30 minutes is treated as
 * stale), runs refreshAllRssFeeds(true), writes a cron log entry and returns
 * a summary.
 *
 * Fixes compared with the previous version:
 * - The early chunk is pushed with ob_flush() (only when a buffer is active)
 *   followed by flush(). The old order, flush() then ob_flush(), left the
 *   data in PHP's buffer and raised a notice when no buffer existed.
 * - The lock file is written in a single call instead of touch() followed by
 *   file_put_contents().
 *
 * @return array Keys 'success', 'message', 'timestamp'; on a normal run also
 *               'refreshed', 'refresh_result', 'duration', 'successRssUrls'
 *               and 'failedRssUrls'; on an exception 'error'.
 */
public static function executeRssCronTask()
{
    // Send headers and a first chunk right away.
    if (!headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
        header('Cache-Control: no-cache, no-store, must-revalidate');
        header('Pragma: no-cache');
        header('Expires: 0');
        echo json_encode(['status' => 'starting', 'timestamp' => time()]);
        // PHP's own buffer first (if any), then the system buffer.
        if (ob_get_level() > 0) {
            @ob_flush();
        }
        flush();
    }
    $startTime = microtime(true);
    $lockFile = __DIR__ . '/db/rss_cron_running.lock';
    try {
        error_log("UrlNav RSS定时任务: 开始执行 " . date('Y-m-d H:i:s'));
        // Allow up to 5 minutes.
        @set_time_limit(300);
        @ini_set('max_execution_time', 300);

        // Skip this run when another one holds a lock younger than 30 minutes.
        $lockTimeout = 1800;
        if (file_exists($lockFile)) {
            $lockTime = @filemtime($lockFile);
            if ($lockTime && (time() - $lockTime) < $lockTimeout) {
                error_log("UrlNav RSS定时任务: 跳过执行,已在运行中");
                return array(
                    'success' => false,
                    'message' => '定时任务已在运行中,跳过本次执行',
                    'timestamp' => time()
                );
            }
            // Stale lock: remove it.
            @unlink($lockFile);
        }

        // Take the lock.
        @file_put_contents($lockFile, "Started at: " . date('Y-m-d H:i:s'));
        // Safety net: remove the lock even if the script terminates unexpectedly.
        register_shutdown_function(function() use ($lockFile) {
            if (file_exists($lockFile)) {
                @unlink($lockFile);
                error_log("UrlNav: shutdown函数删除RSS锁文件");
            }
        });

        $refreshResult = self::refreshAllRssFeeds(true);
        $duration = round(microtime(true) - $startTime, 2);

        // Release the lock.
        if (file_exists($lockFile)) {
            @unlink($lockFile);
        }

        // The summary carries the per-feed URL lists so they end up in the cron log as well.
        $result = array(
            'success' => $refreshResult['success'],
            'refreshed' => true,
            'refresh_result' => $refreshResult,
            'timestamp' => time(),
            'duration' => $duration,
            'message' => $refreshResult['message'],
            'successRssUrls' => $refreshResult['successRssUrls'] ?? array(),
            'failedRssUrls' => $refreshResult['failedRssUrls'] ?? array()
        );
        self::logCron('rss_auto_refresh', $result);
        error_log("UrlNav RSS定时任务: 执行完成,耗时 {$duration}");
        return $result;
    } catch (Exception $e) {
        error_log("UrlNav RSS定时任务异常: " . $e->getMessage());
        // Make sure the lock does not outlive a failed run.
        if (file_exists($lockFile)) {
            @unlink($lockFile);
        }
        return array(
            'success' => false,
            'error' => $e->getMessage(),
            'timestamp' => time(),
            'message' => 'RSS定时任务执行异常',
            'successRssUrls' => array(),
            'failedRssUrls' => array()
        );
    }
}
/**
 * Entry point of the site status check cron job (runs without any lock).
 *
 * Calls manualCheckStatus(null, false), updates the status statistics, writes
 * a cron log entry and returns a summary.
 *
 * @return array Keys 'success', 'timestamp', 'message'; on a completed run
 *               also 'status_checked', 'status_result' and 'duration'; on an
 *               exception 'error'.
 */
public static function executeStatusCronTask()
{
    try {
        $begin = microtime(true);
        error_log("UrlNav 状态检查定时任务: 开始执行 " . date('Y-m-d H:i:s'));

        // Automatic check: no explicit URL ids, not a batch check.
        $statusResult = self::manualCheckStatus(null, false);
        $duration = round(microtime(true) - $begin, 2);

        self::updateStatusStats($statusResult);

        // Dedicated cron log entry; here the payload is JSON-encoded before it is handed to logCron().
        $logPayload = array_merge($statusResult, array(
            'duration' => $duration,
            'timestamp' => time()
        ));
        self::logCron('status_auto_check', json_encode($logPayload));

        if (!$statusResult['success']) {
            error_log("UrlNav 状态检查定时任务: 执行失败: " . $statusResult['message']);
            return array(
                'success' => false,
                'status_checked' => false,
                'status_result' => $statusResult,
                'timestamp' => time(),
                'duration' => $duration,
                'message' => '状态检查定时任务执行失败'
            );
        }

        error_log("UrlNav 状态检查定时任务: 执行成功,耗时 {$duration}");
        return array(
            'success' => true,
            'status_checked' => $statusResult['total'] > 0,
            'status_result' => $statusResult,
            'timestamp' => time(),
            'duration' => $duration,
            'message' => '状态检查定时任务执行成功'
        );
    } catch (Exception $e) {
        error_log("UrlNav 状态检查定时任务异常: " . $e->getMessage());
        return array(
            'success' => false,
            'error' => $e->getMessage(),
            'timestamp' => time(),
            'message' => '状态检查定时任务执行异常'
        );
    }
}
/**
 * Run a callback while holding a lock file.
 *
 * When the lock file exists and is younger than one hour, the task is skipped.
 * An older lock is treated as stale and removed. The lock is released after
 * the callback returns, when it throws an Exception, and, as a safety net,
 * at script shutdown.
 *
 * Fixes compared with the previous version:
 * - The lock is created with fopen(..., 'x'), which fails if the file already
 *   exists. The former file_exists()/touch() sequence let two processes that
 *   start together both acquire the lock.
 * - Reported durations are measured from the moment the lock was taken
 *   instead of from filemtime(), which can return false.
 *
 * @param string   $lockFile Path of the lock file.
 * @param string   $taskType Label used in log lines and messages.
 * @param callable $callback Task to execute; its return value is passed through.
 * @return mixed The callback's result, or an array with 'success' => false
 *               when the task was skipped, the lock could not be created, or
 *               the callback threw.
 */
private static function executeLockedTask($lockFile, $taskType, $callback)
{
    $lockTimeout = 3600; // a lock older than one hour is considered stale

    if (file_exists($lockFile)) {
        $lockTime = @filemtime($lockFile);
        if ($lockTime && (time() - $lockTime) < $lockTimeout) {
            $lockDuration = time() - $lockTime;
            error_log("UrlNav {$taskType}: 跳过执行,锁文件存在 {$lockDuration}");
            return array(
                'success' => false,
                'message' => "{$taskType}定时任务正在运行,跳过本次执行",
                'timestamp' => time(),
                'lock_time' => $lockTime,
                'lock_duration' => $lockDuration
            );
        }
        // Stale lock: remove it.
        @unlink($lockFile);
        error_log("UrlNav {$taskType}: 删除超时的锁文件(已存在超过 {$lockTimeout} 秒)");
    }

    // Atomic creation: mode 'x' fails when the file exists, so only one process wins.
    $handle = @fopen($lockFile, 'x');
    if ($handle === false) {
        error_log("UrlNav {$taskType}: 无法创建锁文件");
        return array(
            'success' => false,
            'message' => '无法创建锁文件',
            'timestamp' => time()
        );
    }
    // Record the start time and task type inside the lock file.
    fwrite($handle, "Started at: " . date('Y-m-d H:i:s') . "\nTask type: {$taskType}");
    fclose($handle);
    $lockedAt = time();
    error_log("UrlNav {$taskType}: 开始执行定时任务 " . date('Y-m-d H:i:s'));

    try {
        // Safety net: release the lock even if the script terminates unexpectedly.
        register_shutdown_function(function() use ($lockFile, $taskType, $lockedAt) {
            if (file_exists($lockFile)) {
                $lockDuration = time() - $lockedAt;
                @unlink($lockFile);
                error_log("UrlNav {$taskType}: shutdown函数删除锁文件锁持续了 {$lockDuration}");
            }
        });

        $result = $callback();

        // Normal release.
        if (file_exists($lockFile)) {
            $lockDuration = time() - $lockedAt;
            @unlink($lockFile);
            error_log("UrlNav {$taskType}: 任务完成,删除锁文件,任务耗时 {$lockDuration}");
        }
        return $result;
    } catch (Exception $e) {
        // Release on failure.
        if (file_exists($lockFile)) {
            $lockDuration = time() - $lockedAt;
            @unlink($lockFile);
            error_log("UrlNav {$taskType}: 异常时删除锁文件,锁持续了 {$lockDuration}");
        }
        error_log("UrlNav {$taskType}定时任务异常: " . $e->getMessage());
        self::logCron('error', $e->getMessage());
        return array(
            'success' => false,
            'error' => $e->getMessage(),
            'timestamp' => time(),
            'message' => "{$taskType}定时任务执行异常"
        );
    }
}
/**
 * Manually remove a cron lock file (debugging aid).
 *
 * Fix compared with the previous version: for the 'rss' type both
 * rss_cron.lock and rss_cron_running.lock are removed. The RSS cron task
 * creates rss_cron_running.lock, so deleting only rss_cron.lock never released
 * the lock that actually blocks the task.
 *
 * @param string $cronType 'rss', 'status', or anything else for the generic cron.lock.
 * @return array 'success' (false only when an existing lock file could not be
 *               deleted), 'message' and 'timestamp'.
 */
public static function unlockCron($cronType = 'rss')
{
    $dbDir = __DIR__ . '/db/';
    if ($cronType === 'rss') {
        $lockFiles = array($dbDir . 'rss_cron.lock', $dbDir . 'rss_cron_running.lock');
    } elseif ($cronType === 'status') {
        $lockFiles = array($dbDir . 'status_cron.lock');
    } else {
        $lockFiles = array($dbDir . 'cron.lock');
    }

    $found = false;
    $failed = false;
    foreach ($lockFiles as $lockFile) {
        if (!file_exists($lockFile)) {
            continue;
        }
        $found = true;
        if (!@unlink($lockFile)) {
            $failed = true;
        }
    }

    if (!$found) {
        return array(
            'success' => true,
            'message' => "{$cronType}没有锁文件存在",
            'timestamp' => time()
        );
    }

    if ($failed) {
        error_log("UrlNav: 手动解锁{$cronType}失败");
        return array(
            'success' => false,
            'message' => '无法删除锁文件',
            'timestamp' => time()
        );
    }

    error_log("UrlNav: 手动解锁{$cronType}成功");
    return array(
        'success' => true,
        'message' => "{$cronType}定时任务锁已解除",
        'timestamp' => time()
    );
}
/**
 * Refresh a batch of RSS feeds.
 *
 * Selects up to rssRefreshLimit (clamped to 1-30) active URLs that have a
 * feed, preferring feeds that were never refreshed, then feeds that fail more
 * often than they succeed, then recently added ones. When nothing is due, up
 * to five of the least recently refreshed feeds are taken instead. Each feed
 * is handled by refreshSingleRssUrl(), with a 0.8 second pause between feeds.
 * The outcome is written with logRefresh().
 *
 * In cron mode the expired cache is cleaned first, the time limit is raised
 * to five minutes, processing stops after four minutes, and small JSON chunks
 * are written to the response to keep the connection alive.
 *
 * Fixes compared with the previous version:
 * - The fallback query no longer uses "NULLS FIRST". SQLite already sorts
 *   NULL first in ascending order, and the keyword is a syntax error before
 *   SQLite 3.30.
 * - The keep-alive chunk was guarded by !headers_sent(), which is false as
 *   soon as the first chunk has been flushed, so it never fired. It is now
 *   written whenever more than three seconds have passed.
 * - Output is pushed with ob_flush() (only when a buffer is active) followed
 *   by flush(), instead of flush() followed by an unconditional ob_flush().
 *
 * @param bool $isCron True when invoked by the cron endpoint.
 * @return array 'success', 'message', 'successRssUrls', 'failedRssUrls'; unless
 *               a global failure occurred also 'successCount', 'failureCount',
 *               'newArticles', 'totalFeeds', 'urlCount'; after a processed
 *               batch also 'duration'.
 */
public static function refreshAllRssFeeds($isCron = false)
{
    $startTime = microtime(true);

    // Push pending output to the client: PHP's buffer first (if any), then the system buffer.
    $flushOutput = function () {
        if (ob_get_level() > 0) {
            @ob_flush();
        }
        flush();
    };

    // Cron runs start by cleaning expired cache entries.
    if ($isCron) {
        self::cleanExpiredCache();
    }
    // Send headers and a first chunk right away.
    if ($isCron && !headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
        header('Cache-Control: no-cache, no-store, must-revalidate');
        header('Pragma: no-cache');
        header('Expires: 0');
        echo '{"status":"starting","message":"RSS刷新任务开始...","timestamp":' . time() . '}';
        $flushOutput();
    }
    // Allow up to 5 minutes in cron mode.
    if ($isCron) {
        @set_time_limit(300);
        @ini_set('max_execution_time', 300);
    }
    try {
        $db = self::getDbConnection();
        $config = self::getConfig();
        // Batch size from the settings, clamped to 1-30.
        // NOTE(review): the settings help text recommends 10-50; values above 30 are cut here.
        $limit = intval($config->rssRefreshLimit ?? 10);
        $limit = max(1, min($limit, 30));
        error_log("===== UrlNav RSS刷新开始时间: " . date('Y-m-d H:i:s') . " =====");
        error_log("配置数量: {$limit}");

        // Feeds that are due, ordered by priority and then by age of the last refresh.
        $sql = "
            SELECT id, rss_url, url, title, last_refresh, failure_count, success_count, created_at
            FROM urlnav_urls
            WHERE is_active = 1
            AND rss_url IS NOT NULL
            AND TRIM(rss_url) != ''
            AND (
                -- 从未刷新过的
                last_refresh IS NULL
                -- 或者超过1小时没刷新的
                OR last_refresh < datetime('now', '-1 hour')
                -- 或者失败次数多且超过30分钟没重试
                OR (failure_count > success_count AND last_refresh < datetime('now', '-30 minutes'))
            )
            ORDER BY
            CASE
                -- 最高优先级:从未刷新过的
                WHEN last_refresh IS NULL THEN 0
                -- 次高优先级:失败次数多于成功次数的
                WHEN failure_count > success_count THEN 1
                -- 中等优先级新添加的网址最近3天内
                WHEN created_at > datetime('now', '-3 days') THEN 2
                -- 低优先级:正常的
                ELSE 3
            END,
            -- 按刷新时间从早到晚排序
            CASE
                WHEN last_refresh IS NULL THEN created_at
                ELSE last_refresh
            END ASC
            LIMIT ?
        ";
        $stmt = $db->prepare($sql);
        $stmt->execute(array($limit));
        $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);

        // Nothing due: fall back to a few of the least recently refreshed feeds.
        if (empty($urls)) {
            error_log("UrlNav: 没有需要立即刷新的RSS源选择一些较久没刷新的");
            // Ascending order already puts NULL (never refreshed) first in SQLite.
            $sql = "
                SELECT id, rss_url, url, title, last_refresh, failure_count, success_count
                FROM urlnav_urls
                WHERE is_active = 1
                AND rss_url IS NOT NULL
                AND TRIM(rss_url) != ''
                ORDER BY last_refresh ASC
                LIMIT ?
            ";
            $stmt = $db->prepare($sql);
            $stmt->execute(array(min($limit, 5)));
            $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);
        }
        if (empty($urls)) {
            error_log("UrlNav: 没有需要刷新的RSS网址");
            return array(
                'success' => true,
                'message' => '没有需要刷新的RSS网址',
                'successCount' => 0,
                'failureCount' => 0,
                'newArticles' => 0,
                'totalFeeds' => 0,
                'urlCount' => 0,
                'successRssUrls' => array(),
                'failedRssUrls' => array()
            );
        }

        $urlCount = count($urls);
        error_log("UrlNav: 获取到 " . $urlCount . " 个需要刷新的RSS源");
        // Log every selected feed.
        foreach ($urls as $url) {
            $refreshStatus = $url['last_refresh'] ?
                "最后刷新: " . $url['last_refresh'] :
                "从未刷新";
            error_log("UrlNav: 选中 - ID: {$url['id']}, {$refreshStatus}, URL: {$url['rss_url']}");
        }

        $successCount = 0;
        $failureCount = 0;
        $totalFeeds = 0;
        $newArticles = 0;
        // Feed URLs by outcome, returned to the caller and written to the log.
        $successRssUrls = array();
        $failedRssUrls = array();

        // Fallbacks apply only when the setting is absent.
        $timeout = intval($config->fetchTimeout ?? 15);
        $retryTimes = intval($config->retryTimes ?? 2);
        $maxFeeds = intval($config->maxFeedsPerSite ?? 20);

        $lastOutputTime = $startTime;
        foreach ($urls as $index => $url) {
            $currentTime = microtime(true);
            $elapsedTime = $currentTime - $startTime;
            // Stop after 4 minutes so the run ends before the 5 minute limit.
            if ($isCron && $elapsedTime > 240) {
                error_log('UrlNav: 接近总超时4分钟停止处理');
                break;
            }
            // Keep-alive chunk once more than 3 seconds have passed since the last output.
            // No headers_sent() check here: headers are always sent by this point.
            if ($isCron && ($currentTime - $lastOutputTime) > 3) {
                echo '{"status":"processing","progress":"' . ($index+1) . '/' . $urlCount . '","timestamp":' . time() . '}';
                $flushOutput();
                $lastOutputTime = $currentTime;
            }
            try {
                error_log("UrlNav: [开始] 处理RSS #" . ($index+1) . " - ID: " . $url['id'] . ", URL: " . $url['rss_url']);
                error_log("UrlNav: 最后刷新时间: " . ($url['last_refresh'] ?: '从未刷新'));
                $urlResult = self::refreshSingleRssUrl($url, $timeout, $retryTimes, $maxFeeds);
                if ($urlResult['success']) {
                    $successCount++;
                    $newArticles += $urlResult['new_articles'];
                    $totalFeeds += $urlResult['total_feeds'];
                    $successRssUrls[] = $url['rss_url'];
                    error_log("UrlNav: [成功] ID: " . $url['id'] . ", 新增文章: " . $urlResult['new_articles'] . ", RSS: " . $url['rss_url']);
                } else {
                    $failureCount++;
                    $failedRssUrls[] = $url['rss_url'];
                    error_log("UrlNav: [失败] ID: " . $url['id'] . ", 错误: " . ($urlResult['error'] ?? '未知错误') . ", RSS: " . $url['rss_url']);
                }
                // Pause 0.8 seconds between feeds (not after the last one).
                if ($index < $urlCount - 1) {
                    usleep(800000);
                }
            } catch (Exception $e) {
                $failureCount++;
                $failedRssUrls[] = $url['rss_url'] . " [异常]";
                error_log('UrlNav: [异常] ID: ' . $url['id'] . ', RSS: ' . $url['rss_url'] . ', 异常: ' . $e->getMessage());
            }
        }

        $duration = round(microtime(true) - $startTime, 2);
        // Persist the outcome, including the per-feed URL lists.
        self::logRefresh($isCron ? 'cron' : 'manual', $successCount, $totalFeeds,
            $urlCount, $newArticles, null, $duration, 'rss',
            $successRssUrls, $failedRssUrls);
        $message = "刷新完成:成功 {$successCount} 个,失败 {$failureCount}";
        $result = array(
            'success' => $successCount > 0 || $urlCount == 0,
            'successCount' => $successCount,
            'failureCount' => $failureCount,
            'newArticles' => $newArticles,
            'totalFeeds' => $totalFeeds,
            'urlCount' => $urlCount,
            'duration' => $duration,
            'message' => $message,
            'successRssUrls' => $successRssUrls,
            'failedRssUrls' => $failedRssUrls
        );
        error_log("UrlNav: [完成] RSS刷新完成耗时 {$duration} 秒,{$message}");
        error_log("===== UrlNav RSS刷新结束 =====");
        return $result;
    } catch (Exception $e) {
        error_log('UrlNav: [全局异常] 刷新失败: ' . $e->getMessage());
        error_log("===== UrlNav RSS刷新异常结束 =====");
        return array(
            'success' => false,
            'message' => '刷新失败: ' . $e->getMessage(),
            'successRssUrls' => array(),
            'failedRssUrls' => array()
        );
    }
}
/**
 * Collect refresh statistics for every active URL that has a non-blank RSS address.
 *
 * Keys in the returned array:
 *  - total_rss_sources: number of such URLs
 *  - never_refreshed:   rows whose last_refresh is NULL
 *  - today_refreshed:   rows refreshed on the current (UTC) date
 *  - week_refreshed:    rows refreshed within the last 7 days
 *  - oldest_refresh:    smallest non-NULL last_refresh value
 *  - oldest_days:       age of oldest_refresh in days (one decimal); only
 *                       present when oldest_refresh exists and can be parsed
 *  - need_refresh:      rows never refreshed or not refreshed for over a day
 *  - total_success / total_failure / total_refreshes: summed counters
 *  - success_rate:      total_success / total_refreshes as a percentage
 *
 * @return array The statistics above, or an empty array when a query fails.
 */
public static function getRssRefreshStatus()
{
    // Every statistic looks at the same set of rows, so the filter is written once.
    $activeRss = "is_active = 1 AND rss_url IS NOT NULL AND TRIM(rss_url) != ''";

    try {
        $db = self::getDbConnection();

        // Runs a COUNT(*) over the shared row set plus an optional extra condition.
        $count = static function ($extra = '') use ($db, $activeRss) {
            $stmt = $db->query("SELECT COUNT(*) FROM urlnav_urls WHERE {$activeRss}{$extra}");
            return $stmt->fetchColumn();
        };

        $stats = array();
        $stats['total_rss_sources'] = $count();
        $stats['never_refreshed'] = $count(" AND last_refresh IS NULL");
        $stats['today_refreshed'] = $count(" AND date(last_refresh) = date('now')");
        $stats['week_refreshed'] = $count(" AND last_refresh >= datetime('now', '-7 days')");

        // Oldest refresh time among the sources that have been refreshed at least once
        $stmt = $db->query("SELECT MIN(last_refresh) FROM urlnav_urls WHERE {$activeRss} AND last_refresh IS NOT NULL");
        $oldest = $stmt->fetchColumn();
        $stats['oldest_refresh'] = $oldest;
        if ($oldest) {
            // The refresh code in this class stores last_refresh via SQLite's
            // CURRENT_TIMESTAMP, which is UTC. Parsing it in PHP's local timezone
            // would shift the age by the UTC offset, so parse it explicitly as UTC.
            $oldestTime = date_create($oldest, new DateTimeZone('UTC'));
            if ($oldestTime !== false) {
                $stats['oldest_days'] = round((time() - $oldestTime->getTimestamp()) / 86400, 1);
            }
        }

        // Sources that were never refreshed or not refreshed for more than a day
        $stats['need_refresh'] = $count(" AND (last_refresh IS NULL OR last_refresh < datetime('now', '-1 day'))");

        // Success-rate figures from the per-URL counters
        $stmt = $db->query("SELECT
SUM(success_count) as total_success,
SUM(failure_count) as total_failure,
SUM(refresh_count) as total_refreshes
FROM urlnav_urls WHERE {$activeRss}");
        $countStats = $stmt->fetch(PDO::FETCH_ASSOC);
        // SUM() yields NULL when no row matches, hence the fallbacks to 0
        $stats['total_success'] = $countStats['total_success'] ?? 0;
        $stats['total_failure'] = $countStats['total_failure'] ?? 0;
        $stats['total_refreshes'] = $countStats['total_refreshes'] ?? 0;
        $stats['success_rate'] = $stats['total_refreshes'] > 0 ?
            round(($stats['total_success'] / $stats['total_refreshes']) * 100, 1) : 0;

        return $stats;
    } catch (Exception $e) {
        error_log('UrlNav: 获取刷新状态失败: ' . $e->getMessage());
        return array();
    }
}
/**
 * Refresh a single RSS source: fetch and parse the feed, store up to $maxFeeds
 * articles in urlnav_rss_cache and update the counters on the urlnav_urls row.
 *
 * @param array $url        Row describing the source; reads 'id', 'rss_url' and 'url'.
 * @param int   $timeout    Timeout in seconds, passed to parseRssFeedWithRetry().
 * @param int   $retryTimes Retry count, passed to parseRssFeedWithRetry().
 * @param int   $maxFeeds   Maximum number of articles processed per run.
 * @return array Keys: 'success' (bool), 'new_articles' (int), 'total_feeds' (int),
 *               'error' (string|null). Failures are reported through the array;
 *               exceptions are caught and logged.
 */
private static function refreshSingleRssUrl($url, $timeout = 8, $retryTimes = 1, $maxFeeds = 10)
{
    $urlId = $url['id'];
    $rssUrl = trim($url['rss_url']);
    // Declared before the try block so the catch handler can tell whether a
    // connection was ever obtained.
    $db = null;

    // Byte-limited truncation that never cuts a multi-byte UTF-8 character in
    // half; falls back to substr() when mbstring is not available.
    $clip = static function ($text, $maxBytes) {
        return function_exists('mb_strcut')
            ? mb_strcut($text, 0, $maxBytes, 'UTF-8')
            : substr($text, 0, $maxBytes);
    };

    error_log("UrlNav: === 开始处理RSS ID: {$urlId} ===");
    error_log("UrlNav: RSS URL: {$rssUrl}");

    try {
        $db = self::getDbConnection();

        // Count the attempt up front so refresh_count also covers failed runs
        $stmt = $db->prepare("UPDATE urlnav_urls SET refresh_count = refresh_count + 1 WHERE id = ?");
        $stmt->execute(array($urlId));
        error_log("UrlNav: 更新刷新统计成功");

        // Fetch and parse the feed
        error_log("UrlNav: 开始解析RSS内容...");
        $feeds = self::parseRssFeedWithRetry($rssUrl, $retryTimes, $timeout);
        error_log("UrlNav: RSS解析完成获取到 " . count($feeds) . " 篇文章");

        if (empty($feeds)) {
            error_log("UrlNav: 没有获取到文章数据");
            $stmt = $db->prepare("
UPDATE urlnav_urls SET
last_refresh = CURRENT_TIMESTAMP,
last_error = '无可用数据'
WHERE id = ?
");
            $stmt->execute(array($urlId));
            error_log("UrlNav: === 处理完成(无数据)===");
            return array(
                'success' => true,
                'new_articles' => 0,
                'total_feeds' => 0,
                'error' => null
            );
        }

        // Cap the number of articles handled per source
        $feeds = array_slice($feeds, 0, $maxFeeds);
        error_log("UrlNav: 限制后文章数: " . count($feeds));

        // Prepared once and reused for every article. INSERT OR IGNORE leaves
        // rows that violate a uniqueness constraint untouched.
        $insert = $db->prepare("
INSERT OR IGNORE INTO urlnav_rss_cache
(url_id, feed_title, feed_link, feed_description, full_content, pub_date, guid, cached_at, is_fresh)
VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, 1)
");

        $addedCount = 0;
        foreach ($feeds as $feedIndex => $feed) {
            try {
                // Give every column a value. Human-readable text is clipped on a
                // character boundary; link and guid keep plain byte truncation so
                // values stored by earlier runs still compare equal.
                // NOTE(review): presumably guid/link take part in de-duplication — confirm against the table schema.
                $title = !empty($feed['title']) ? $clip($feed['title'], 255) : '无标题';
                $link = !empty($feed['link']) ? substr($feed['link'], 0, 500) : ($url['url'] ?? '');
                $description = !empty($feed['description']) ? $clip($feed['description'], 1000) : '';
                // Falls back to the description when the parser supplied no full text
                $fullContent = !empty($feed['full_content']) ? $clip($feed['full_content'], 5000) : $description;
                $pubDate = !empty($feed['pubDate']) ? $feed['pubDate'] : date('Y-m-d H:i:s');
                $guid = !empty($feed['guid']) ? substr($feed['guid'], 0, 255) : md5($link . $pubDate);

                error_log("UrlNav: 处理文章 #" . ($feedIndex+1) . ": {$title}");

                $insert->execute(array(
                    $urlId,
                    $title,
                    $link,
                    $description,
                    $fullContent,
                    $pubDate,
                    $guid
                ));

                // rowCount() is 0 when the insert was ignored
                if ($insert->rowCount() > 0) {
                    $addedCount++;
                    error_log("UrlNav: 文章 #" . ($feedIndex+1) . " 插入成功");
                } else {
                    error_log("UrlNav: 文章 #" . ($feedIndex+1) . " 已存在,跳过");
                }
            } catch (Exception $e) {
                // One bad article must not abort the rest of the feed
                error_log('UrlNav: 文章处理异常: ' . $e->getMessage());
                continue;
            }
        }

        // Record the successful run and clear any previous error
        $stmt = $db->prepare("
UPDATE urlnav_urls SET
success_count = success_count + 1,
last_refresh = CURRENT_TIMESTAMP,
last_error = NULL
WHERE id = ?
");
        $stmt->execute(array($urlId));

        error_log("UrlNav: 成功解析RSS - ID: {$urlId}, 获取到 " . count($feeds) . " 篇文章, 新增 {$addedCount}");
        error_log("UrlNav: === 处理完成(成功)===");
        return array(
            'success' => true,
            'new_articles' => $addedCount,
            'total_feeds' => count($feeds),
            'error' => null
        );
    } catch (Exception $e) {
        $errorMessage = $clip($e->getMessage(), 500);
        error_log("UrlNav: RSS解析失败 - 错误: {$errorMessage}");

        // Persist the failure only when a connection exists, and never let a
        // second database error hide the original failure from the caller.
        if ($db !== null) {
            try {
                $stmt = $db->prepare("
UPDATE urlnav_urls SET
failure_count = failure_count + 1,
last_refresh = CURRENT_TIMESTAMP,
last_error = ?
WHERE id = ?
");
                $stmt->execute(array($errorMessage, $urlId));
            } catch (Exception $dbError) {
                error_log('UrlNav: 记录失败状态时出错: ' . $dbError->getMessage());
            }
        }

        error_log("UrlNav: === 处理完成(失败)===");
        return array(
            'success' => false,
            'new_articles' => 0,
            'total_feeds' => 0,
            'error' => $errorMessage
        );
    }
}
/**
 * Count the active URLs with a non-blank RSS address that are due for a
 * refresh: either never refreshed, or last refreshed at least one hour ago.
 *
 * @return int|string The count as delivered by the database driver, or 0 when
 *                    the query fails.
 */
public static function getUrlsNeedingRefresh()
{
    try {
        $row = self::getDbConnection()->query("
SELECT COUNT(*) as count FROM urlnav_urls
WHERE is_active = 1
AND rss_url IS NOT NULL
AND TRIM(rss_url) != ''
AND (
last_refresh IS NULL
OR last_refresh <= datetime('now', '-1 hour')
)
")->fetch(PDO::FETCH_ASSOC);

        return $row['count'] ?? 0;
    } catch (Exception $failure) {
        // Treat a query failure as "nothing to refresh" after logging it
        error_log('UrlNav: 获取需要刷新的网址数量失败: ' . $failure->getMessage());
    }

    return 0;
}
/**
 * Fetch and parse an RSS feed, retrying when an attempt fails.
 *
 * The first attempt runs immediately. Retry number i (1-based) first waits
 * i * 2 seconds.
 *
 * @param string $rssUrl     Feed address.
 * @param int    $retryTimes Number of extra attempts after the first one;
 *                           negative values are treated as 0.
 * @param int    $timeout    Timeout in seconds, passed to parseRssFeed().
 * @return array Articles as returned by parseRssFeed().
 * @throws Exception When every attempt failed; the last failure is attached
 *                   as the previous exception.
 */
private static function parseRssFeedWithRetry($rssUrl, $retryTimes = 1, $timeout = 8)
{
    // Guarantee at least one attempt so $lastError is always set before the final throw
    $retryTimes = max(0, (int)$retryTimes);
    $lastError = null;

    for ($attempt = 0; $attempt <= $retryTimes; $attempt++) {
        if ($attempt > 0) {
            // Back off a little longer before each further retry
            sleep($attempt * 2);
            error_log("UrlNav: RSS重试第{$attempt}次: {$rssUrl}");
        }

        try {
            return self::parseRssFeed($rssUrl, $timeout);
        } catch (Exception $e) {
            $lastError = $e;
            $errorMsg = $e->getMessage();

            // A DNS failure is a problem with the feed's host, whatever host it is,
            // so name that host in the hint.
            if (strpos($errorMsg, 'getaddrinfo failed') !== false) {
                $host = parse_url($rssUrl, PHP_URL_HOST) ?: $rssUrl;
                error_log("UrlNav: DNS解析失败建议检查{$host}域名是否正常");
            }

            if ($attempt < $retryTimes) {
                error_log("UrlNav: RSS解析失败" . ($attempt+1) . "次重试: " . $errorMsg);
            }
        }
    }

    // Every attempt failed
    throw new Exception("RSS解析失败: " . $lastError->getMessage(), 0, $lastError);
}
/**
 * Download a feed and convert its entries into a uniform list of articles.
 *
 * Four layouts are recognised, in this order: RSS 2.0 (channel->item), Atom
 * (entry), feeds with item elements directly under the root, and documents
 * that only expose Atom entries through a namespace.
 *
 * Full-text handling:
 *  - When isInFullTextWhitelist() returns a selector for the feed, up to
 *    "fullTextPerSite" (plugin config, default 3) articles are fetched from
 *    their web page through fetchFullContentWithSelector().
 *  - For all other feeds the full-text field is only filled when the feed's
 *    own description/summary or content is at least 500 bytes long;
 *    otherwise it is left empty.
 *  - Full text longer than 10000 bytes is truncated and marked as such.
 *
 * Note: TLS certificate verification is switched off for the download.
 *
 * @param string $rssUrl  Feed address.
 * @param int    $timeout HTTP timeout in seconds.
 * @return array List of arrays with the keys 'title', 'link', 'description',
 *               'full_content', 'pubDate' (Y-m-d H:i:s) and 'guid'.
 * @throws Exception When the feed cannot be downloaded, parsed or recognised,
 *                   or contains no articles. The message is prefixed with the
 *                   feed URL and the cause is attached as previous exception.
 */
private static function parseRssFeed($rssUrl, $timeout = 8)
{
    error_log("UrlNav: >>> 开始解析RSS: {$rssUrl}");

    // Byte-limited truncation that never cuts a multi-byte UTF-8 character in
    // half; falls back to substr() when mbstring is not available.
    $cut = static function ($text, $maxBytes) {
        return function_exists('mb_strcut')
            ? mb_strcut($text, 0, $maxBytes, 'UTF-8')
            : substr($text, 0, $maxBytes);
    };

    // Normalises a feed date to 'Y-m-d H:i:s'. A missing or unparsable date
    // becomes the current time instead of the Unix epoch.
    $toDate = static function ($raw) {
        $timestamp = strtotime(trim((string)$raw));
        return date('Y-m-d H:i:s', $timestamp !== false ? $timestamp : time());
    };

    try {
        // HTTP options: timeout, browser-like headers, and keep the body even on error statuses
        $context = stream_context_create(array(
            'http' => array(
                'timeout' => $timeout,
                'ignore_errors' => true,
                'header' => "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36\r\n" .
                    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" .
                    "Accept-Language: zh-CN,zh;q=0.9,en;q=0.8\r\n"
            ),
            'ssl' => array(
                'verify_peer' => false,
                'verify_peer_name' => false,
                'allow_self_signed' => true
            )
        ));

        error_log("UrlNav: 尝试获取RSS内容...");
        $content = @file_get_contents($rssUrl, false, $context);
        if ($content === false) {
            $error = error_get_last();
            $errorMsg = $error['message'] ?? '未知错误';
            error_log("UrlNav: file_get_contents失败: {$errorMsg}");
            if (isset($http_response_header)) {
                error_log("UrlNav: HTTP响应头: " . implode(" | ", $http_response_header));
            }
            throw new Exception('无法获取RSS内容: ' . $errorMsg);
        }
        error_log("UrlNav: 获取内容成功,长度: " . strlen($content) . " 字节");

        // After redirects the header list holds one status line per response;
        // the last one describes the body that was actually received.
        $statusLine = '';
        if (isset($http_response_header) && is_array($http_response_header)) {
            foreach ($http_response_header as $headerLine) {
                if (strncmp($headerLine, 'HTTP/', 5) === 0) {
                    $statusLine = $headerLine;
                }
            }
        }
        if ($statusLine !== '') {
            error_log("UrlNav: HTTP状态: {$statusLine}");
            $statusCode = preg_match('/^HTTP\/\S+\s+(\d{3})/', $statusLine, $statusMatch) ? (int)$statusMatch[1] : 0;
            if ($statusCode === 404) {
                throw new Exception('RSS源不存在 (404)');
            }
            if ($statusCode === 403) {
                throw new Exception('拒绝访问 (403)');
            }
            if ($statusCode === 500) {
                throw new Exception('服务器内部错误 (500)');
            }
        }

        if (empty($content) || trim($content) === '') {
            error_log("UrlNav: RSS内容为空");
            throw new Exception('RSS内容为空');
        }

        // Strip a UTF-8 byte order mark
        if (substr($content, 0, 3) == "\xEF\xBB\xBF") {
            $content = substr($content, 3);
            error_log("UrlNav: 已移除BOM头");
        }

        // Light XML repair: drop control characters and escape ampersands that do
        // not start a predefined entity or a decimal/hexadecimal character reference.
        $content = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', '', $content);
        $content = preg_replace('/&(?!(amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/', '&amp;', $content);

        // Try two parsers for maximum compatibility
        libxml_use_internal_errors(true);
        libxml_clear_errors();
        $xml = null;

        // Parser 1: DOMDocument in recovery mode
        try {
            $dom = new DOMDocument();
            $dom->recover = true;
            $dom->strictErrorChecking = false;
            if (@$dom->loadXML($content)) {
                error_log("UrlNav: 使用DOMDocument解析成功");
                $xml = simplexml_import_dom($dom);
            }
        } catch (Exception $e) {
            error_log("UrlNav: DOMDocument解析失败: " . $e->getMessage());
        }

        // Parser 2: plain SimpleXML when DOMDocument produced nothing
        if ($xml === null) {
            error_log("UrlNav: 尝试SimpleXML解析...");
            $xml = simplexml_load_string($content, 'SimpleXMLElement', LIBXML_NOCDATA);
        }

        if ($xml === false) {
            $errorMsg = 'XML解析失败';
            $xmlErrors = libxml_get_errors();
            if (!empty($xmlErrors)) {
                $errorMsg .= ': ' . $xmlErrors[0]->message;
                error_log("UrlNav: XML错误: " . $xmlErrors[0]->message);
            }
            libxml_clear_errors();
            if (strpos($content, '<html') !== false) {
                $errorMsg .= ' (可能是HTML页面而非RSS)';
            }
            throw new Exception($errorMsg);
        }
        libxml_clear_errors();

        $feeds = array();

        // Plugin configuration for page fetching
        $config = self::getConfig();
        $fullTextPerSite = intval($config->fullTextPerSite ?? 3);
        $pageFetchTimeout = intval($config->pageFetchTimeout ?? 10);
        $fullTextCount = 0; // pages fetched so far for this feed

        // A non-false result is the selector to use for page fetching
        $selector = self::isInFullTextWhitelist($rssUrl);
        $isInWhitelist = ($selector !== false);
        error_log("UrlNav: 白名单检查 - 是否在白名单: " . ($isInWhitelist ? '是' : '否') .
            ($isInWhitelist ? ",选择器: {$selector}" : ""));

        // ========== RSS 2.0: channel->item ==========
        if (isset($xml->channel) && isset($xml->channel->item)) {
            error_log("UrlNav: 检测到RSS格式 (channel->item)");
            // Iterating SimpleXML children yields the tag name as key, not a
            // position, so the position is counted explicitly.
            $itemIndex = -1;
            foreach ($xml->channel->item as $item) {
                $itemIndex++;
                // Each item is handled on its own so one bad item cannot spoil the feed
                try {
                    $fullContent = '';
                    $description = isset($item->description) ? (string)$item->description : '';
                    $articleTitle = isset($item->title) ? (string)$item->title : '无标题文章';
                    $articleLink = isset($item->link) ? (string)$item->link : '';

                    // Never store an empty title
                    if (empty($articleTitle)) {
                        $articleTitle = '未命名文章 ' . date('Y-m-d H:i:s');
                    }
                    // Use the guid as link when no link element is present
                    if (empty($articleLink) && isset($item->guid)) {
                        $articleLink = (string)$item->guid;
                    }
                    error_log("UrlNav: 处理文章: {$articleTitle}");

                    $namespaces = $item->getNamespaces(true);

                    // 1. Prefer content:encoded
                    $encodedContent = '';
                    if (isset($namespaces['content'])) {
                        $contentNs = $item->children($namespaces['content']);
                        if (isset($contentNs->encoded)) {
                            $encodedContent = (string)$contentNs->encoded;
                            if (!empty($encodedContent) && trim($encodedContent) !== '') {
                                $fullContent = $encodedContent;
                                error_log("UrlNav: ✓ 找到content:encoded完整内容长度: " . strlen($fullContent));
                            }
                        }
                    }

                    // 2. Otherwise the description
                    if (empty($fullContent) && !empty($description)) {
                        $fullContent = $description;
                        error_log("UrlNav: 使用description作为内容长度: " . strlen($description));
                    }

                    // 3. Otherwise dc:description
                    if (empty($fullContent) && isset($namespaces['dc'])) {
                        $dcNs = $item->children($namespaces['dc']);
                        if (isset($dcNs->description) && !empty((string)$dcNs->description)) {
                            $fullContent = (string)$dcNs->description;
                            error_log("UrlNav: 找到dc:description内容");
                        }
                    }

                    // 4. Otherwise the first long-ish child element that is not a known short field
                    if (empty($fullContent)) {
                        foreach ($item->children() as $child) {
                            $childName = $child->getName();
                            $childContent = (string)$child;
                            if (in_array($childName, ['title', 'link', 'guid', 'pubDate', 'author', 'category'])) {
                                continue;
                            }
                            if (strlen($childContent) > 100) {
                                $fullContent = $childContent;
                                error_log("UrlNav: 从子元素 {$childName} 提取内容");
                                break;
                            }
                        }
                    }

                    // ----- Decide whether to fetch the article page -----
                    $pageContent = null;
                    $rssContentLength = strlen($fullContent);
                    // Only whitelisted feeds, and only until the per-feed limit is reached
                    if ($isInWhitelist && $fullTextCount < $fullTextPerSite) {
                        $needPageFetch = true;
                        $fullTextCount++;
                        error_log("UrlNav: 白名单抓取全文 #{$fullTextCount}/{$fullTextPerSite} - {$articleTitle}");
                    } else {
                        $needPageFetch = false;
                        if ($isInWhitelist && $fullTextCount >= $fullTextPerSite) {
                            error_log("UrlNav: 已达白名单抓取限制({$fullTextCount}/{$fullTextPerSite}),跳过");
                        } elseif (!$isInWhitelist) {
                            error_log("UrlNav: 非白名单网站使用RSS摘要{$rssContentLength}字符),不抓取全文");
                        }
                    }

                    if ($needPageFetch && !empty($articleLink)) {
                        // Pause 300-800 ms between page fetches to go easy on the server
                        if ($itemIndex > 0) {
                            usleep(rand(300000, 800000));
                        }
                        $pageContent = self::fetchFullContentWithSelector($articleLink, $selector, $pageFetchTimeout);
                        if (!empty($pageContent)) {
                            $pageLength = strlen($pageContent);
                            if ($pageLength > $rssContentLength + 300) {
                                // The page is clearly richer than the feed text: replace it
                                $fullContent = $pageContent;
                                error_log("UrlNav: ✓ 页面抓取成功,获得 {$pageLength} 字符内容");
                            } elseif ($pageLength > 0) {
                                // Otherwise append the page text to the feed text
                                $fullContent = $fullContent . "\n\n[页面补充内容]\n" . $pageContent;
                                error_log("UrlNav: ✓ 合并页面内容,总长度: " . strlen($fullContent));
                            } else {
                                error_log("UrlNav: ✗ 页面抓取未获得内容");
                            }
                        } else {
                            error_log("UrlNav: ✗ 页面抓取失败");
                        }
                    }

                    // ----- Final full-text value -----
                    if (!$isInWhitelist) {
                        // Keep full text only when description or content:encoded has at least 500 bytes
                        $descriptionLength = strlen($description);
                        $encodedContentLength = strlen($encodedContent);
                        if ($descriptionLength >= 500 || $encodedContentLength >= 500) {
                            // Collapse whitespace, then truncate
                            $fullContent = preg_replace('/\s+/', ' ', $fullContent);
                            if (strlen($fullContent) > 10000) {
                                $fullContent = $cut($fullContent, 10000) . '... [内容已截断]';
                                error_log("UrlNav: 内容过长已截断至10000字符");
                            }
                            error_log("UrlNav: 非白名单网站description({$descriptionLength})或content:encoded({$encodedContentLength})长度≥500字存入全文字段");
                        } else {
                            // Too short: leave the full-text field empty
                            $fullContent = '';
                            error_log("UrlNav: 非白名单网站description({$descriptionLength})和content:encoded({$encodedContentLength})都小于500字全文字段留空");
                        }
                    } else {
                        if (!empty($fullContent)) {
                            // Collapse whitespace, then truncate
                            $fullContent = preg_replace('/\s+/', ' ', $fullContent);
                            if (strlen($fullContent) > 10000) {
                                $fullContent = $cut($fullContent, 10000) . '... [内容已截断]';
                                error_log("UrlNav: 内容过长已截断至10000字符");
                            }
                        } else {
                            error_log("UrlNav: 警告:未找到任何内容");
                            $fullContent = $description;
                        }
                    }

                    $pubDate = $toDate($item->pubDate);
                    $guid = (string)$item->guid;

                    $feeds[] = array(
                        'title' => $articleTitle,
                        'link' => $articleLink,
                        'description' => $description,
                        'full_content' => $fullContent, // may be empty for non-whitelisted feeds
                        'pubDate' => $pubDate,
                        'guid' => $guid
                    );
                    error_log("UrlNav: ✓ 文章解析完成: {$articleTitle}");
                } catch (Exception $e) {
                    error_log("UrlNav: 文章处理失败,跳过: " . $e->getMessage());
                    continue;
                }
            }
        }
        // ========== Atom: entry ==========
        elseif (isset($xml->entry) || ($xml->getName() == 'feed' && isset($xml->children('http://www.w3.org/2005/Atom')->entry))) {
            error_log("UrlNav: 检测到Atom格式");
            $entries = isset($xml->entry) ? $xml->entry : $xml->children('http://www.w3.org/2005/Atom')->entry;
            $entryIndex = -1;
            foreach ($entries as $entry) {
                $entryIndex++;
                $link = '';
                $title = '';
                $description = '';
                $fullContent = '';
                $pubDate = '';
                $guid = '';

                // Link: first link element with rel="alternate" or without rel
                if (isset($entry->link)) {
                    foreach ($entry->link as $linkElem) {
                        $attributes = $linkElem->attributes();
                        if ((string)$attributes['rel'] == 'alternate' || empty((string)$attributes['rel'])) {
                            $link = (string)$attributes['href'];
                            break;
                        }
                    }
                }
                // Fall back to the entry id
                if (empty($link) && isset($entry->id)) {
                    $link = (string)$entry->id;
                }

                if (isset($entry->title)) {
                    $title = (string)$entry->title;
                }
                if (isset($entry->summary)) {
                    $description = (string)$entry->summary;
                }

                // 1. Prefer the content element
                $atomContent = '';
                if (isset($entry->content)) {
                    $contentElem = $entry->content;
                    $attributes = $contentElem->attributes();
                    $type = (string)($attributes['type'] ?? '');
                    if ($type === 'html' || $type === 'xhtml' || empty($type)) {
                        $atomContent = (string)$contentElem;
                        $fullContent = $atomContent;
                        error_log("UrlNav: 找到Atom content完整内容类型: {$type},长度: " . strlen($fullContent));
                    } elseif ($type === 'text') {
                        // Plain text is escaped so it can be shown as HTML
                        $atomContent = htmlspecialchars((string)$contentElem);
                        $fullContent = $atomContent;
                        error_log("UrlNav: 找到Atom text内容长度: " . strlen($fullContent));
                    }
                }

                // 2. Otherwise the summary
                if (empty($fullContent) && isset($entry->summary)) {
                    $fullContent = $description;
                    error_log("UrlNav: 使用Atom summary作为内容长度: " . strlen($fullContent));
                }

                // 3. Unwrap a literal CDATA section left in the text
                if (!empty($fullContent) && strpos($fullContent, '<![CDATA[') !== false) {
                    if (preg_match('/<!\[CDATA\[(.*?)\]\]>/s', $fullContent, $matches)) {
                        $fullContent = $matches[1];
                        error_log("UrlNav: 从CDATA提取Atom内容");
                    }
                }

                // ----- Decide whether to fetch the article page -----
                $pageContent = null;
                $atomContentLength = strlen($fullContent);
                if ($isInWhitelist && $fullTextCount < $fullTextPerSite) {
                    $needPageFetch = true;
                    $fullTextCount++;
                    error_log("UrlNav Atom: 白名单抓取全文 #{$fullTextCount}/{$fullTextPerSite} - {$title}");
                } else {
                    $needPageFetch = false;
                    error_log("UrlNav Atom: " . ($isInWhitelist ? "已达限制" : "非白名单") . "使用Atom内容{$atomContentLength}字符)");
                }

                if ($needPageFetch && !empty($link)) {
                    if ($entryIndex > 0) {
                        usleep(rand(300000, 800000));
                    }
                    $pageContent = self::fetchFullContentWithSelector($link, $selector, $pageFetchTimeout);
                    // Only replace the feed text when the page is clearly richer
                    if (!empty($pageContent) && strlen($pageContent) > $atomContentLength + 300) {
                        $fullContent = $pageContent;
                        error_log("UrlNav: ✓ Atom页面抓取成功");
                    }
                }

                // ----- Final full-text value -----
                if (!$isInWhitelist) {
                    // Keep full text only when summary or content has at least 500 bytes
                    $descriptionLength = strlen($description);
                    $atomContentLength = strlen($atomContent);
                    if ($descriptionLength >= 500 || $atomContentLength >= 500) {
                        if (strlen($fullContent) > 10000) {
                            $fullContent = $cut($fullContent, 10000) . '... [内容已截断]';
                            error_log("UrlNav: Atom内容过长已截断");
                        }
                        error_log("UrlNav Atom: 非白名单网站summary({$descriptionLength})或content({$atomContentLength})长度≥500字存入全文字段");
                    } else {
                        $fullContent = '';
                        error_log("UrlNav Atom: 非白名单网站summary({$descriptionLength})和content({$atomContentLength})都小于500字全文字段留空");
                    }
                } else {
                    if (!empty($fullContent) && strlen($fullContent) > 10000) {
                        $fullContent = $cut($fullContent, 10000) . '... [内容已截断]';
                        error_log("UrlNav: Atom内容过长已截断");
                    }
                }

                // Publication time: updated, then published, then now
                if (isset($entry->updated)) {
                    $pubDate = $toDate($entry->updated);
                } elseif (isset($entry->published)) {
                    $pubDate = $toDate($entry->published);
                } else {
                    $pubDate = date('Y-m-d H:i:s');
                }

                // guid: entry id, or a hash of link and date
                if (isset($entry->id)) {
                    $guid = (string)$entry->id;
                } else {
                    $guid = md5($link . $pubDate);
                }

                $feeds[] = array(
                    'title' => $title,
                    'link' => $link,
                    'description' => $description,
                    'full_content' => $fullContent, // may be empty for non-whitelisted feeds
                    'pubDate' => $pubDate,
                    'guid' => $guid
                );
            }
        }
        // ========== item elements directly under the root ==========
        elseif (isset($xml->item)) {
            error_log("UrlNav: 检测到RSS格式 (直接item)");
            $itemIndex = -1;
            foreach ($xml->item as $item) {
                $itemIndex++;
                $fullContent = '';
                $description = isset($item->description) ? (string)$item->description : '';
                $articleTitle = (string)$item->title;
                $articleLink = (string)$item->link;

                // Prefer content:encoded
                $encodedContent = '';
                $namespaces = $item->getNamespaces(true);
                if (isset($namespaces['content'])) {
                    $contentNs = $item->children($namespaces['content']);
                    if (isset($contentNs->encoded)) {
                        $encodedContent = (string)$contentNs->encoded;
                        $fullContent = $encodedContent;
                        error_log("UrlNav: 找到content:encoded完整内容");
                    }
                }
                // Otherwise the description
                if (empty($fullContent) && !empty($description)) {
                    $fullContent = $description;
                    error_log("UrlNav: 使用description作为内容");
                }

                // ----- Decide whether to fetch the article page -----
                $pageContent = null;
                $rssContentLength = strlen($fullContent);
                if ($isInWhitelist && $fullTextCount < $fullTextPerSite) {
                    $needPageFetch = true;
                    $fullTextCount++;
                    error_log("UrlNav Other: 白名单抓取全文 #{$fullTextCount}/{$fullTextPerSite} - {$articleTitle}");
                } else {
                    $needPageFetch = false;
                }

                if ($needPageFetch && !empty($articleLink)) {
                    if ($itemIndex > 0) {
                        usleep(rand(300000, 800000));
                    }
                    $pageContent = self::fetchFullContentWithSelector($articleLink, $selector, $pageFetchTimeout);
                    if (!empty($pageContent) && strlen($pageContent) > strlen($fullContent) + 300) {
                        $fullContent = $pageContent;
                    }
                }

                // ----- Final full-text value -----
                if (!$isInWhitelist) {
                    $descriptionLength = strlen($description);
                    $encodedContentLength = strlen($encodedContent);
                    if ($descriptionLength >= 500 || $encodedContentLength >= 500) {
                        if (!empty($fullContent) && strlen($fullContent) > 10000) {
                            $fullContent = $cut($fullContent, 10000) . '... [内容已截断]';
                        }
                        error_log("UrlNav Other: 非白名单网站description({$descriptionLength})或content:encoded({$encodedContentLength})长度≥500字存入全文字段");
                    } else {
                        $fullContent = '';
                        error_log("UrlNav Other: 非白名单网站description({$descriptionLength})和content:encoded({$encodedContentLength})都小于500字全文字段留空");
                    }
                } else {
                    if (!empty($fullContent) && strlen($fullContent) > 10000) {
                        $fullContent = $cut($fullContent, 10000) . '... [内容已截断]';
                    }
                }

                $feeds[] = array(
                    'title' => $articleTitle,
                    'link' => $articleLink,
                    'description' => $description,
                    'full_content' => $fullContent, // may be empty for non-whitelisted feeds
                    'pubDate' => $toDate($item->pubDate),
                    'guid' => (string)$item->guid
                );
            }
        }
        // ========== Last resort: Atom entries reachable only through a namespace ==========
        else {
            $namespaces = $xml->getNamespaces(true);
            foreach ($namespaces as $ns) {
                if (strpos($ns, 'www.w3.org/2005/Atom') !== false) {
                    $atom = $xml->children($ns);
                    if (isset($atom->entry)) {
                        error_log("UrlNav: 检测到Atom命名空间格式");
                        $entryIndex = -1;
                        foreach ($atom->entry as $entry) {
                            $entryIndex++;
                            $entry = $entry->children($ns);

                            $fullContent = '';
                            $atomContent = '';
                            $entryDescription = '';
                            if (isset($entry->content)) {
                                $atomContent = (string)$entry->content;
                                $fullContent = $atomContent;
                            }
                            if (isset($entry->summary)) {
                                $entryDescription = (string)$entry->summary;
                                if (empty($fullContent)) {
                                    $fullContent = $entryDescription;
                                }
                            }

                            // Atom carries the URL in the href attribute of the link
                            // element; the element text is normally empty.
                            $entryLink = '';
                            if (isset($entry->link)) {
                                foreach ($entry->link as $linkElem) {
                                    $linkAttrs = $linkElem->attributes();
                                    $rel = (string)$linkAttrs['rel'];
                                    $href = (string)$linkAttrs['href'];
                                    if ($href !== '' && ($rel === '' || $rel === 'alternate')) {
                                        $entryLink = $href;
                                        break;
                                    }
                                }
                                if ($entryLink === '') {
                                    // Still accept feeds that put the URL in the element text
                                    $entryLink = trim((string)$entry->link);
                                }
                            }

                            // ----- Decide whether to fetch the article page -----
                            $needPageFetch = false;
                            $rssContentLength = strlen($fullContent);
                            if ($isInWhitelist && $fullTextCount < $fullTextPerSite) {
                                $needPageFetch = true;
                                $fullTextCount++;
                            }

                            if ($needPageFetch && !empty($entryLink)) {
                                if ($entryIndex > 0) {
                                    usleep(rand(300000, 800000));
                                }
                                $pageContent = self::fetchFullContentWithSelector($entryLink, $selector, $pageFetchTimeout);
                                // In this branch any fetched page text replaces the feed text
                                if (!empty($pageContent)) {
                                    $fullContent = $pageContent;
                                }
                            }

                            // ----- Final full-text value -----
                            if (!$isInWhitelist) {
                                $descriptionLength = strlen($entryDescription);
                                $contentLength = strlen($atomContent);
                                if ($descriptionLength >= 500 || $contentLength >= 500) {
                                    if (!empty($fullContent) && strlen($fullContent) > 10000) {
                                        $fullContent = $cut($fullContent, 10000) . '... [内容已截断]';
                                    }
                                    error_log("UrlNav Namespace: 非白名单网站summary({$descriptionLength})或content({$contentLength})长度≥500字存入全文字段");
                                } else {
                                    $fullContent = '';
                                    error_log("UrlNav Namespace: 非白名单网站summary({$descriptionLength})和content({$contentLength})都小于500字全文字段留空");
                                }
                            } else {
                                if (!empty($fullContent) && strlen($fullContent) > 10000) {
                                    $fullContent = $cut($fullContent, 10000) . '... [内容已截断]';
                                }
                            }

                            $feeds[] = array(
                                'title' => isset($entry->title) ? (string)$entry->title : '',
                                'link' => $entryLink,
                                'description' => $entryDescription,
                                'full_content' => $fullContent, // may be empty for non-whitelisted feeds
                                'pubDate' => isset($entry->updated) ? $toDate($entry->updated) : date('Y-m-d H:i:s'),
                                'guid' => isset($entry->id) ? (string)$entry->id : ''
                            );
                        }
                        break;
                    }
                }
            }
            if (empty($feeds)) {
                error_log("UrlNav: 无法识别的RSS格式");
                throw new Exception('无法识别的RSS格式');
            }
        }

        if (empty($feeds)) {
            error_log("UrlNav: RSS中没有找到文章内容");
            throw new Exception('RSS中没有找到文章内容');
        }

        error_log("UrlNav: 找到 " . count($feeds) . " 篇文章");
        error_log("UrlNav: <<< RSS解析成功");
        return $feeds;
    } catch (Exception $e) {
        error_log("UrlNav: <<< RSS解析失败: " . $e->getMessage());
        throw new Exception("解析RSS失败 [{$rssUrl}]: " . $e->getMessage(), 0, $e);
    }
}
/**
 * Remove CDATA wrappers from a piece of feed content.
 *
 * Up to five passes are made while the text still contains a CDATA opener.
 * In each pass the first CDATA section is located:
 *  - if its inner text is longer than 50 bytes and shorter than 90% of the
 *    whole text, the inner text replaces the whole text;
 *  - otherwise (or when no complete section is found) all CDATA opening and
 *    closing markers are simply deleted and processing stops.
 *
 * @param string $content Raw content.
 * @param string $source  Label used in log messages only.
 * @return string The processed content; empty input is returned unchanged.
 */
private static function extractCdataContent($content, $source = '')
{
    // Nothing to do for empty input or input without a CDATA opener
    if (empty($content) || strpos($content, '<![CDATA[') === false) {
        return $content;
    }

    $lengthBefore = strlen($content);
    $markers = array('<![CDATA[', ']]>');

    for ($pass = 1; $pass <= 5 && strpos($content, '<![CDATA[') !== false; $pass++) {
        $found = preg_match('/<!\[CDATA\[(.*?)\]\]>/s', $content, $hit);
        $inner = $found ? $hit[1] : null;

        // A section counts as well-formed when its inner text is noticeably
        // shorter than the surrounding text yet long enough to be real content.
        $wellFormed = $inner !== null
            && strlen($inner) < strlen($content) * 0.9
            && strlen($inner) > 50;

        if (!$wellFormed) {
            // Nested or malformed CDATA: just delete the markers and stop
            $content = str_replace($markers, '', $content);
            error_log("UrlNav: 清理不规范的CDATA标记");
            break;
        }

        $content = $inner;
        error_log("UrlNav: 从CDATA提取 {$source} 内容 (第{$pass}次)");
    }

    $lengthAfter = strlen($content);
    if ($lengthBefore != $lengthAfter) {
        error_log("UrlNav: CDATA处理完成 {$source},从 {$lengthBefore}{$lengthAfter} 字符");
    }

    return $content;
}
/**
 * Download an article page and extract its main text with a series of
 * heuristics, each used only while the result is still empty or shorter
 * than 500 bytes:
 *  1. og:description meta tag
 *  2. description meta tag (only when step 1 found nothing)
 *  3. common article container elements (article, post-content, ...)
 *  4. all paragraphs longer than 50 bytes
 *  5. the text of the whole body without scripts, styles, header, footer, nav
 *
 * The result is whitespace-normalised, stripped of some common boilerplate
 * phrases and truncated to 15000 bytes.
 *
 * Note: TLS certificate verification is switched off for the download.
 *
 * @param string $articleUrl Address of the article page.
 * @param string $title      Article title; currently unused.
 * @param int    $timeout    HTTP timeout in seconds.
 * @return string|null Extracted text, or null when the page could not be
 *                     loaded or no usable text was found.
 */
private static function fetchFullContentFromPage($articleUrl, $title = '', $timeout = 10)
{
    error_log("UrlNav: 尝试从页面抓取完整内容: {$articleUrl}");

    // Byte-limited truncation that never cuts a multi-byte UTF-8 character in
    // half; falls back to substr() when mbstring is not available.
    $cut = static function ($text, $maxBytes) {
        return function_exists('mb_strcut')
            ? mb_strcut($text, 0, $maxBytes, 'UTF-8')
            : substr($text, 0, $maxBytes);
    };

    // The HTTP stream wrapper does not decompress responses on its own, so
    // gzip is only requested when the body can be inflated afterwards.
    $canGunzip = function_exists('gzdecode');

    try {
        // Browser-like request headers
        $context = stream_context_create([
            'http' => [
                'timeout' => $timeout,
                'ignore_errors' => true,
                'header' => "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36\r\n" .
                    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\n" .
                    "Accept-Language: zh-CN,zh;q=0.9,en;q=0.8\r\n" .
                    ($canGunzip ? "Accept-Encoding: gzip\r\n" : '') .
                    "Connection: close\r\n" .
                    "Upgrade-Insecure-Requests: 1",
                'method' => 'GET'
            ],
            'ssl' => [
                'verify_peer' => false,
                'verify_peer_name' => false,
                'allow_self_signed' => true
            ]
        ]);

        $html = @file_get_contents($articleUrl, false, $context);
        if ($html === false) {
            $error = error_get_last();
            error_log("UrlNav: 无法访问文章页面: " . ($error['message'] ?? '未知错误'));
            return null;
        }

        // Inflate a gzip-compressed body (recognised by its two magic bytes)
        if ($canGunzip && strncmp($html, "\x1f\x8b", 2) === 0) {
            $inflated = @gzdecode($html);
            if ($inflated === false) {
                error_log("UrlNav: 无法解压gzip页面内容");
                return null;
            }
            $html = $inflated;
        }

        if (empty($html)) {
            error_log("UrlNav: 文章页面内容为空");
            return null;
        }
        $htmlLength = strlen($html);
        error_log("UrlNav: 获取页面成功,长度: {$htmlLength} 字节");

        // Convert to UTF-8 when the page declares another charset
        $encoding = 'UTF-8';
        if (preg_match('/<meta[^>]*charset=["\']?([a-zA-Z0-9\-_]+)["\']?/i', $html, $matches)) {
            $encoding = strtoupper($matches[1]);
            if ($encoding !== 'UTF-8' && function_exists('mb_convert_encoding')) {
                // The charset label comes from the page and may be unknown to
                // mbstring: PHP 8 throws ValueError, PHP 7 warns and returns false.
                try {
                    $converted = @mb_convert_encoding($html, 'UTF-8', $encoding);
                } catch (\Throwable $conversionError) {
                    $converted = false;
                }
                if (is_string($converted) && $converted !== '') {
                    $html = $converted;
                    error_log("UrlNav: 检测到编码 {$encoding}已转换为UTF-8");
                } else {
                    error_log("UrlNav: 无法识别的编码 {$encoding}, 跳过转换");
                }
            }
        }

        $fullContent = '';

        // Step 1: Open Graph description
        if (preg_match('/<meta\s+property="og:description"\s+content="([^"]+)"/i', $html, $matches)) {
            $fullContent = html_entity_decode($matches[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
            error_log("UrlNav: 从og:description提取内容长度: " . strlen($fullContent));
        }

        // Step 2: meta description
        if (empty($fullContent) && preg_match('/<meta\s+name="description"\s+content="([^"]+)"/i', $html, $matches)) {
            $fullContent = html_entity_decode($matches[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
            error_log("UrlNav: 从meta description提取内容长度: " . strlen($fullContent));
        }

        // Step 3: well-known article containers
        if (empty($fullContent) || strlen($fullContent) < 500) {
            $contentPatterns = [
                // WordPress themes
                '/<article[^>]*>(.*?)<\/article>/is',
                '/<div\s+class="[^"]*post-content[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+class="[^"]*entry-content[^"]*"[^>]*>(.*?)<\/div>/is',
                // Generic content areas
                '/<div\s+class="[^"]*content[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+id="content"[^>]*>(.*?)<\/div>/is',
                // Typecho themes
                '/<div\s+class="[^"]*post[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+class="[^"]*post-body[^"]*"[^>]*>(.*?)<\/div>/is',
                // Other common patterns
                '/<div\s+class="[^"]*article-content[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+class="[^"]*blog-content[^"]*"[^>]*>(.*?)<\/div>/is',
                '/<div\s+class="[^"]*post-entry[^"]*"[^>]*>(.*?)<\/div>/is'
            ];
            foreach ($contentPatterns as $pattern) {
                if (preg_match($pattern, $html, $matches) && isset($matches[1])) {
                    $extracted = $matches[1];
                    // Drop scripts, styles and comments
                    $extracted = preg_replace('/<script[^>]*>.*?<\/script>/is', '', $extracted);
                    $extracted = preg_replace('/<style[^>]*>.*?<\/style>/is', '', $extracted);
                    $extracted = preg_replace('/<!--.*?-->/s', '', $extracted);
                    // Keep only structural tags so paragraphs stay recognisable
                    $extracted = strip_tags($extracted, '<p><br><div><h1><h2><h3><h4><h5><h6><ul><ol><li>');
                    $extracted = preg_replace('/\s+/', ' ', $extracted);
                    $extracted = trim($extracted);
                    if (strlen($extracted) > 300) {
                        $fullContent = $extracted;
                        error_log("UrlNav: 从页面提取主要内容,长度: " . strlen($fullContent));
                        break;
                    }
                }
            }
        }

        // Step 4: all meaningful paragraphs
        if (empty($fullContent) || strlen($fullContent) < 500) {
            if (preg_match_all('/<p[^>]*>(.*?)<\/p>/is', $html, $paragraphs)) {
                $allText = '';
                foreach ($paragraphs[1] as $para) {
                    $paraText = strip_tags($para);
                    $paraText = preg_replace('/\s+/', ' ', $paraText);
                    $paraText = trim($paraText);
                    // Skip short fragments such as captions or buttons
                    if (strlen($paraText) > 50) {
                        $allText .= $paraText . "\n\n";
                    }
                }
                if (strlen($allText) > 500) {
                    $fullContent = $allText;
                    error_log("UrlNav: 从段落提取文本,长度: " . strlen($fullContent));
                }
            }
        }

        // Step 5: the whole body text
        if (empty($fullContent) || strlen($fullContent) < 500) {
            if (preg_match('/<body[^>]*>(.*?)<\/body>/is', $html, $matches)) {
                $bodyText = $matches[1];
                $bodyText = preg_replace('/<script[^>]*>.*?<\/script>/is', '', $bodyText);
                $bodyText = preg_replace('/<style[^>]*>.*?<\/style>/is', '', $bodyText);
                $bodyText = preg_replace('/<!--.*?-->/s', '', $bodyText);
                $bodyText = preg_replace('/<header[^>]*>.*?<\/header>/is', '', $bodyText);
                $bodyText = preg_replace('/<footer[^>]*>.*?<\/footer>/is', '', $bodyText);
                $bodyText = preg_replace('/<nav[^>]*>.*?<\/nav>/is', '', $bodyText);
                $bodyText = strip_tags($bodyText);
                $bodyText = preg_replace('/\s+/', ' ', $bodyText);
                $bodyText = trim($bodyText);
                if (strlen($bodyText) > 500) {
                    // Remove typical navigation and footer words
                    $bodyText = preg_replace('/首页|关于|联系我们|版权|©|Copyright|all rights reserved/i', '', $bodyText);
                    $fullContent = $bodyText;
                    error_log("UrlNav: 从body提取文本长度: " . strlen($fullContent));
                }
            }
        }

        // Final clean-up and truncation
        if (!empty($fullContent)) {
            $fullContent = preg_replace('/\s+/', ' ', $fullContent);
            $fullContent = trim($fullContent);

            // Common boilerplate phrases
            $noisePatterns = [
                '/分享到.*?(?:微信|微博|QQ|Twitter|Facebook)/',
                '/阅读\s*\d+\s*次/',
                '/点赞\s*\d+\s*次/',
                '/本文由.*?创作/',
                '/版权声明.*/',
                '/转载请注明出处.*/',
                '/相关文章推荐.*/',
                '/你可能也喜欢.*/'
            ];
            foreach ($noisePatterns as $pattern) {
                $fullContent = preg_replace($pattern, '', $fullContent);
            }

            if (strlen($fullContent) > 15000) {
                $fullContent = $cut($fullContent, 15000) . '... [页面内容已截断]';
            }
            $finalLength = strlen($fullContent);
            error_log("UrlNav: 页面抓取成功,获得 {$finalLength} 字符内容");
            return $fullContent;
        }

        error_log("UrlNav: 无法从页面提取有效内容");
        return null;
    } catch (Exception $e) {
        error_log("UrlNav: 页面抓取异常: " . $e->getMessage());
        return null;
    }
}
/**
 * Fetch an article page and extract its main content using a configured selector.
 *
 * The page HTML is downloaded once. The selector-based extractor is tried first;
 * when it yields nothing, the generic fallback extractor is applied to the same HTML.
 *
 * @param string $articleUrl Article URL to download
 * @param string $selector   Selector handed to extractContentBySelector()
 * @param int    $timeout    Download timeout in seconds
 * @return string|null Extracted content, or null when the download or every extractor fails
 */
public static function fetchFullContentWithSelector($articleUrl, $selector, $timeout = 10)
{
    error_log("UrlNav: 使用选择器抓取全文: {$articleUrl} [选择器: {$selector}]");

    try {
        $html = self::fetchHtmlContent($articleUrl, $timeout);
        if (empty($html)) {
            error_log("UrlNav: 无法获取页面HTML");
            return null;
        }
        error_log("UrlNav: 获取页面成功,长度: " . strlen($html) . " 字节");

        // First choice: the selector configured for this feed.
        $primary = self::extractContentBySelector($html, $selector);
        if (!empty($primary)) {
            error_log("UrlNav: ✓ 使用选择器 {$selector} 抓取成功,长度: " . strlen($primary));
            return $primary;
        }

        // Second choice: heuristic extraction on the HTML already downloaded.
        error_log("UrlNav: 选择器 {$selector} 未找到内容,尝试备用方法");
        $secondary = self::extractContentFallback($html);
        if (empty($secondary)) {
            error_log("UrlNav: ✗ 所有提取方法都失败");
            return null;
        }

        error_log("UrlNav: ✓ 备用方法抓取成功,长度: " . strlen($secondary));
        return $secondary;
    } catch (Exception $e) {
        error_log("UrlNav: 智能抓取失败: " . $e->getMessage());
        return null;
    }
}
/**
 * Download the first 100 KB of a web page.
 *
 * Only http:// and https:// URLs are fetched; any other scheme (or a bare local path)
 * is rejected so that file_get_contents() can never be pointed at a local file or
 * another stream wrapper by a URL taken from a feed.
 *
 * The request asks for an uncompressed body ("Accept-Encoding: identity"): the HTTP
 * stream wrapper does not decompress responses, so advertising gzip would hand
 * compressed bytes to the regex-based extractors.
 *
 * @param string $url     Page URL
 * @param int    $timeout Timeout in seconds
 * @return string|null Raw response body (at most 102400 bytes), or null on failure
 */
private static function fetchHtmlContent($url, $timeout)
{
    $scheme = strtolower((string)parse_url((string)$url, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        error_log("UrlNav: 拒绝抓取非HTTP(S)地址: {$url}");
        return null;
    }

    $headerLines = [
        'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
        'Accept-Encoding: identity',
        'Connection: close',
        'Upgrade-Insecure-Requests: 1',
    ];

    $context = stream_context_create([
        'http' => [
            'timeout' => $timeout,
            // Keep the body even for 4xx/5xx responses.
            'ignore_errors' => true,
            'header' => implode("\r\n", $headerLines),
            'method' => 'GET'
        ],
        // NOTE(review): certificate verification is disabled, as in the original code.
        // This tolerates self-signed/misconfigured sites but allows man-in-the-middle
        // tampering with fetched content — confirm this trade-off is intended.
        'ssl' => [
            'verify_peer' => false,
            'verify_peer_name' => false,
            'allow_self_signed' => true
        ]
    ]);

    // Read only the first 100 KB to bound memory use.
    $content = @file_get_contents($url, false, $context, 0, 102400);
    if ($content === false) {
        $error = error_get_last();
        error_log("UrlNav: file_get_contents失败: " . ($error['message'] ?? '未知错误'));
        return null;
    }

    return $content;
}
/**
 * Extract the inner HTML of the first element matching a simple selector.
 *
 * Supported selector forms: ".class", "#id" and a bare tag name. Matching is
 * regex-based (no DOM parser) with these guarantees:
 *  - a class selector matches a whole class token, so ".post" does not match
 *    class="post-footer";
 *  - a tag selector matches the exact tag name, so "p" does not match <pre>;
 *  - attribute values may use single or double quotes;
 *  - the element body is cut at its balanced closing tag, so nested elements of
 *    the same tag name do not truncate the result.
 *
 * The extracted fragment is stripped of scripts, styles and comments, reduced to a
 * small whitelist of tags, whitespace-collapsed, and rejected when shorter than
 * 100 bytes.
 *
 * @param string $html     Page HTML
 * @param string $selector Selector (".class", "#id" or tag name)
 * @return string|null Cleaned content, or null when nothing suitable is found
 */
private static function extractContentBySelector($html, $selector)
{
    $selector = trim((string)$selector);
    $kind = $selector === '' ? '' : $selector[0];

    if ($kind === '.' || $kind === '#') {
        $label = $kind === '.' ? '类选择器' : 'ID选择器';
        $needle = substr($selector, 1);
        if ($needle === '') {
            error_log("UrlNav: {$label} {$selector} 未匹配到内容");
            return null;
        }
        $quoted = preg_quote($needle, '/');
        if ($kind === '.') {
            // The class name must be a whole whitespace-delimited token of the attribute.
            $attr = '\sclass\s*=\s*(["\'])(?:[^"\']*\s)?' . $quoted . '(?:\s[^"\']*)?\2';
        } else {
            $attr = '\sid\s*=\s*(["\'])\s*' . $quoted . '\s*\2';
        }
        $openPattern = '/<([a-zA-Z][a-zA-Z0-9]*)\b[^>]*?' . $attr . '[^>]*>/is';
    } else {
        $label = '标签选择器';
        if ($selector === '') {
            error_log("UrlNav: {$label} {$selector} 未匹配到内容");
            return null;
        }
        // The lookahead pins the end of the tag name ("p" must not match "<pre>").
        $openPattern = '/<(' . preg_quote($selector, '/') . ')(?=[\s>\/])[^>]*>/is';
    }

    if (!preg_match($openPattern, $html, $open, PREG_OFFSET_CAPTURE)) {
        error_log("UrlNav: {$label} {$selector} 未匹配到内容");
        return null;
    }

    $tagName = $open[1][0];
    $bodyStart = $open[0][1] + strlen($open[0][0]);
    $content = self::sliceBalancedElement($html, $tagName, $bodyStart);

    if ($kind === '.' || $kind === '#') {
        error_log("UrlNav: 使用{$label}找到内容标签: {$tagName}");
    } else {
        error_log("UrlNav: 使用标签选择器找到内容");
    }

    // Remove scripts, styles and comments; keep the fragment if PCRE reports an error.
    $cleaned = preg_replace(
        ['/<script[^>]*>.*?<\/script>/is', '/<style[^>]*>.*?<\/style>/is', '/<!--.*?-->/s'],
        '',
        $content
    );
    $content = $cleaned ?? $content;

    // Keep only basic structural/inline tags.
    $content = strip_tags($content, '<p><br><div><span><h1><h2><h3><h4><h5><h6><ul><ol><li><strong><em><b><i><a><img>');

    // Collapse whitespace.
    $content = trim(preg_replace('/\s+/', ' ', $content) ?? $content);

    // Very short matches are unlikely to be the article body.
    if (strlen($content) < 100) {
        error_log("UrlNav: 选择器提取的内容太短(" . strlen($content) . "字符),可能不是文章内容");
        return null;
    }

    return $content;
}

/**
 * Return the inner HTML of an element whose opening tag ends just before $offset.
 *
 * Walks forward over opening and closing tags named $tagName, tracking nesting depth,
 * and stops at the closing tag that balances the initial opening tag. When no
 * balancing closing tag exists (for example the page was truncated while downloading),
 * everything from $offset to the end of $html is returned.
 *
 * @param string $html    Page HTML
 * @param string $tagName Tag name of the element being extracted
 * @param int    $offset  Byte offset of the first byte after the opening tag
 * @return string Inner HTML of the element
 */
private static function sliceBalancedElement($html, $tagName, $offset)
{
    $tagPattern = '/<(\/?)' . preg_quote($tagName, '/') . '(?=[\s>\/])[^>]*>/i';
    $depth = 1;
    $cursor = $offset;

    while (preg_match($tagPattern, $html, $hit, PREG_OFFSET_CAPTURE, $cursor)) {
        $tagText = $hit[0][0];
        $tagPos = $hit[0][1];

        if ($hit[1][0] === '/') {
            $depth--;
            if ($depth === 0) {
                return substr($html, $offset, $tagPos - $offset);
            }
        } elseif (substr($tagText, -2) !== '/>') {
            // Self-closing tags do not open a new nesting level.
            $depth++;
        }

        $cursor = $tagPos + strlen($tagText);
    }

    return substr($html, $offset);
}
/**
 * Heuristic content extraction used when no selector matched.
 *
 * Strategies are tried in order; each later one runs only while the content
 * collected so far is shorter than 500 bytes:
 *   1. og:description meta tag
 *   2. description meta tag (only when step 1 produced nothing)
 *   3. common content containers (article, well-known div classes, main, section)
 *   4. concatenation of all <p> paragraphs longer than 50 bytes
 *   5. text of the whole <body> with header/footer/nav removed
 * The result is whitespace-collapsed, stripped of common boilerplate phrases and
 * truncated to about 15000 bytes. Truncation backs off to a UTF-8 character
 * boundary so the returned string never ends in a partial multi-byte character.
 *
 * @param string $html Page HTML
 * @return string|null Extracted content, or null when nothing usable was found
 */
private static function extractContentFallback($html)
{
    $content = '';

    // Step 1: Open Graph description.
    if (preg_match('/<meta\s+property="og:description"\s+content="([^"]+)"/i', $html, $matches)) {
        $content = html_entity_decode($matches[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
        error_log("UrlNav 备用方法: 从og:description提取内容长度: " . strlen($content));
    }

    // Step 2: plain meta description.
    if (empty($content) && preg_match('/<meta\s+name="description"\s+content="([^"]+)"/i', $html, $matches)) {
        $content = html_entity_decode($matches[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
        error_log("UrlNav 备用方法: 从meta description提取内容长度: " . strlen($content));
    }

    // Step 3: well-known content containers.
    if (empty($content) || strlen($content) < 500) {
        $contentPatterns = [
            '/<article[^>]*>(.*?)<\/article>/is',
            '/<div\s+class="[^"]*post-content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*entry-content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+id="content"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*post[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*post-body[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*article-content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*blog-content[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*post-entry[^"]*"[^>]*>(.*?)<\/div>/is',
            '/<div\s+class="[^"]*markdown-body[^"]*"[^>]*>(.*?)<\/div>/is', // GitHub-style markup
            '/<div\s+class="[^"]*article-body[^"]*"[^>]*>(.*?)<\/div>/is',  // generic article body
            '/<main[^>]*>(.*?)<\/main>/is',
            '/<section[^>]*>(.*?)<\/section>/is'
        ];
        foreach ($contentPatterns as $pattern) {
            if (preg_match($pattern, $html, $matches) && isset($matches[1])) {
                $extracted = $matches[1];
                $extracted = preg_replace('/<script[^>]*>.*?<\/script>/is', '', $extracted);
                $extracted = preg_replace('/<style[^>]*>.*?<\/style>/is', '', $extracted);
                $extracted = preg_replace('/<!--.*?-->/s', '', $extracted);
                // Keep block-level tags so paragraph structure survives.
                $extracted = strip_tags($extracted, '<p><br><div><h1><h2><h3><h4><h5><h6><ul><ol><li>');
                $extracted = preg_replace('/\s+/', ' ', $extracted);
                $extracted = trim($extracted);
                // The first container yielding more than 300 bytes wins.
                if (strlen($extracted) > 300) {
                    $content = $extracted;
                    error_log("UrlNav 备用方法: 从页面提取主要内容,模式: " . substr($pattern, 0, 50) . "...,长度: " . strlen($content));
                    break;
                }
            }
        }
    }

    // Step 4: all meaningful paragraphs.
    if (empty($content) || strlen($content) < 500) {
        if (preg_match_all('/<p[^>]*>(.*?)<\/p>/is', $html, $paragraphs)) {
            $allText = '';
            foreach ($paragraphs[1] as $para) {
                $paraText = strip_tags($para);
                $paraText = preg_replace('/\s+/', ' ', $paraText);
                $paraText = trim($paraText);
                if (strlen($paraText) > 50) {
                    $allText .= $paraText . "\n\n";
                }
            }
            if (strlen($allText) > 500) {
                $content = $allText;
                error_log("UrlNav 备用方法: 从段落提取文本,长度: " . strlen($content));
            }
        }
    }

    // Step 5: the whole body text.
    if (empty($content) || strlen($content) < 500) {
        if (preg_match('/<body[^>]*>(.*?)<\/body>/is', $html, $matches)) {
            $bodyText = $matches[1];
            $bodyText = preg_replace('/<script[^>]*>.*?<\/script>/is', '', $bodyText);
            $bodyText = preg_replace('/<style[^>]*>.*?<\/style>/is', '', $bodyText);
            $bodyText = preg_replace('/<!--.*?-->/s', '', $bodyText);
            $bodyText = preg_replace('/<header[^>]*>.*?<\/header>/is', '', $bodyText);
            $bodyText = preg_replace('/<footer[^>]*>.*?<\/footer>/is', '', $bodyText);
            $bodyText = preg_replace('/<nav[^>]*>.*?<\/nav>/is', '', $bodyText);
            $bodyText = strip_tags($bodyText);
            $bodyText = preg_replace('/\s+/', ' ', $bodyText);
            $bodyText = trim($bodyText);
            if (strlen($bodyText) > 500) {
                // Drop typical navigation/footer words.
                $bodyText = preg_replace('/首页|关于|联系我们|版权|©|Copyright|all rights reserved/i', '', $bodyText);
                $content = $bodyText;
                error_log("UrlNav 备用方法: 从body提取文本长度: " . strlen($content));
            }
        }
    }

    if (empty($content)) {
        error_log("UrlNav 备用方法: 无法从页面提取有效内容");
        return null;
    }

    // Final clean-up: collapse whitespace, then strip boilerplate phrases.
    $content = preg_replace('/\s+/', ' ', $content);
    $content = trim($content);
    $noisePatterns = [
        '/分享到.*?(?:微信|微博|QQ|Twitter|Facebook)/',
        '/阅读\s*\d+\s*次/',
        '/点赞\s*\d+\s*次/',
        '/本文由.*?创作/',
        '/版权声明.*/',
        '/转载请注明出处.*/',
        '/相关文章推荐.*/',
        '/你可能也喜欢.*/'
    ];
    foreach ($noisePatterns as $pattern) {
        $content = preg_replace($pattern, '', $content);
    }

    // Truncate to a reasonable size without splitting a UTF-8 character: while the
    // first dropped byte is a continuation byte (10xxxxxx), move the cut back
    // (at most 3 bytes, the longest possible tail of one character).
    $maxBytes = 15000;
    if (strlen($content) > $maxBytes) {
        $cut = $maxBytes;
        for ($step = 0; $step < 3 && $cut > 0 && (ord($content[$cut]) & 0xC0) === 0x80; $step++) {
            $cut--;
        }
        $content = substr($content, 0, $cut) . '... [页面内容已截断]';
    }

    $finalLength = strlen($content);
    error_log("UrlNav 备用方法: 提取成功,获得 {$finalLength} 字符内容");
    return $content;
}
/**
 * Parse the full-text fetching whitelist from the plugin configuration.
 *
 * The configuration value is read line by line; each usable line has the form
 * "rss_url|selector". Lines without a "|" and entries with an empty URL or
 * selector are ignored. Every parsed entry is written to the error log.
 *
 * @return array Map of rss_url => selector
 */
public static function parseFullTextWhitelist()
{
    $config = self::getConfig();
    if (empty($config->fullTextWhitelist)) {
        return [];
    }

    $entries = [];
    foreach (explode("\n", trim($config->fullTextWhitelist)) as $rawLine) {
        $rawLine = trim($rawLine);
        if (empty($rawLine) || strpos($rawLine, '|') === false) {
            continue;
        }

        // Split on the first "|" only, so the selector part may itself contain "|".
        $pieces = explode('|', $rawLine, 2);
        $feedUrl = trim($pieces[0]);
        $css = trim($pieces[1]);
        if (empty($feedUrl) || empty($css)) {
            continue;
        }

        $entries[$feedUrl] = $css;
    }

    // Debug output: number of entries, then each entry.
    error_log("UrlNav: 解析到 " . count($entries) . " 个白名单条目");
    foreach ($entries as $feedUrl => $css) {
        error_log("UrlNav: 白名单 - {$feedUrl} => {$css}");
    }

    return $entries;
}
/**
 * Look up the selector configured for an RSS feed in the full-text whitelist.
 *
 * The parsed whitelist is cached in a static variable for the rest of the request.
 * An exact key match is tried first; otherwise whitelist keys containing "*" are
 * treated as wildcard patterns in which "*" matches any run of characters.
 *
 * @param string $rssUrl RSS feed URL
 * @return string|false The selector, or false when the feed is not whitelisted
 */
public static function isInFullTextWhitelist($rssUrl)
{
    static $cache = null;
    if ($cache === null) {
        $cache = self::parseFullTextWhitelist();
    }

    // Exact match.
    if (isset($cache[$rssUrl])) {
        error_log("UrlNav: {$rssUrl} 在白名单中,选择器: " . $cache[$rssUrl]);
        return $cache[$rssUrl];
    }

    // Wildcard match.
    foreach ($cache as $glob => $css) {
        if (strpos($glob, '*') === false) {
            continue;
        }

        // Quote the whole key, then turn every quoted "*" back into ".*".
        $regex = '/^' . str_replace('\*', '.*', preg_quote($glob, '/')) . '$/';
        if (preg_match($regex, $rssUrl)) {
            error_log("UrlNav: {$rssUrl} 匹配通配符模式 {$glob},选择器: {$css}");
            return $css;
        }
    }

    return false;
}
/**
 * Repair common well-formedness problems in feed XML before parsing.
 *
 * Steps:
 *  1. remove control characters that are illegal in XML;
 *  2. self-close bare <br> and <hr> tags;
 *  3. escape "&" that does not start a predefined or numeric entity. Decimal
 *     (&#39;) and hexadecimal (&#x27;) references are both recognised, and text
 *     inside <![CDATA[ ... ]]> sections is left untouched because "&" is literal there;
 *  4. drop whitespace between adjacent tags.
 *
 * @param string $content Raw XML text
 * @return string Repaired XML text
 */
private static function fixCommonXmlIssues($content)
{
    // 1. Illegal control characters.
    $content = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', '', $content);

    // 2. Unclosed void tags.
    $content = str_replace(['<br>', '<hr>'], ['<br/>', '<hr/>'], $content);

    // 3. Unescaped ampersands, outside CDATA only.
    $ampPattern = '/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/';
    $segments = preg_split('/(<!\[CDATA\[.*?\]\]>)/s', $content, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($segments === false) {
        // PCRE failure while splitting: fall back to escaping the whole document.
        $content = preg_replace($ampPattern, '&amp;', $content);
    } else {
        // With DELIM_CAPTURE the even indexes are plain text, the odd ones are CDATA sections.
        foreach ($segments as $index => $segment) {
            if ($index % 2 === 0) {
                $segments[$index] = preg_replace($ampPattern, '&amp;', $segment);
            }
        }
        $content = implode('', $segments);
    }

    // 4. Whitespace between tags.
    $content = preg_replace('/>\s+</', '><', $content);

    return $content;
}
/**
 * Refresh all RSS feeds on demand (entry point for the admin panel).
 *
 * Delegates to refreshAllRssFeeds() with its argument set to false.
 *
 * @return mixed Whatever refreshAllRssFeeds() returns
 */
public static function manualRefreshRss()
{
    $outcome = self::refreshAllRssFeeds(false);

    return $outcome;
}
/**
 * Record one refresh run in urlnav_refresh_log, tolerating older table layouts.
 *
 * The table's columns are inspected first: when both "message" and "details"
 * exist all fields are stored, when only "message" exists the details JSON is
 * omitted, otherwise only the legacy columns are written. If the insert fails
 * with a "has no column named" error, the database migration is run and a
 * reduced insert is retried, followed by a minimal insert as a last resort.
 * Any other failure is reported to the error log only.
 *
 * @param string      $type           Refresh type
 * @param int         $successCount   Number of successful refreshes
 * @param int         $totalFeeds     Total number of articles
 * @param int         $urlCount       Number of URLs processed
 * @param int         $newArticles    Number of new articles
 * @param string|null $errorMessage   Error text, if any
 * @param int|float   $duration       Elapsed time
 * @param string      $cronType       Cron type label
 * @param array|null  $successRssUrls RSS URLs refreshed successfully
 * @param array|null  $failedRssUrls  RSS URLs that failed
 * @return void
 */
private static function logRefresh($type, $successCount, $totalFeeds, $urlCount, $newArticles,
    $errorMessage = null, $duration = 0, $cronType = 'rss',
    $successRssUrls = null, $failedRssUrls = null)
{
    try {
        $db = self::getDbConnection();

        // Structured details, stored as JSON when the column exists.
        $details = array();
        if ($successRssUrls !== null) {
            $details['success_rss_urls'] = $successRssUrls;
        }
        if ($failedRssUrls !== null) {
            $details['failed_rss_urls'] = $failedRssUrls;
        }

        // Human-readable preview: at most 5 URLs, each cut to 50 bytes.
        $preview = function ($urls) {
            $shown = array();
            foreach (array_slice($urls, 0, 5) as $one) {
                $shown[] = strlen($one) > 50 ? substr($one, 0, 50) . '...' : $one;
            }
            $text = implode('; ', $shown);
            if (count($urls) > 5) {
                $text .= '... (共' . count($urls) . '个)';
            }
            return $text;
        };

        $message = "刷新完成:成功 {$successCount} 个,失败 " . ($urlCount - $successCount) . "";
        if (!empty($successRssUrls)) {
            $message .= "\n成功的RSS: " . $preview($successRssUrls);
        }
        if (!empty($failedRssUrls)) {
            $message .= "\n失败的RSS: " . $preview($failedRssUrls);
        }

        $detailsJson = !empty($details) ? json_encode($details, JSON_UNESCAPED_UNICODE) : null;

        try {
            // Find out which optional columns this installation has.
            $columnNames = array_column(
                $db->query("PRAGMA table_info(urlnav_refresh_log)")->fetchAll(PDO::FETCH_ASSOC),
                'name'
            );
            $hasMessage = in_array('message', $columnNames, true);
            $hasDetails = in_array('details', $columnNames, true);

            // Values shared by every insert variant, in column order.
            $bindings = array(
                $type,
                $successCount,
                $totalFeeds,
                $urlCount,
                $newArticles,
                $errorMessage,
                $duration,
                $cronType
            );

            if ($hasMessage && $hasDetails) {
                $sql = "
                    INSERT INTO urlnav_refresh_log
                    (refresh_type, success_count, total_feeds, url_count, new_articles, error_message,
                    refresh_time, duration, cron_type, message, details)
                    VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?, ?, ?, ?)
                ";
                $bindings[] = $message;
                $bindings[] = $detailsJson;
                $doneNote = "UrlNav: 刷新日志记录成功(使用完整字段)";
            } elseif ($hasMessage) {
                $sql = "
                    INSERT INTO urlnav_refresh_log
                    (refresh_type, success_count, total_feeds, url_count, new_articles, error_message,
                    refresh_time, duration, cron_type, message)
                    VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?, ?, ?)
                ";
                $bindings[] = $message;
                $doneNote = "UrlNav: 刷新日志记录成功使用message字段";
            } else {
                // No "message" column: legacy layout.
                $sql = "
                    INSERT INTO urlnav_refresh_log
                    (refresh_type, success_count, total_feeds, url_count, new_articles, error_message,
                    refresh_time, duration, cron_type)
                    VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?, ?)
                ";
                $doneNote = "UrlNav: 刷新日志记录成功(旧方式)";
            }

            $insert = $db->prepare($sql);
            $insert->execute($bindings);
            error_log($doneNote);
        } catch (Exception $e) {
            // Only a missing-column error is repaired here; anything else goes to the outer handler.
            if (strpos($e->getMessage(), 'has no column named') === false) {
                throw $e;
            }

            error_log("UrlNav: 检测到字段缺失,尝试修复: " . $e->getMessage());
            self::migrateDatabase();

            try {
                // Retry with a reduced column set.
                $retry = $db->prepare("
                    INSERT INTO urlnav_refresh_log
                    (refresh_type, success_count, total_feeds, url_count, error_message, refresh_time, duration, cron_type)
                    VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?, ?)
                ");
                $retry->execute(array(
                    $type,
                    $successCount,
                    $totalFeeds,
                    $urlCount,
                    $errorMessage,
                    $duration,
                    $cronType
                ));
                error_log("UrlNav: 刷新日志记录成功(修复后)");
            } catch (Exception $retryError) {
                try {
                    // Last resort: only the columns every layout is expected to have.
                    $minimal = $db->prepare("
                        INSERT INTO urlnav_refresh_log
                        (refresh_type, success_count, refresh_time)
                        VALUES (?, ?, CURRENT_TIMESTAMP)
                    ");
                    $minimal->execute(array($type, $successCount));
                    error_log("UrlNav: 刷新日志记录成功(最小字段)");
                } catch (Exception $minimalError) {
                    error_log("UrlNav: 最小字段插入也失败: " . $minimalError->getMessage());
                }
            }
        }

        // Mirror the statistics into the error log.
        error_log("UrlNav: 刷新统计 - 类型: {$type}, 成功: {$successCount}, 总文章: {$totalFeeds}, 网址数: {$urlCount}, 新文章: {$newArticles}, 耗时: {$duration}");
        if (!empty($successRssUrls)) {
            error_log("UrlNav: 成功的RSS地址: " . implode(', ', array_slice($successRssUrls, 0, 3)));
        }
        if (!empty($failedRssUrls)) {
            error_log("UrlNav: 失败的RSS地址: " . implode(', ', array_slice($failedRssUrls, 0, 3)));
        }
    } catch (Exception $e) {
        error_log('UrlNav: 记录刷新日志失败: ' . $e->getMessage());
        // Final fallback: the statistics survive in the error log only.
        error_log("UrlNav: 刷新统计(无法写入数据库) - 类型: {$type}, 成功: {$successCount}, 总文章: {$totalFeeds}, 网址数: {$urlCount}, 新文章: {$newArticles}, 耗时: {$duration}");
    }
}
/**
 * Append one row to urlnav_cron_log describing a cron execution.
 *
 * An array result is stored JSON-encoded, and its "error" entry (when set) is
 * copied into the error_message column. A non-array result is stored as given
 * with a null error message. Failures are reported to the error log only.
 *
 * @param string $type   Cron type label
 * @param mixed  $result Result payload (array or scalar)
 * @return void
 */
private static function logCron($type, $result)
{
    try {
        $db = self::getDbConnection();

        $failure = null;
        if (is_array($result)) {
            $failure = $result['error'] ?? null;
            $result = json_encode($result);
        }

        $insert = $db->prepare("
            INSERT INTO urlnav_cron_log
            (cron_type, result, error_message, executed_time)
            VALUES (?, ?, ?, CURRENT_TIMESTAMP)
        ");
        $insert->execute(array($type, $result, $failure));
    } catch (Exception $e) {
        error_log('UrlNav: 记录定时任务日志失败: ' . $e->getMessage());
    }
}
/**
 * Check a cron access secret against the configured one.
 *
 * The comparison uses hash_equals() so its duration does not depend on how many
 * leading characters match. Validation fails when the type is unknown, when
 * either value is not a string, or when no secret is configured (an empty or
 * missing configured secret never validates).
 *
 * @param string $secret Secret supplied by the caller
 * @param string $type   'rss' or 'status'
 * @return bool True only when the supplied secret equals the configured secret
 */
public static function validateCronSecret($secret, $type = 'rss')
{
    $config = self::getConfig();

    if ($type === 'rss') {
        $expected = $config->rssCronSecret;
    } elseif ($type === 'status') {
        $expected = $config->statusCronSecret;
    } else {
        return false;
    }

    if (!is_string($secret) || !is_string($expected) || $expected === '') {
        return false;
    }

    return hash_equals($expected, $secret);
}
/**
 * Public entry point for the RSS cron job.
 *
 * When headers have not been sent yet, JSON/no-cache headers and a short
 * "processing" JSON marker are written and pushed to the client immediately, so
 * the web server sees output before the long-running refresh starts. PHP's output
 * buffer (if any) is flushed before the system buffer; the reverse order would
 * leave the marker in the buffer, and calling ob_flush() without an active
 * buffer raises a notice.
 *
 * NOTE(review): the secret is validated only when a non-empty $secret is passed;
 * a call without a secret runs the task unchecked. Presumably the caller enforces
 * the secret for public requests — verify against the action handler.
 *
 * NOTE(review): once the marker has been flushed headers_sent() is true, so the
 * later "echo json_encode(...)" branches do not run in that case; the full result
 * is still returned to the caller.
 *
 * @param string|null $secret Cron secret supplied with the request
 * @return array Result array of the cron task, or an error description
 */
public static function executePublicRssCron($secret = null)
{
    if (!headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
        header('Cache-Control: no-cache, no-store, must-revalidate');
        header('Pragma: no-cache');
        header('Expires: 0');

        // Early marker so the upstream server knows the script is running.
        echo '{"status":"processing","message":"任务开始执行...","timestamp":' . time() . '}';
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }

    error_log('UrlNav RSS Cron: 请求开始 ' . date('Y-m-d H:i:s'));

    try {
        // Validate the secret (only when one was supplied).
        if ($secret && !self::validateCronSecret($secret, 'rss')) {
            error_log('UrlNav RSS Cron: 无效的Cron密钥');
            return array(
                'success' => false,
                'message' => '无效的访问密钥',
                'timestamp' => time()
            );
        }

        $result = self::executeRssCronTask();

        if (!headers_sent()) {
            echo json_encode($result);
        }

        return $result;
    } catch (Exception $e) {
        error_log('UrlNav RSS Cron: 异常: ' . $e->getMessage());

        $errorResult = array(
            'success' => false,
            'error' => $e->getMessage(),
            'timestamp' => time(),
            'message' => 'RSS定时任务执行失败'
        );

        if (!headers_sent()) {
            echo json_encode($errorResult);
        }

        return $errorResult;
    }
}
/**
 * Public entry point for the site-status cron job (runs without any lock check).
 *
 * Sends JSON/no-cache headers when still possible, validates the secret when a
 * non-empty one is supplied, then runs the status task and returns its result.
 * An exception is converted into an error array.
 *
 * @param string|null $secret Cron secret supplied with the request
 * @return array Result array of the cron task, or an error description
 */
public static function executePublicStatusCron($secret = null)
{
    // Emit headers right away.
    if (!headers_sent()) {
        $headerLines = array(
            'Content-Type: application/json; charset=utf-8',
            'Cache-Control: no-cache, no-store, must-revalidate',
            'Pragma: no-cache',
            'Expires: 0'
        );
        foreach ($headerLines as $headerLine) {
            header($headerLine);
        }
    }

    error_log('UrlNav Status Cron: 请求 ' . date('Y-m-d H:i:s'));

    try {
        // An empty secret skips validation entirely.
        $rejected = $secret && !self::validateCronSecret($secret, 'status');
        if ($rejected) {
            error_log('UrlNav Status Cron: 无效的Cron密钥');
            return array(
                'success' => false,
                'message' => '无效的访问密钥',
                'timestamp' => time()
            );
        }

        // Run the task directly; no lock is taken.
        return self::executeStatusCronTask();
    } catch (Exception $e) {
        error_log('UrlNav Status Cron: 异常: ' . $e->getMessage());
        return array(
            'success' => false,
            'error' => $e->getMessage(),
            'timestamp' => time(),
            'message' => '状态检查定时任务执行失败'
        );
    }
}
/**
 * Build the URL of the "unlockCron" action for a cron type.
 *
 * The site URL comes from the Widget_Options widget; one trailing slash is
 * removed before the action path and query string are appended. Only the secret
 * is URL-encoded.
 *
 * @param string $secret Cron secret placed in the query string
 * @param string $type   Cron type placed in the query string
 * @return string Absolute unlock URL
 */
private static function getUnlockUrl($secret, $type = 'rss')
{
    $siteUrl = Typecho_Widget::widget('Widget_Options')->siteUrl;
    $root = (substr($siteUrl, -1) == '/') ? substr($siteUrl, 0, -1) : $siteUrl;

    return $root . '/action/urlnav?do=unlockCron&type=' . $type . '&secret=' . urlencode($secret);
}
/**
 * Mark cached RSS articles older than the configured retention as not fresh.
 *
 * The retention is read from the rssKeepTime setting (seconds); zero or a missing
 * value disables the clean-up. Articles whose id appears as feed_id in
 * urlnav_favorites are never touched. Rows are only flagged (is_fresh = 0), not deleted.
 *
 * NOTE(review): "id NOT IN (SELECT feed_id ...)" matches no row at all if any
 * favorites row has a NULL feed_id — confirm the column is declared NOT NULL.
 *
 * @return int Number of articles marked; 0 when disabled, nothing matched, or on error
 */
private static function cleanExpiredCache()
{
    try {
        $retention = intval(self::getConfig()->rssKeepTime ?? 0);
        if ($retention <= 0) {
            // Clean-up disabled.
            return 0;
        }

        $db = self::getDbConnection();

        // Cut-off is based on the article publication time.
        $threshold = date('Y-m-d H:i:s', time() - $retention);

        // Count candidates first; skip the update when there are none.
        $probe = $db->prepare("
            SELECT COUNT(*) as count
            FROM urlnav_rss_cache
            WHERE pub_date < ?
            AND is_fresh = 1
            AND id NOT IN (
                SELECT DISTINCT feed_id FROM urlnav_favorites
            )
        ");
        $probe->execute([$threshold]);
        if ($probe->fetchColumn() == 0) {
            return 0;
        }

        $update = $db->prepare("
            UPDATE urlnav_rss_cache
            SET is_fresh = 0
            WHERE pub_date < ?
            AND is_fresh = 1
            AND id NOT IN (
                SELECT DISTINCT feed_id FROM urlnav_favorites
            )
        ");
        $update->execute([$threshold]);
        $markedCount = $update->rowCount();

        if ($markedCount > 0) {
            $days = round($retention / 86400, 1);
            error_log("UrlNav: 自动清理 - 标记了 {$markedCount} 条超过{$days}天的文章为非新鲜状态");
        }

        return $markedCount;
    } catch (Exception $e) {
        error_log('UrlNav: 清理过期文章失败: ' . $e->getMessage());
        return 0;
    }
}
/**
 * Delete every cached RSS article that is no longer fresh and reset URL statistics.
 *
 * Articles referenced by urlnav_favorites are kept. After the delete, the refresh
 * counters, last error and last refresh time of ALL rows in urlnav_urls are reset,
 * whether or not anything was deleted.
 *
 * @return int Number of deleted articles
 * @throws Exception Re-thrown after logging when any database step fails
 */
public static function cleanAllRssCache()
{
    try {
        $db = self::getDbConnection();

        // Remove stale cache rows, sparing favorited articles.
        $purge = $db->prepare("
            DELETE FROM urlnav_rss_cache
            WHERE is_fresh = 0
            AND id NOT IN (
                SELECT DISTINCT feed_id FROM urlnav_favorites
            )
        ");
        $purge->execute();
        $deletedCount = $purge->rowCount();

        // The retention setting is only used to word the log message.
        $keepTime = intval(self::getConfig()->rssKeepTime ?? 0);
        if ($keepTime <= 0) {
            $logMsg = "清理了 {$deletedCount} 条非新鲜RSS文章当前保留策略不自动清理仅清理已标记文章";
        } else {
            $days = round($keepTime / 86400, 1);
            $logMsg = "清理了 {$deletedCount} 条非新鲜RSS文章当前保留策略{$days}天内的文章保持新鲜)";
        }

        // Reset per-URL refresh statistics.
        $db->exec("UPDATE urlnav_urls SET
            refresh_count = 0,
            success_count = 0,
            failure_count = 0,
            last_error = NULL,
            last_refresh = NULL
        ");

        if ($deletedCount > 0) {
            error_log("UrlNav: {$logMsg}");
        }

        return $deletedCount;
    } catch (Exception $e) {
        error_log('UrlNav: 清理所有缓存失败: ' . $e->getMessage());
        throw $e;
    }
}
/**
 * Fetch the most recent cron log rows.
 *
 * For rows whose cron_type contains "rss" or "refresh" and whose result column
 * holds a non-empty JSON object/array, the keys "successRssUrls" and
 * "failedRssUrls" are guaranteed to exist (added as empty arrays when absent) and
 * the result is re-encoded. Results that are not valid JSON, or that decode to a
 * scalar, are returned unchanged.
 *
 * @param string|null $type  Optional cron_type prefix filter
 * @param int         $limit Maximum number of rows (negative values are treated as 0)
 * @return array List of log rows (associative arrays); empty on error
 */
public static function getCronLogs($type = null, $limit = 20)
{
    try {
        $db = self::getDbConnection();

        $sql = "SELECT * FROM urlnav_cron_log";
        $params = [];
        if ($type !== null) {
            $sql .= " WHERE cron_type LIKE ?";
            $params[] = $type . '%';
        }
        $sql .= " ORDER BY executed_time DESC LIMIT ?";
        // A negative LIMIT would mean "no limit" in SQLite, so clamp it.
        $params[] = max(0, (int)$limit);

        $stmt = $db->prepare($sql);
        $stmt->execute($params);
        $logs = $stmt->fetchAll(PDO::FETCH_ASSOC);

        // Iterate by index instead of by reference so no dangling reference is left behind.
        foreach ($logs as $index => $log) {
            if (empty($log['result'])) {
                continue;
            }

            $cronType = (string)($log['cron_type'] ?? '');
            $isRssLog = strpos($cronType, 'rss') !== false || strpos($cronType, 'refresh') !== false;
            if (!$isRssLog) {
                continue;
            }

            // json_decode() returns null for invalid JSON; scalars cannot carry the extra keys.
            $decoded = json_decode($log['result'], true);
            if (!is_array($decoded) || empty($decoded)) {
                continue;
            }

            if (!isset($decoded['successRssUrls'])) {
                $decoded['successRssUrls'] = [];
            }
            if (!isset($decoded['failedRssUrls'])) {
                $decoded['failedRssUrls'] = [];
            }

            $logs[$index]['result'] = json_encode($decoded, JSON_UNESCAPED_UNICODE);
        }

        return $logs;
    } catch (Exception $e) {
        error_log('UrlNav: 获取定时任务日志失败: ' . $e->getMessage());
        return array();
    }
}
/**
 * Aggregate statistics over urlnav_cron_log.
 *
 * A run counts as successful when its error_message is NULL or empty, otherwise
 * as failed. Totals, success/failure counts and the latest execution time honour
 * the optional type filter; "type_stats" always covers every cron_type.
 *
 * The WHERE clause is assembled so that the query is valid both with and without
 * a type filter, and the per-type query is actually executed.
 *
 * @param string|null $type Optional cron_type prefix filter
 * @return array Keys: total, success, failed, last_executed, type_stats, success_rate.
 *               All-zero values are returned when the query fails.
 */
public static function getCronStats($type = null)
{
    try {
        $db = self::getDbConnection();

        $whereClause = '';
        $params = [];
        if ($type !== null) {
            $whereClause = " WHERE cron_type LIKE ?";
            $params[] = $type . '%';
        }

        // One pass over the filtered rows yields all four aggregates.
        $stmt = $db->prepare("
            SELECT COUNT(*) AS total,
                   SUM(CASE WHEN error_message IS NULL OR error_message = '' THEN 1 ELSE 0 END) AS success,
                   SUM(CASE WHEN error_message IS NOT NULL AND error_message != '' THEN 1 ELSE 0 END) AS failed,
                   MAX(executed_time) AS last_executed
            FROM urlnav_cron_log{$whereClause}
        ");
        $stmt->execute($params);
        $row = $stmt->fetch(PDO::FETCH_ASSOC) ?: array();

        // SUM() over zero rows is NULL, hence the defaults.
        $total = $row['total'] ?? 0;
        $success = $row['success'] ?? 0;
        $failed = $row['failed'] ?? 0;
        $lastExecuted = $row['last_executed'] ?? null;

        // Per-type counts, not restricted by the filter.
        $typeStmt = $db->prepare("SELECT cron_type, COUNT(*) as count FROM urlnav_cron_log GROUP BY cron_type ORDER BY cron_type");
        $typeStmt->execute();
        $typeStats = array();
        foreach ($typeStmt->fetchAll(PDO::FETCH_ASSOC) as $typeRow) {
            $typeStats[$typeRow['cron_type']] = $typeRow['count'];
        }

        return array(
            'total' => $total,
            'success' => $success,
            'failed' => $failed,
            'last_executed' => $lastExecuted,
            'type_stats' => $typeStats,
            'success_rate' => $total > 0 ? round(($success / $total) * 100, 2) : 0
        );
    } catch (Exception $e) {
        error_log('UrlNav: 获取定时任务统计失败: ' . $e->getMessage());
        return array(
            'total' => 0,
            'success' => 0,
            'failed' => 0,
            'last_executed' => null,
            'type_stats' => array(),
            'success_rate' => 0
        );
    }
}
/**
 * Cron statistics restricted to cron types starting with "rss".
 *
 * @return array Same structure as getCronStats()
 */
public static function getRssCronStats()
{
    $stats = self::getCronStats('rss');

    return $stats;
}
/**
 * Cron statistics restricted to cron types starting with "status".
 *
 * @return array Same structure as getCronStats()
 */
public static function getStatusCronStats()
{
    $stats = self::getCronStats('status');

    return $stats;
}
/**
 * Count active URLs that have a non-blank RSS address.
 *
 * @return int|string Row count as returned by the driver; 0 on error
 */
public static function getTotalUrlsWithRss()
{
    try {
        $row = self::getDbConnection()->query("
            SELECT COUNT(*) as total FROM urlnav_urls
            WHERE is_active = 1
            AND rss_url IS NOT NULL
            AND TRIM(rss_url) != ''
        ")->fetch(PDO::FETCH_ASSOC);

        return $row['total'] ?? 0;
    } catch (Exception $e) {
        error_log('UrlNav: 获取有RSS地址的网址总数失败: ' . $e->getMessage());
        return 0;
    }
}
/**
 * Count active URLs with an RSS address whose last_refresh is still NULL.
 *
 * @return int|string Row count as returned by the driver; 0 on error
 */
public static function getUnrefreshedUrlsCount()
{
    try {
        $row = self::getDbConnection()->query("
            SELECT COUNT(*) as total FROM urlnav_urls
            WHERE is_active = 1
            AND rss_url IS NOT NULL
            AND TRIM(rss_url) != ''
            AND last_refresh IS NULL
        ")->fetch(PDO::FETCH_ASSOC);

        return $row['total'] ?? 0;
    } catch (Exception $e) {
        error_log('UrlNav: 获取从未刷新过的网址数量失败: ' . $e->getMessage());
        return 0;
    }
}
/**
 * Find the earliest last_refresh value among active URLs that have an RSS address.
 *
 * URLs that were never refreshed (last_refresh IS NULL) are excluded.
 *
 * @return string|null The oldest last_refresh value, or null when none exists or on error
 */
public static function getOldestRefreshTime()
{
    try {
        $row = self::getDbConnection()->query("
            SELECT MIN(last_refresh) as oldest FROM urlnav_urls
            WHERE is_active = 1
            AND rss_url IS NOT NULL
            AND TRIM(rss_url) != ''
            AND last_refresh IS NOT NULL
        ")->fetch(PDO::FETCH_ASSOC);

        return $row['oldest'] ?? null;
    } catch (Exception $e) {
        error_log('UrlNav: 获取最久的刷新时间失败: ' . $e->getMessage());
        return null;
    }
}
/**
 * List all active categories, ordered by sort_order and then newest first.
 *
 * @return array Category rows (associative arrays); empty on error
 */
public static function getAllCategories()
{
    try {
        $query = self::getDbConnection()->query(
            "SELECT * FROM urlnav_categories WHERE is_active = 1 ORDER BY sort_order, created_at DESC"
        );

        return $query->fetchAll(PDO::FETCH_ASSOC);
    } catch (Exception $e) {
        error_log('UrlNav: 获取分类失败: ' . $e->getMessage());
        return array();
    }
}
/**
 * List active URLs with pagination and optional filters.
 *
 * $page and $pageSize are normalised to integers first: a non-numeric or
 * sub-1 page becomes 1, a non-numeric or sub-1 page size becomes 20. This keeps
 * the OFFSET non-negative and prevents a division by zero when computing the
 * page count.
 *
 * @param int|string|null $categoryId Category id filter (null or '' = all)
 * @param int             $page       1-based page number
 * @param int             $pageSize   Rows per page
 * @param string          $search     Substring searched in title, url, description and rss_url
 * @param string          $status     'online', 'offline', 'unchecked' or '' for all
 * @param string          $hasRss     'yes', 'no' or '' for all
 * @param string|int|null $starRating '0'..'3', 'starred' (rating > 0) or null/'' for all
 * @return array Keys: total, totalPages, currentPage, pageSize, data
 */
public static function getAllUrls($categoryId = null, $page = 1, $pageSize = 20, $search = '', $status = '', $hasRss = '', $starRating = null)
{
    $page = is_numeric($page) ? max(1, (int)$page) : 1;
    $pageSize = is_numeric($pageSize) ? (int)$pageSize : 20;
    if ($pageSize < 1) {
        $pageSize = 20;
    }

    // Filter value => SQL condition. Unknown values add no condition.
    $statusFilters = array(
        'online' => "u.is_online = 1 AND u.last_status_check IS NOT NULL",
        'offline' => "u.is_online = 0 AND u.last_status_check IS NOT NULL",
        'unchecked' => "u.last_status_check IS NULL",
    );
    $rssFilters = array(
        'yes' => "u.rss_url IS NOT NULL AND u.rss_url != ''",
        'no' => "(u.rss_url IS NULL OR u.rss_url = '')",
    );
    $starFilters = array(
        '1' => "u.star_rating = 1",
        '2' => "u.star_rating = 2",
        '3' => "u.star_rating = 3",
        '0' => "u.star_rating = 0",
        'starred' => "u.star_rating > 0",
    );

    try {
        $db = self::getDbConnection();

        $conditions = array("u.is_active = 1");
        $params = array();

        if ($categoryId !== null && $categoryId !== '') {
            $conditions[] = "u.category_id = ?";
            $params[] = $categoryId;
        }

        // Compare as strings so integer and string filter values behave the same.
        $statusKey = is_scalar($status) ? (string)$status : '';
        if (isset($statusFilters[$statusKey])) {
            $conditions[] = $statusFilters[$statusKey];
        }

        $rssKey = is_scalar($hasRss) ? (string)$hasRss : '';
        if (isset($rssFilters[$rssKey])) {
            $conditions[] = $rssFilters[$rssKey];
        }

        $starKey = is_scalar($starRating) ? (string)$starRating : '';
        if ($starKey !== '' && isset($starFilters[$starKey])) {
            $conditions[] = $starFilters[$starKey];
        }

        if ($search) {
            $conditions[] = "(u.title LIKE ? OR u.url LIKE ? OR u.description LIKE ? OR u.rss_url LIKE ?)";
            $searchTerm = "%{$search}%";
            array_push($params, $searchTerm, $searchTerm, $searchTerm, $searchTerm);
        }

        $whereClause = "WHERE " . implode(" AND ", $conditions);

        // Total number of matching rows.
        $countStmt = $db->prepare("SELECT COUNT(*) as total FROM urlnav_urls u $whereClause");
        $countStmt->execute($params);
        $countResult = $countStmt->fetch(PDO::FETCH_ASSOC);
        $total = $countResult['total'];

        $offset = ($page - 1) * $pageSize;

        // Page of rows, including the category name.
        $sql = "SELECT u.*, c.name as category_name
                FROM urlnav_urls u
                LEFT JOIN urlnav_categories c ON u.category_id = c.id
                $whereClause
                ORDER BY u.sort_order, u.created_at DESC
                LIMIT ? OFFSET ?";
        $stmt = $db->prepare($sql);
        $stmt->execute(array_merge($params, array($pageSize, $offset)));
        $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);

        return array(
            'total' => $total,
            'totalPages' => ceil($total / $pageSize),
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => $urls
        );
    } catch (Exception $e) {
        error_log('UrlNav: 获取网址失败: ' . $e->getMessage());
        return array(
            'total' => 0,
            'totalPages' => 0,
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => array()
        );
    }
}
/**
 * Load a single category row by id.
 *
 * @param int|string $id Category id; an empty value short-circuits to null
 * @return array|false|null Row as an associative array, false when no row matches
 *                          (PDO fetch result), null for an empty id or on error
 */
public static function getCategoryById($id)
{
    if (empty($id)) {
        return null;
    }

    try {
        $lookup = self::getDbConnection()->prepare("SELECT * FROM urlnav_categories WHERE id = ?");
        $lookup->execute(array($id));

        return $lookup->fetch(PDO::FETCH_ASSOC);
    } catch (Exception $e) {
        error_log('UrlNav: 获取分类信息失败: ' . $e->getMessage());
        return null;
    }
}
/**
 * Load a single URL row by id.
 *
 * @param int|string $id URL id; an empty value short-circuits to null
 * @return array|false|null Row as an associative array, false when no row matches
 *                          (PDO fetch result), null for an empty id or on error
 */
public static function getUrlById($id)
{
    if (empty($id)) {
        return null;
    }

    try {
        $lookup = self::getDbConnection()->prepare("SELECT * FROM urlnav_urls WHERE id = ?");
        $lookup->execute(array($id));

        return $lookup->fetch(PDO::FETCH_ASSOC);
    } catch (Exception $e) {
        error_log('UrlNav: 获取网址信息失败: ' . $e->getMessage());
        return null;
    }
}
/**
 * Check that a value is a syntactically valid http(s) URL.
 *
 * The value must pass FILTER_VALIDATE_URL and use the http or https scheme.
 * The scheme comparison is case-insensitive, so "HTTP://example.com" is accepted.
 *
 * @param mixed $url Candidate URL
 * @return bool True when the value is a valid http/https URL
 */
public static function validateUrl($url)
{
    if (!is_string($url) || $url === '') {
        return false;
    }

    // Syntax check.
    if (filter_var($url, FILTER_VALIDATE_URL) === false) {
        return false;
    }

    // Scheme check; parse_url() keeps the original letter case, so normalise it.
    $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));

    return in_array($scheme, array('http', 'https'), true);
}
/**
 * List fresh cached RSS articles with pagination and optional filters.
 *
 * $page and $pageSize are normalised to integers first: a non-numeric or
 * sub-1 page becomes 1, a non-numeric or sub-1 page size becomes 20 (a negative
 * LIMIT would otherwise return every row). The star rating is compared as a
 * string, so the integer 1 and the string '1' select the same filter.
 *
 * @param int|string|null $categoryId Category id filter (null, '' or 'all' = all)
 * @param int             $page       1-based page number
 * @param int             $pageSize   Rows per page
 * @param string          $search     Substring searched in feed title, description,
 *                                    full content and site title
 * @param string|int|null $starRating '0'..'3', 'starred' (rating > 0) or null/'' for all
 * @return array Keys: total, totalPages, currentPage, pageSize, data
 */
public static function getAllRssFeeds($categoryId = null, $page = 1, $pageSize = 20, $search = '', $starRating = '')
{
    $page = is_numeric($page) ? max(1, (int)$page) : 1;
    $pageSize = is_numeric($pageSize) ? (int)$pageSize : 20;
    if ($pageSize < 1) {
        $pageSize = 20;
    }

    // Star filter value => SQL condition. Unknown values add no condition.
    $starFilters = array(
        '1' => "u.star_rating = 1",
        '2' => "u.star_rating = 2",
        '3' => "u.star_rating = 3",
        '0' => "u.star_rating = 0",
        'starred' => "u.star_rating > 0",
    );

    try {
        $db = self::getDbConnection();

        $conditions = array("c.is_fresh = 1");
        $params = array();

        if ($categoryId !== null && $categoryId !== '' && $categoryId !== 'all') {
            $conditions[] = "u.category_id = ?";
            $params[] = $categoryId;
        }

        $starKey = is_scalar($starRating) ? (string)$starRating : '';
        if ($starKey !== '' && isset($starFilters[$starKey])) {
            $conditions[] = $starFilters[$starKey];
        }

        if ($search) {
            $conditions[] = "(c.feed_title LIKE ? OR c.feed_description LIKE ? OR c.full_content LIKE ? OR u.title LIKE ?)";
            $searchTerm = "%{$search}%";
            array_push($params, $searchTerm, $searchTerm, $searchTerm, $searchTerm);
        }

        $whereClause = "WHERE " . implode(" AND ", $conditions);

        // Total number of matching rows.
        $countSql = "SELECT COUNT(*) as total
                     FROM urlnav_rss_cache c
                     LEFT JOIN urlnav_urls u ON c.url_id = u.id
                     $whereClause";
        $countStmt = $db->prepare($countSql);
        $countStmt->execute($params);
        $countResult = $countStmt->fetch(PDO::FETCH_ASSOC);
        $total = $countResult['total'];

        $offset = ($page - 1) * $pageSize;

        // Page of rows, newest publication first, with site and category info.
        $sql = "SELECT c.*, u.title as site_title, u.url as site_url, u.category_id, u.star_rating,
                cat.name as category_name
                FROM urlnav_rss_cache c
                LEFT JOIN urlnav_urls u ON c.url_id = u.id
                LEFT JOIN urlnav_categories cat ON u.category_id = cat.id
                $whereClause
                ORDER BY c.pub_date DESC
                LIMIT ? OFFSET ?";
        $stmt = $db->prepare($sql);
        $stmt->execute(array_merge($params, array($pageSize, $offset)));
        $feeds = $stmt->fetchAll(PDO::FETCH_ASSOC);

        return array(
            'total' => $total,
            'totalPages' => ceil($total / $pageSize),
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => $feeds
        );
    } catch (Exception $e) {
        error_log('UrlNav: 获取RSS信息失败: ' . $e->getMessage());
        return array(
            'total' => 0,
            'totalPages' => 0,
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => array()
        );
    }
}
/**
 * Build the map of star-rating values to their display labels.
 *
 * @return array Labels keyed by rating 0 to 3; 0 is the "no rating" label,
 *               1 to 3 are that many star characters.
 */
public static function getStarRatingOptions()
{
    $labels = [0 => '无星级'];

    // Ratings 1..3 are rendered as the matching number of stars.
    for ($stars = 1; $stars <= 3; $stars++) {
        $labels[$stars] = str_repeat('★', $stars);
    }

    return $labels;
}
/**
 * Translate a star-rating value into its display label.
 *
 * @param mixed $rating Rating value used as a key into getStarRatingOptions().
 * @return string The matching label, or the "no rating" label when the key is unknown.
 */
public static function getStarRatingText($rating)
{
    $labels = self::getStarRatingOptions();

    if (isset($labels[$rating])) {
        return $labels[$rating];
    }

    return '无星级';
}
/**
 * Collect summary statistics about the RSS cache table.
 *
 * @return array Keys:
 *   - total, fresh, stale: row counts of urlnav_rss_cache (all / is_fresh = 1 / is_fresh = 0)
 *   - last_cached, first_cached: MAX / MIN of cached_at, or null when the table is empty
 *   - last_auto_refresh: latest refresh_time in urlnav_refresh_log for cron_type 'rss', or null
 *   - url_stats: url_count, avg_feeds_per_url, max_feeds_per_url
 *   On a database error every count is 0 and every timestamp is null.
 */
public static function getCacheStats()
{
    $emptyUrlStats = ['url_count' => 0, 'avg_feeds_per_url' => 0, 'max_feeds_per_url' => 0];

    try {
        $db = self::getDbConnection();

        // One pass over the cache table yields every count and timestamp.
        // SUM() is NULL on an empty table, hence the ?? 0 defaults below.
        $stmt = $db->query("
            SELECT
                COUNT(*) as total,
                SUM(CASE WHEN is_fresh = 1 THEN 1 ELSE 0 END) as fresh,
                SUM(CASE WHEN is_fresh = 0 THEN 1 ELSE 0 END) as stale,
                MAX(cached_at) as last_cached,
                MIN(cached_at) as first_cached
            FROM urlnav_rss_cache
        ");
        $cacheRow = $stmt->fetch(PDO::FETCH_ASSOC);

        // Time of the most recent automatic RSS refresh.
        $stmt = $db->query("SELECT MAX(refresh_time) as last_auto_refresh FROM urlnav_refresh_log WHERE cron_type = 'rss'");
        $refreshResult = $stmt->fetch(PDO::FETCH_ASSOC);

        // Per-URL statistics. Counting per url_id first and aggregating the
        // counts gives a true average per URL; averaging a per-row correlated
        // count would weight every URL by its own number of entries.
        $stmt = $db->query("
            SELECT
                COUNT(*) as url_count,
                AVG(feed_count) as avg_feeds_per_url,
                MAX(feed_count) as max_feeds_per_url
            FROM (
                SELECT COUNT(*) as feed_count
                FROM urlnav_rss_cache
                WHERE url_id IS NOT NULL
                GROUP BY url_id
            ) AS per_url
        ");
        $urlStats = $stmt->fetch(PDO::FETCH_ASSOC);

        return [
            'total' => $cacheRow['total'] ?? 0,
            'fresh' => $cacheRow['fresh'] ?? 0,
            'stale' => $cacheRow['stale'] ?? 0,
            'last_cached' => $cacheRow['last_cached'] ?? null,
            'first_cached' => $cacheRow['first_cached'] ?? null,
            'last_auto_refresh' => $refreshResult['last_auto_refresh'] ?? null,
            'url_stats' => $urlStats ?: $emptyUrlStats,
        ];
    } catch (Exception $e) {
        // Log like the sibling methods do instead of failing silently.
        error_log('UrlNav: 获取缓存统计失败: ' . $e->getMessage());
        return [
            'total' => 0,
            'fresh' => 0,
            'stale' => 0,
            'last_cached' => null,
            'first_cached' => null,
            'last_auto_refresh' => null,
            'url_stats' => $emptyUrlStats,
        ];
    }
}
/**
 * Return the most recent refresh log rows plus overall totals for one cron type.
 *
 * @param string $cronType Value matched against urlnav_refresh_log.cron_type.
 * @param mixed  $limit    Maximum number of log rows; non-numeric or below 1 falls back to 10.
 * @return array Keys: logs (rows, newest refresh_time first) and total_stats
 *               (total_refreshes, total_success, total_new_articles, avg_duration).
 *               Aggregates that are NULL in the database are reported as 0.
 */
public static function getRefreshStats($cronType = 'rss', $limit = 10)
{
    $limit = is_numeric($limit) ? (int) $limit : 10;
    if ($limit < 1) {
        $limit = 10;
    }

    $zeroTotals = [
        'total_refreshes' => 0,
        'total_success' => 0,
        'total_new_articles' => 0,
        'avg_duration' => 0,
    ];

    try {
        $db = self::getDbConnection();

        $stmt = $db->prepare("
            SELECT * FROM urlnav_refresh_log
            WHERE cron_type = ?
            ORDER BY refresh_time DESC
            LIMIT ?
        ");
        $stmt->bindValue(1, $cronType);
        // LIMIT must be bound as an integer to be portable across PDO drivers.
        $stmt->bindValue(2, $limit, PDO::PARAM_INT);
        $stmt->execute();
        $logs = $stmt->fetchAll(PDO::FETCH_ASSOC);

        // Overall totals for this cron type.
        $totalStmt = $db->prepare("
            SELECT
                COUNT(*) as total_refreshes,
                SUM(success_count) as total_success,
                SUM(new_articles) as total_new_articles,
                AVG(duration) as avg_duration
            FROM urlnav_refresh_log
            WHERE cron_type = ?
        ");
        $totalStmt->execute([$cronType]);
        $totalStats = $totalStmt->fetch(PDO::FETCH_ASSOC) ?: [];

        // SUM()/AVG() return NULL when no rows match; report zeros so the
        // shape matches the error fallback.
        foreach ($zeroTotals as $key => $default) {
            if (!isset($totalStats[$key])) {
                $totalStats[$key] = $default;
            }
        }

        return [
            'logs' => $logs,
            'total_stats' => $totalStats,
        ];
    } catch (Exception $e) {
        error_log('UrlNav: 获取刷新统计失败: ' . $e->getMessage());
        return [
            'logs' => [],
            'total_stats' => $zeroTotals,
        ];
    }
}
/**
 * Check the online status of active URLs and persist the outcome.
 *
 * Every checked URL gets a row in urlnav_status_log and its is_online,
 * last_status_check, status_check_count and last_status_code columns updated.
 *
 * Modes:
 *  - Automatic ($urlIds === null, $isBatchCheck false, $batchInfo null):
 *    checks up to the configured statusCheckMax URLs (clamped to 1..200),
 *    never-checked URLs first, then the least recently checked.
 *  - Selected: the IDs in $urlIds (array, or comma separated string), or the
 *    selected_ids stored in $batchInfo when $urlIds is null. When explicit IDs
 *    arrive without batch info they are all checked in this single call.
 *  - All: pages through every active URL by id using the batch number and
 *    size carried in $batchInfo.
 *
 * @param array|string|null $urlIds       IDs to check, or null.
 * @param bool              $isBatchCheck True for manual/batch mode; only used to tell
 *                                        the automatic mode apart.
 * @param string|array|null $batchInfo    JSON string (or decoded array) with keys
 *                                        batch, total, size and optionally selected_ids.
 * @return array Result with success, message, total, success_count, failed_count and,
 *               depending on the mode, avg_response_time, results, successUrls/failedUrls
 *               or the paging keys (has_more, batch_number, total_batches,
 *               next_batch_info, processed_count, total_to_process, checked_ids).
 */
public static function manualCheckStatus($urlIds = null, $isBatchCheck = false, $batchInfo = null)
{
    // Checking many sites is slow; allow up to five minutes.
    @set_time_limit(300);

    // Declared before the try block so the catch handler can always report them.
    $batchNumber = 1;
    $totalBatches = 1;

    try {
        $db = self::getDbConnection();

        $batchSize = 10;
        $selectedIds = [];

        // Decode the batch information (JSON string or already-decoded array).
        $batchData = null;
        if (is_string($batchInfo) && $batchInfo !== '') {
            $batchData = json_decode($batchInfo, true);
        } elseif (is_array($batchInfo)) {
            $batchData = $batchInfo;
        }
        $hasBatchData = is_array($batchData) && !empty($batchData);

        if ($hasBatchData) {
            $batchNumber = max(1, intval($batchData['batch'] ?? 1));
            $totalBatches = max(1, intval($batchData['total'] ?? 1));
            $batchSize = intval($batchData['size'] ?? 10);
            // A size below 1 would divide by zero when computing the batch count.
            if ($batchSize < 1) {
                $batchSize = 10;
            }
            if (!empty($batchData['selected_ids'])) {
                $selectedIds = self::manualCheckNormalizeIds($batchData['selected_ids']);
            }
        }

        // ---- Automatic mode -------------------------------------------------
        if ($urlIds === null && !$isBatchCheck && $batchInfo === null) {
            $config = self::getConfig();
            $maxCheck = intval($config->statusCheckMax ?? 80);
            // Safety limits.
            if ($maxCheck > 200) {
                $maxCheck = 200;
            }
            if ($maxCheck < 1) {
                $maxCheck = 20;
            }

            // The CASE expression already sorts never-checked rows first, so no
            // NULLS FIRST clause (unsupported by older SQLite) is needed.
            $stmt = $db->prepare(
                "SELECT * FROM urlnav_urls WHERE is_active = 1
                ORDER BY
                    CASE WHEN last_status_check IS NULL THEN 0 ELSE 1 END,
                    last_status_check ASC
                LIMIT ?"
            );
            $stmt->bindValue(1, $maxCheck, PDO::PARAM_INT);
            $stmt->execute();
            $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);

            if (empty($urls)) {
                return [
                    'success' => true,
                    'message' => '没有需要检查的网址',
                    'total' => 0,
                    'success_count' => 0,
                    'failed_count' => 0
                ];
            }

            $run = self::manualCheckRunBatch($db, $urls, 50000);

            return [
                'success' => true,
                'message' => sprintf("自动检查完成: %d成功, %d失败", $run['success_count'], $run['failed_count']),
                'total' => count($urls),
                'success_count' => $run['success_count'],
                'failed_count' => $run['failed_count'],
                'avg_response_time' => $run['avg_response_time'],
                'results' => $run['results'],
                'successUrls' => $run['success_urls'],
                'failedUrls' => $run['failed_urls']
            ];
        }

        // ---- Manual / batch mode -------------------------------------------
        // Shared shape of every "nothing to do" answer in this mode.
        $noWork = static function ($message, $batchNumber, $totalBatches, $completed = false) {
            $result = [
                'success' => true,
                'message' => $message,
                'total' => 0,
                'success_count' => 0,
                'failed_count' => 0,
                'has_more' => false,
                'batch_number' => $batchNumber,
                'total_batches' => $totalBatches
            ];
            if ($completed) {
                $result['completed'] = true;
            }
            return $result;
        };

        // Work out which IDs (if any) were selected.
        $idsToCheck = [];
        if ($urlIds !== null) {
            if (is_array($urlIds)) {
                $idsToCheck = self::manualCheckNormalizeIds($urlIds);
                if (empty($idsToCheck)) {
                    return $noWork('没有选择要检查的网址', $batchNumber, $totalBatches);
                }
            } elseif (is_string($urlIds) || is_int($urlIds)) {
                $idsToCheck = self::manualCheckNormalizeIds((string) $urlIds);
            }
            // Explicit IDs without batch info are "the batch": check all of
            // them now instead of only the first default-sized slice.
            if (!$hasBatchData && count($idsToCheck) > $batchSize) {
                $batchSize = count($idsToCheck);
            }
        } elseif (!empty($selectedIds)) {
            $idsToCheck = $selectedIds;
        }

        $totalUrls = 0;
        $sql = "SELECT * FROM urlnav_urls WHERE is_active = 1";
        $params = [];

        if (!empty($idsToCheck)) {
            // Selected IDs: slice the current batch out of the ID list.
            $totalUrls = count($idsToCheck);
            $totalBatches = (int) ceil($totalUrls / $batchSize);

            if ($batchNumber > $totalBatches) {
                return $noWork('所有选中的网址已检查完成', $batchNumber, $totalBatches, true);
            }

            $offset = ($batchNumber - 1) * $batchSize;
            $currentBatchIds = array_slice($idsToCheck, $offset, $batchSize);
            if (empty($currentBatchIds)) {
                return $noWork('没有需要检查的网址', $batchNumber, $totalBatches);
            }

            $placeholders = implode(',', array_fill(0, count($currentBatchIds), '?'));
            $sql .= " AND id IN ($placeholders) ORDER BY id";
            $params = $currentBatchIds;
        } else {
            // No IDs: page through every active URL. The order must not depend
            // on last_status_check, because each batch rewrites that column and
            // OFFSET paging over it would skip rows; id is stable.
            $sql .= " ORDER BY id ASC LIMIT ? OFFSET ?";

            $totalStmt = $db->query("SELECT COUNT(*) as total FROM urlnav_urls WHERE is_active = 1");
            $totalResult = $totalStmt->fetch(PDO::FETCH_ASSOC);
            $totalUrls = (int) ($totalResult['total'] ?? 0);
            $totalBatches = (int) ceil($totalUrls / $batchSize);

            if ($batchNumber > $totalBatches) {
                return $noWork('所有网址已检查完成', $batchNumber, $totalBatches, true);
            }

            $offset = ($batchNumber - 1) * $batchSize;
            $params = [$batchSize, $offset];
        }

        $stmt = $db->prepare($sql);
        // Every parameter in this mode (IDs, LIMIT, OFFSET) is an integer.
        foreach (array_values($params) as $index => $value) {
            $stmt->bindValue($index + 1, (int) $value, PDO::PARAM_INT);
        }
        $stmt->execute();
        $urls = $stmt->fetchAll(PDO::FETCH_ASSOC);

        if (empty($urls)) {
            return $noWork('没有需要检查的网址', $batchNumber, $totalBatches);
        }

        $run = self::manualCheckRunBatch($db, $urls, 100000);
        $checkedIds = $run['checked_ids'];

        // Is there another batch after this one?
        $processedCount = ($batchNumber - 1) * $batchSize + count($checkedIds);
        $totalToProcess = !empty($idsToCheck) ? count($idsToCheck) : $totalUrls;
        $hasMore = ($processedCount < $totalToProcess);

        // Batch information the client sends back to request the next batch.
        $nextBatchInfo = [
            'batch' => $batchNumber + 1,
            'total' => $totalBatches,
            'size' => $batchSize,
            'selected_ids' => !empty($idsToCheck) ? implode(',', $idsToCheck) : ''
        ];

        return [
            'success' => true,
            'message' => sprintf("第%d批检查完成: %d成功, %d失败", $batchNumber, $run['success_count'], $run['failed_count']),
            'total' => count($urls),
            'success_count' => $run['success_count'],
            'failed_count' => $run['failed_count'],
            'avg_response_time' => $run['avg_response_time'],
            'results' => $run['results'],
            'has_more' => $hasMore,
            'checked_ids' => $checkedIds,
            'batch_number' => $batchNumber,
            'total_batches' => $totalBatches,
            'next_batch_info' => json_encode($nextBatchInfo),
            'processed_count' => $processedCount,
            'total_to_process' => $totalToProcess
        ];
    } catch (Exception $e) {
        error_log('UrlNav: 检查网站状态失败: ' . $e->getMessage());
        return [
            'success' => false,
            'message' => '检查失败: ' . $e->getMessage(),
            'has_more' => false,
            'batch_number' => $batchNumber,
            'total_batches' => $totalBatches
        ];
    }
}

/**
 * Turn an ID list (array, or comma separated string) into a list of integer IDs,
 * dropping every entry that is not numeric.
 */
private static function manualCheckNormalizeIds($rawIds)
{
    if (is_string($rawIds)) {
        $rawIds = explode(',', $rawIds);
    }
    if (!is_array($rawIds)) {
        return [];
    }

    $ids = [];
    foreach ($rawIds as $rawId) {
        if (is_string($rawId)) {
            $rawId = trim($rawId);
        }
        if (is_numeric($rawId)) {
            $ids[] = (int) $rawId;
        }
    }
    return $ids;
}

/**
 * Check each URL row with checkSingleWebsite(), write the log row and the
 * status update, and return the per-batch summary. Pauses for
 * $pauseMicroseconds after every fifth URL to limit the load.
 */
private static function manualCheckRunBatch($db, array $urls, $pauseMicroseconds)
{
    $logStmt = $db->prepare("
        INSERT INTO urlnav_status_log
        (url_id, is_online, status_code, response_time, check_time, error_message)
        VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, ?)
    ");
    $updateStmt = $db->prepare("
        UPDATE urlnav_urls SET
        is_online = ?,
        last_status_check = CURRENT_TIMESTAMP,
        status_check_count = status_check_count + 1,
        last_status_code = ?
        WHERE id = ?
    ");

    $summary = [
        'results' => [],
        'checked_ids' => [],
        'success_count' => 0,
        'failed_count' => 0,
        'avg_response_time' => 0,
        'success_urls' => [],
        'failed_urls' => []
    ];
    $totalResponseTime = 0;

    foreach ($urls as $url) {
        $status = self::checkSingleWebsite($url['url']);
        $urlId = $url['id'];
        $isOnline = $status['success'] ? 1 : 0;

        $summary['results'][$urlId] = $status;
        $summary['checked_ids'][] = $urlId;

        if ($status['success']) {
            $totalResponseTime += $status['response_time'];
            $summary['success_count']++;
            $summary['success_urls'][] = [
                'id' => $urlId,
                'url' => $url['url'],
                'title' => $url['title'] ?? $url['url'],
                'response_time' => $status['response_time']
            ];
        } else {
            $summary['failed_count']++;
            $summary['failed_urls'][] = [
                'id' => $urlId,
                'url' => $url['url'],
                'title' => $url['title'] ?? $url['url'],
                'error' => $status['message'],
                'status_code' => $status['status_code']
            ];
        }

        // Append the log row, then update the URL's current status.
        $logStmt->execute([
            $urlId,
            $isOnline,
            $status['status_code'],
            $status['response_time'],
            $status['message']
        ]);
        $updateStmt->execute([
            $isOnline,
            $status['status_code'],
            $urlId
        ]);

        if (count($summary['checked_ids']) % 5 == 0) {
            usleep($pauseMicroseconds);
        }
    }

    // The average covers successful checks only.
    if ($summary['success_count'] > 0) {
        $summary['avg_response_time'] = round($totalResponseTime / $summary['success_count'], 2);
    }

    return $summary;
}
/**
 * Probe one website with a ranged GET request and classify the result.
 *
 * Any HTTP status from 200 to 499 counts as reachable; 5xx, a cURL error or
 * a missing status code counts as unreachable.
 *
 * @param string $url URL to request.
 * @return array Always contains success (bool), status_code (int), message (string)
 *               and response_time (milliseconds, wall clock). When the request produced
 *               an HTTP response it also contains response_time_curl, redirect_count,
 *               effective_url, method and timing (total/connect/dns in seconds).
 */
private static function checkSingleWebsite($url)
{
    $startTime = microtime(true);

    // Elapsed wall-clock time in milliseconds since the check started.
    $elapsedMs = static function () use ($startTime) {
        return round((microtime(true) - $startTime) * 1000, 2);
    };

    try {
        if (!function_exists('curl_init')) {
            // Without the cURL extension the call below would be a fatal error.
            return [
                'success' => false,
                'status_code' => 0,
                'message' => '连接失败: cURL扩展不可用',
                'response_time' => $elapsedMs()
            ];
        }

        $config = self::getConfig();
        $timeout = intval($config->statusCheckTimeout ?? 15);
        // cURL treats a timeout of 0 as "wait forever"; never allow that.
        if ($timeout < 1) {
            $timeout = 15;
        }

        error_log("UrlNav: 开始检查网站状态: {$url} [使用GET方法]");

        $ch = curl_init();
        if ($ch === false) {
            return [
                'success' => false,
                'status_code' => 0,
                'message' => '连接失败: 无法初始化cURL',
                'response_time' => $elapsedMs()
            ];
        }

        // GET is used instead of HEAD because some servers handle HEAD badly.
        // NOTE(review): certificate verification is switched off — presumably so
        // that sites with invalid certificates still count as reachable; confirm.
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 5,
            CURLOPT_TIMEOUT => $timeout,
            CURLOPT_CONNECTTIMEOUT => $timeout,
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            CURLOPT_ENCODING => '',
            CURLOPT_IPRESOLVE => CURL_IPRESOLVE_V4,
            CURLOPT_FAILONERROR => false,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_NOBODY => false,
            CURLOPT_HEADER => true,
            // Ask only for the first bytes of the body to save bandwidth.
            CURLOPT_RANGE => '0-1024',
        ]);

        // The body itself is not needed; only the transfer metadata is used.
        curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $totalTime = curl_getinfo($ch, CURLINFO_TOTAL_TIME);
        $connectTime = curl_getinfo($ch, CURLINFO_CONNECT_TIME);
        $namelookupTime = curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME);
        $redirectCount = curl_getinfo($ch, CURLINFO_REDIRECT_COUNT);
        $effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        $error = curl_error($ch);
        $errorNo = curl_errno($ch);
        curl_close($ch);

        $responseTime = $elapsedMs();

        error_log("UrlNav: GET检查结果 - URL: {$url}, HTTP: {$httpCode}, 耗时: {$totalTime}s");

        if ($errorNo !== 0) {
            // Transport-level failure: map well-known cURL errors to messages.
            $errorMessages = [
                CURLE_COULDNT_CONNECT => '无法连接服务器',
                CURLE_COULDNT_RESOLVE_HOST => '无法解析主机名',
                CURLE_OPERATION_TIMEDOUT => '请求超时',
                CURLE_SSL_CONNECT_ERROR => 'SSL连接错误',
                CURLE_GOT_NOTHING => '服务器无响应',
            ];
            $errorMessage = $errorMessages[$errorNo] ?? '连接失败: ' . $error;
            error_log("UrlNav: cURL错误 - {$errorMessage}");
            return [
                'success' => false,
                'status_code' => 0,
                'message' => $errorMessage,
                'response_time' => $responseTime
            ];
        }

        // Deliberately lenient: 2xx, 3xx and 4xx all mean the site answered.
        $isSuccess = false;
        if ($httpCode >= 200 && $httpCode < 500) {
            $isSuccess = true;
            if ($httpCode < 300) {
                $statusMessage = "正常 (HTTP {$httpCode})";
            } elseif ($httpCode < 400) {
                $statusMessage = "重定向 (HTTP {$httpCode})";
            } else {
                $statusMessage = "客户端错误但网站可访问 (HTTP {$httpCode})";
            }
            if ($redirectCount > 0 && $url !== $effectiveUrl) {
                $statusMessage .= ",经过 {$redirectCount} 次重定向";
            }
        } elseif ($httpCode >= 500) {
            $statusMessage = "服务器错误 (HTTP {$httpCode})";
        } elseif ($httpCode === 0) {
            $statusMessage = "未获取到HTTP状态码";
        } else {
            // Codes 1..199 are not a final response; report them instead of
            // returning an empty message.
            $statusMessage = "未知的HTTP状态 (HTTP {$httpCode})";
        }

        // Reachable but slow responses get a dedicated message.
        if ($isSuccess && $totalTime > 3) {
            $statusMessage = "慢速但可访问 ({$totalTime}s, HTTP {$httpCode})";
        }

        return [
            'success' => $isSuccess,
            'status_code' => $httpCode,
            'message' => $statusMessage,
            'response_time' => $responseTime,
            'response_time_curl' => round($totalTime * 1000, 2),
            'redirect_count' => $redirectCount,
            'effective_url' => $effectiveUrl,
            'method' => 'GET',
            'timing' => [
                'total' => $totalTime,
                'connect' => $connectTime,
                'dns' => $namelookupTime
            ]
        ];
    } catch (Exception $e) {
        error_log("UrlNav: 检查异常 - {$url}: " . $e->getMessage());
        return [
            'success' => false,
            'status_code' => 0,
            'message' => '检查异常: ' . $e->getMessage(),
            'response_time' => $elapsedMs()
        ];
    }
}
/**
 * Request handler: check website status for selected URLs or for all URLs.
 *
 * Reads url_ids (or urlIds) and batch_info from the request. When IDs are
 * selected, the current batch is sliced out of the ID list here and passed to
 * manualCheckStatus(); otherwise manualCheckStatus() pages through all URLs
 * using batch_info. The result is sent with throwJson().
 */
public function checkStatus()
{
    try {
        // Accept both parameter spellings.
        $urlIds = $this->request->get('url_ids');
        if (empty($urlIds)) {
            $urlIds = $this->request->get('urlIds');
        }
        $batchInfo = $this->request->get('batch_info');

        // Normalise the selection (array, comma separated string or single id)
        // into a list of positive numeric IDs.
        if (is_array($urlIds)) {
            $candidates = $urlIds;
        } elseif (is_string($urlIds) || is_int($urlIds)) {
            $candidates = explode(',', (string) $urlIds);
        } else {
            $candidates = [];
        }
        $idArray = [];
        foreach ($candidates as $candidate) {
            if (!is_string($candidate) && !is_int($candidate)) {
                continue;
            }
            $candidate = trim((string) $candidate);
            if (is_numeric($candidate) && $candidate > 0) {
                $idArray[] = $candidate;
            }
        }

        // Decode the batch information (JSON string or array).
        $batchData = null;
        if (is_string($batchInfo) && $batchInfo !== '') {
            $batchData = json_decode($batchInfo, true);
        } elseif (is_array($batchInfo)) {
            $batchData = $batchInfo;
        }
        if (!is_array($batchData)) {
            $batchData = [];
        }

        // Selected URLs: page through the selection. This also applies when
        // no batch info was sent (first batch, default size), so a selection
        // is never silently turned into a check of all URLs.
        if (!empty($idArray)) {
            $batchSize = intval($batchData['size'] ?? 10);
            // Guard against a zero/negative size, which would divide by zero.
            if ($batchSize < 1) {
                $batchSize = 10;
            }
            $batchNumber = max(1, intval($batchData['batch'] ?? 1));
            $totalSelected = count($idArray);
            $totalBatches = (int) ceil($totalSelected / $batchSize);

            if ($batchNumber > $totalBatches) {
                return $this->response->throwJson([
                    'success' => true,
                    'message' => '所有选中的网址已检查完成',
                    'total' => 0,
                    'success_count' => 0,
                    'failed_count' => 0,
                    'has_more' => false
                ]);
            }

            $offset = ($batchNumber - 1) * $batchSize;
            $batchIds = array_slice($idArray, $offset, $batchSize);

            error_log("UrlNav Action: 选中检查,批次: $batchNumber/$totalBatches, 本批ID: " . implode(',', $batchIds));

            // Pass only this batch's IDs; paging is done here, not in the callee.
            $result = UrlNav_Plugin::manualCheckStatus($batchIds, true, null);

            // Overwrite the paging fields with the values computed for the selection.
            $result['batch_number'] = $batchNumber;
            $result['total_batches'] = $totalBatches;
            $result['has_more'] = ($batchNumber < $totalBatches);
            $result['batch_info'] = json_encode([
                'batch' => $batchNumber,
                'total' => $totalBatches,
                'size' => $batchSize,
                'selected_ids' => implode(',', $idArray)
            ]);

            $this->response->throwJson($result);
            return;
        }

        // Nothing selected: check all URLs, paged by the batch information.
        $rawBatchInfo = is_array($batchInfo) ? json_encode($batchInfo) : $batchInfo;
        $result = UrlNav_Plugin::manualCheckStatus(null, true, $rawBatchInfo);
        $this->response->throwJson($result);
    } catch (Exception $e) {
        $this->response->throwJson([
            'success' => false,
            'message' => '检查失败: ' . $e->getMessage()
        ]);
    }
}
/**
 * Count the active URLs (urlnav_urls rows with is_active = 1).
 *
 * @return mixed The count as delivered by the database driver, or 0 when it
 *               cannot be read or the query fails.
 */
private static function getTotalUrlCount()
{
    try {
        $row = self::getDbConnection()
            ->query("SELECT COUNT(*) as total FROM urlnav_urls WHERE is_active = 1")
            ->fetch(PDO::FETCH_ASSOC);

        if (isset($row['total'])) {
            return $row['total'];
        }
        return 0;
    } catch (Exception $failure) {
        error_log('UrlNav: 获取网站总数失败: ' . $failure->getMessage());
        return 0;
    }
}
/**
 * Fold the result of one status-check run into the urlnav_status_stats row.
 *
 * Updates the first existing stats row, or inserts one when the table is empty.
 * Errors are logged and otherwise ignored.
 *
 * @param array $checkResult Run summary; reads total, success_count, failed_count
 *                           and avg_response_time (missing keys count as 0).
 * @return void
 */
private static function updateStatusStats($checkResult)
{
    // Results for "nothing to check" carry no avg_response_time, so default
    // every field instead of reading undefined keys.
    $runTotal = (int) ($checkResult['total'] ?? 0);
    $runSuccess = (int) ($checkResult['success_count'] ?? 0);
    $runFailed = (int) ($checkResult['failed_count'] ?? 0);
    $runAverage = (float) ($checkResult['avg_response_time'] ?? 0);

    try {
        $db = self::getDbConnection();

        $stmt = $db->query("SELECT * FROM urlnav_status_stats LIMIT 1");
        $currentStats = $stmt->fetch(PDO::FETCH_ASSOC);

        if ($currentStats) {
            $previousSuccess = (int) $currentStats['success_checks'];

            $totalChecks = (int) $currentStats['total_checks'] + $runTotal;
            $successChecks = $previousSuccess + $runSuccess;
            $failedChecks = (int) $currentStats['failed_checks'] + $runFailed;

            // A run's average is taken over its successful checks only, so the
            // running average must be weighted by success counts; weighting by
            // total checks would let failed checks distort it.
            $weightedSum = (float) $currentStats['avg_response_time'] * $previousSuccess
                + $runAverage * $runSuccess;
            $avgResponseTime = $successChecks > 0 ? $weightedSum / $successChecks : 0;

            $stmt = $db->prepare("
                UPDATE urlnav_status_stats SET
                total_checks = ?,
                success_checks = ?,
                failed_checks = ?,
                avg_response_time = ?,
                last_check_time = CURRENT_TIMESTAMP,
                updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            ");
            $stmt->execute([
                $totalChecks,
                $successChecks,
                $failedChecks,
                round($avgResponseTime, 2),
                $currentStats['id']
            ]);
        } else {
            // First run: create the stats row.
            $stmt = $db->prepare("
                INSERT INTO urlnav_status_stats
                (total_checks, success_checks, failed_checks, avg_response_time, last_check_time)
                VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
            ");
            $stmt->execute([
                $runTotal,
                $runSuccess,
                $runFailed,
                round($runAverage, 2)
            ]);
        }
    } catch (Exception $e) {
        error_log('UrlNav: 更新状态检查统计失败: ' . $e->getMessage());
    }
}
/**
 * Summarise the status of all active URLs.
 *
 * @return array Keys (all int unless noted):
 *   total      active URLs
 *   online     active URLs with is_online = 1
 *   offline    active URLs with is_online = 0 that have been checked at least once
 *   unchecked  active URLs never checked (last_status_check IS NULL)
 *   online_rate (float) online / total as a percentage with one decimal
 *   has_rss / no_rss  active URLs with / without a non-empty rss_url
 *   On a database error every value is 0.
 */
public static function getStatusStats()
{
    try {
        $db = self::getDbConnection();

        // One aggregate query instead of five table scans; all counts come
        // from the same read, so they are consistent with each other.
        // SUM() is NULL when there are no active URLs; the int casts below
        // turn that into 0.
        $stmt = $db->query("
            SELECT
                COUNT(*) as total,
                SUM(CASE WHEN is_online = 1 THEN 1 ELSE 0 END) as online,
                SUM(CASE WHEN is_online = 0 AND last_status_check IS NOT NULL THEN 1 ELSE 0 END) as offline,
                SUM(CASE WHEN last_status_check IS NULL THEN 1 ELSE 0 END) as unchecked,
                SUM(CASE WHEN rss_url IS NOT NULL AND rss_url != '' THEN 1 ELSE 0 END) as has_rss,
                SUM(CASE WHEN rss_url IS NULL OR rss_url = '' THEN 1 ELSE 0 END) as no_rss
            FROM urlnav_urls
            WHERE is_active = 1
        ");
        $row = $stmt->fetch(PDO::FETCH_ASSOC) ?: [];

        $total = (int) ($row['total'] ?? 0);
        $online = (int) ($row['online'] ?? 0);

        $online_rate = $total > 0 ? round(($online / $total) * 100, 1) : 0;

        return [
            'total' => $total,
            'online' => $online,
            'offline' => (int) ($row['offline'] ?? 0),
            'unchecked' => (int) ($row['unchecked'] ?? 0),
            'online_rate' => (float) $online_rate,
            'has_rss' => (int) ($row['has_rss'] ?? 0),
            'no_rss' => (int) ($row['no_rss'] ?? 0)
        ];
    } catch (Exception $e) {
        error_log('UrlNav getStatusStats error: ' . $e->getMessage());
        return [
            'total' => 0,
            'online' => 0,
            'offline' => 0,
            'unchecked' => 0,
            'online_rate' => 0,
            'has_rss' => 0,
            'no_rss' => 0
        ];
    }
}
/**
 * Summarise the progress of RSS refreshing.
 *
 * @return array Keys:
 *   total_with_rss        value of getTotalUrlsWithRss()
 *   never_refreshed       value of getUnrefreshedUrlsCount()
 *   oldest_refresh        value of getOldestRefreshTime()
 *   oldest_hours_ago      hours since oldest_refresh (one decimal), or null when
 *                         there is no parseable oldest_refresh
 *   refreshed_percentage  share of RSS URLs refreshed at least once, in percent
 *   refresh_limit         configured rssRefreshLimit (default 20)
 *   estimated_tasks       runs needed to cover never_refreshed at refresh_limit per run
 *   On error the same keys are returned with zero/null values.
 */
public static function getRssRefreshStats()
{
    try {
        $stats = [
            'total_with_rss' => self::getTotalUrlsWithRss(),
            'never_refreshed' => self::getUnrefreshedUrlsCount(),
            'oldest_refresh' => self::getOldestRefreshTime(),
            // Always present so callers see the same keys in every outcome.
            'oldest_hours_ago' => null,
        ];

        // Progress: share of RSS URLs refreshed at least once.
        if ($stats['total_with_rss'] > 0) {
            $stats['refreshed_percentage'] = round(($stats['total_with_rss'] - $stats['never_refreshed']) / $stats['total_with_rss'] * 100, 1);
        } else {
            $stats['refreshed_percentage'] = 0;
        }

        // Configured number of URLs handled per refresh run.
        $config = self::getConfig();
        $stats['refresh_limit'] = intval($config->rssRefreshLimit ?? 20);

        // Runs still needed to cover the never-refreshed URLs.
        if ($stats['refresh_limit'] > 0 && $stats['never_refreshed'] > 0) {
            $stats['estimated_tasks'] = ceil($stats['never_refreshed'] / $stats['refresh_limit']);
        } else {
            $stats['estimated_tasks'] = 0;
        }

        // Age of the oldest refresh. strtotime() returns false for a value it
        // cannot parse; skip the calculation then rather than reporting the
        // hours elapsed since the Unix epoch.
        if ($stats['oldest_refresh']) {
            $oldestTimestamp = strtotime($stats['oldest_refresh']);
            if ($oldestTimestamp !== false) {
                $stats['oldest_hours_ago'] = round((time() - $oldestTimestamp) / 3600, 1);
            }
        }

        return $stats;
    } catch (Exception $e) {
        error_log('UrlNav: 获取RSS刷新统计失败: ' . $e->getMessage());
        return [
            'total_with_rss' => 0,
            'never_refreshed' => 0,
            'oldest_refresh' => null,
            'oldest_hours_ago' => null,
            'refreshed_percentage' => 0,
            'refresh_limit' => 20,
            'estimated_tasks' => 0
        ];
    }
}
// ============ 收藏功能相关方法 ============
/**
 * Save a cached RSS entry to a user's favorites.
 *
 * Copies the entry (with site and category names) into urlnav_favorites and
 * then sets is_fresh = 1 on the cache row.
 *
 * @param mixed $feedId Id of the urlnav_rss_cache row.
 * @param mixed $userId Owner of the favorite (default 0).
 * @return array ['success' => bool, 'message' => string]; fails when the entry does
 *               not exist, is already a favorite, or a database error occurs.
 */
public static function addFavorite($feedId, $userId = 0)
{
    try {
        $pdo = self::getDbConnection();

        // Load the entry together with its site and category.
        $lookup = $pdo->prepare("
SELECT c.*, u.title as site_title, u.url as site_url, cat.name as category_name
FROM urlnav_rss_cache c
LEFT JOIN urlnav_urls u ON c.url_id = u.id
LEFT JOIN urlnav_categories cat ON u.category_id = cat.id
WHERE c.id = ?
");
        $lookup->execute([$feedId]);
        $article = $lookup->fetch(PDO::FETCH_ASSOC);
        if (!$article) {
            return ['success' => false, 'message' => '文章不存在'];
        }

        // Refuse duplicates.
        $duplicate = $pdo->prepare("SELECT id FROM urlnav_favorites WHERE user_id = ? AND feed_id = ?");
        $duplicate->execute([$userId, $feedId]);
        if ($duplicate->fetch()) {
            return ['success' => false, 'message' => '已收藏'];
        }

        // Store a full snapshot of the entry with the favorite.
        $snapshot = [
            $userId,
            $feedId,
            $article['feed_title'],
            $article['feed_link'],
            $article['feed_description'] ?? '',
            $article['full_content'] ?? '',
            $article['pub_date'],
            $article['site_title'] ?? '',
            $article['site_url'] ?? '',
            $article['category_name'] ?? ''
        ];
        $insert = $pdo->prepare("
INSERT INTO urlnav_favorites
(user_id, feed_id, feed_title, feed_link, feed_description, full_content, pub_date,
site_title, site_url, category_name)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
");
        $insert->execute($snapshot);

        // Flag the cache row as fresh.
        $markFresh = $pdo->prepare("UPDATE urlnav_rss_cache SET is_fresh = 1 WHERE id = ?");
        $markFresh->execute([$feedId]);

        return ['success' => true, 'message' => '收藏成功'];
    } catch (Exception $failure) {
        error_log('UrlNav: 添加收藏失败: ' . $failure->getMessage());
        return ['success' => false, 'message' => '收藏失败: ' . $failure->getMessage()];
    }
}
/**
 * Remove an entry from a user's favorites.
 *
 * @param mixed $feedId Feed id stored with the favorite.
 * @param mixed $userId Owner of the favorite (default 0).
 * @return array ['success' => bool, 'message' => string]; success is false when
 *               no matching favorite was deleted or a database error occurs.
 */
public static function removeFavorite($feedId, $userId = 0)
{
    try {
        $delete = self::getDbConnection()
            ->prepare("DELETE FROM urlnav_favorites WHERE user_id = ? AND feed_id = ?");
        $delete->execute([$userId, $feedId]);

        // No affected row means there was nothing to remove.
        if ($delete->rowCount() <= 0) {
            return ['success' => false, 'message' => '未收藏'];
        }
        return ['success' => true, 'message' => '已取消收藏'];
    } catch (Exception $failure) {
        error_log('UrlNav: 取消收藏失败: ' . $failure->getMessage());
        return ['success' => false, 'message' => '取消收藏失败: ' . $failure->getMessage()];
    }
}
/**
 * Tell whether an entry is in a user's favorites.
 *
 * @param mixed $feedId Feed id stored with the favorite.
 * @param mixed $userId Owner of the favorite (default 0).
 * @return bool True when a matching row exists; false otherwise or on a database error.
 */
public static function isFavorite($feedId, $userId = 0)
{
    try {
        $lookup = self::getDbConnection()
            ->prepare("SELECT id FROM urlnav_favorites WHERE user_id = ? AND feed_id = ?");
        $lookup->execute([$userId, $feedId]);

        // fetch() yields a row or false; collapse that to a boolean.
        return (bool) $lookup->fetch();
    } catch (Exception $failure) {
        error_log('UrlNav: 检查收藏失败: ' . $failure->getMessage());
        return false;
    }
}
/**
 * List a user's favorites with paging and an optional keyword filter.
 *
 * @param mixed $userId   Owner of the favorites (default 0).
 * @param mixed $page     1-based page number; non-numeric or below 1 falls back to 1.
 * @param mixed $pageSize Rows per page; non-numeric or below 1 falls back to 20.
 * @param mixed $search   Keyword matched with LIKE against feed title, feed description,
 *                        full content and site title; '' disables the filter.
 * @return array Keys: success, total, totalPages, currentPage, pageSize, data
 *               (rows, newest favorited_at first). On a database error success is
 *               false, a message key is added and data is empty.
 */
public static function getFavorites($userId = 0, $page = 1, $pageSize = 20, $search = '')
{
    // Same paging normalisation as getAllRssFeeds(): a pageSize of 0 would
    // divide by zero below, and values under 1 would give a negative
    // OFFSET or LIMIT.
    $page = is_numeric($page) ? max(1, (int) $page) : 1;
    $pageSize = is_numeric($pageSize) ? (int) $pageSize : 20;
    if ($pageSize < 1) {
        $pageSize = 20;
    }

    try {
        $db = self::getDbConnection();

        $whereClause = "WHERE f.user_id = ?";
        $params = [$userId];

        // Compare against '' so that a search for the keyword "0" is honoured.
        $keyword = (is_string($search) || is_int($search) || is_float($search)) ? (string) $search : '';
        if ($keyword !== '') {
            $whereClause .= " AND (f.feed_title LIKE ? OR f.feed_description LIKE ? OR f.full_content LIKE ? OR f.site_title LIKE ?)";
            $searchTerm = "%{$keyword}%";
            for ($i = 0; $i < 4; $i++) {
                $params[] = $searchTerm;
            }
        }

        // Total number of matching favorites.
        $countSql = "SELECT COUNT(*) as total FROM urlnav_favorites f $whereClause";
        $countStmt = $db->prepare($countSql);
        $countStmt->execute($params);
        $countResult = $countStmt->fetch(PDO::FETCH_ASSOC);
        $total = (int) ($countResult['total'] ?? 0);

        $offset = ($page - 1) * $pageSize;

        // Page of data.
        $sql = "SELECT
                f.id,
                f.feed_id as original_feed_id,
                f.feed_title,
                f.feed_link,
                f.feed_description,
                f.full_content,
                f.pub_date,
                f.site_title,
                f.site_url,
                f.category_name,
                f.favorited_at
            FROM urlnav_favorites f
            $whereClause
            ORDER BY f.favorited_at DESC
            LIMIT ? OFFSET ?";
        $stmt = $db->prepare($sql);

        // Bind LIMIT/OFFSET explicitly as integers; filter values keep the
        // default string binding they had with execute($params).
        $position = 1;
        foreach ($params as $value) {
            $stmt->bindValue($position++, $value);
        }
        $stmt->bindValue($position++, $pageSize, PDO::PARAM_INT);
        $stmt->bindValue($position, $offset, PDO::PARAM_INT);
        $stmt->execute();
        $feeds = $stmt->fetchAll(PDO::FETCH_ASSOC);

        return [
            'success' => true,
            'total' => $total,
            'totalPages' => (int) ceil($total / $pageSize),
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => $feeds
        ];
    } catch (Exception $e) {
        error_log('UrlNav: 获取收藏列表失败: ' . $e->getMessage());
        return [
            'success' => false,
            'message' => '获取收藏失败',
            'total' => 0,
            'totalPages' => 0,
            'currentPage' => $page,
            'pageSize' => $pageSize,
            'data' => []
        ];
    }
}
/**
 * Count a user's favorites.
 *
 * @param mixed $userId Owner of the favorites (default 0).
 * @return array ['total' => count]; the count is 0 when it cannot be read or
 *               a database error occurs.
 */
public static function getFavoriteStats($userId = 0)
{
    try {
        $counter = self::getDbConnection()
            ->prepare("SELECT COUNT(*) as total FROM urlnav_favorites WHERE user_id = ?");
        $counter->execute([$userId]);
        $row = $counter->fetch(PDO::FETCH_ASSOC);

        $count = isset($row['total']) ? $row['total'] : 0;

        return ['total' => $count];
    } catch (Exception $failure) {
        error_log('UrlNav: 获取收藏统计失败: ' . $failure->getMessage());
        return ['total' => 0];
    }
}
}
/**
 * RSS manager: wraps per-URL RSS refresh decisions.
 *
 * Uses the database connection provided by UrlNav_Plugin::getDbConnection().
 */
class UrlNav_RssManager
{
    /** Database connection obtained from UrlNav_Plugin::getDbConnection(). */
    private $db;

    public function __construct()
    {
        $this->db = UrlNav_Plugin::getDbConnection();
    }

    /**
     * Refresh one URL if it is due (or unconditionally when forced).
     *
     * @param mixed $urlId Id of the urlnav_urls row.
     * @param bool  $force Skip the "is a refresh due" check.
     * @return array Result of refreshUrl(), or ['skipped' => true, 'reason' => ...]
     *               when no refresh is due.
     * @throws Exception When no row exists for $urlId.
     */
    public function smartRefresh($urlId, $force = false)
    {
        $url = $this->getUrlInfo($urlId);
        if (!$url) {
            throw new Exception('URL不存在');
        }

        if (!$force && !$this->shouldRefresh($url)) {
            return array('skipped' => true, 'reason' => '未到刷新时间');
        }

        return $this->refreshUrl($url);
    }

    /**
     * Load the urlnav_urls row for an id.
     *
     * @return array|false The row, or false when it does not exist.
     */
    private function getUrlInfo($urlId)
    {
        $stmt = $this->db->prepare("SELECT * FROM urlnav_urls WHERE id = ?");
        $stmt->execute(array($urlId));
        return $stmt->fetch(PDO::FETCH_ASSOC);
    }

    /**
     * Decide whether a URL is due for a refresh.
     *
     * A URL that was never refreshed (or whose last_refresh cannot be parsed)
     * is always due. Otherwise it is due once the configured rssRefresh
     * interval (default 3600 seconds) has elapsed; for URLs whose failure rate
     * exceeds 50% the interval is tripled.
     *
     * @param array $url urlnav_urls row; reads last_refresh, failure_count, refresh_count.
     * @return bool
     */
    private function shouldRefresh($url)
    {
        $config = UrlNav_Plugin::getConfig();
        $refreshInterval = intval($config->rssRefresh ?? 3600);

        if (empty($url['last_refresh'])) {
            return true;
        }

        $lastRefresh = strtotime($url['last_refresh']);
        if ($lastRefresh === false) {
            // Unparseable timestamp: treat as never refreshed.
            return true;
        }

        // The back-off must be applied BEFORE comparing against the interval.
        // Checking the normal interval first and returning true would make
        // the extended interval unreachable, so failing feeds would never be
        // slowed down.
        $failureCount = (int) ($url['failure_count'] ?? 0);
        $refreshCount = (int) ($url['refresh_count'] ?? 0);
        $failureRate = $failureCount / max(1, $refreshCount);
        if ($failureRate > 0.5) {
            $refreshInterval *= 3;
        }

        return (time() - $lastRefresh) >= $refreshInterval;
    }

    /**
     * Refresh a single URL.
     *
     * Placeholder: currently performs no work and always reports success.
     */
    private function refreshUrl($url)
    {
        return array('success' => true);
    }
}
?>