diff --git a/background.js b/background.js index c717c48..aa5d856 100644 --- a/background.js +++ b/background.js @@ -18,24 +18,157 @@ chrome.action.onClicked.addListener((tab) => { }); }); -chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { - if (!message || message.type !== "YFB_ANALYZE_CANDIDATE") { +chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => { + if (!message?.type) { return false; } - void analyzeCandidate(message.payload) - .then((data) => { - sendResponse({ ok: true, data }); - }) - .catch((error) => { - sendResponse({ - ok: false, - error: error instanceof Error ? error.message : "AI 分析失败" + if (message.type === "YFB_ANALYZE_CANDIDATE") { + void analyzeCandidate(message.payload) + .then((data) => { + sendResponse({ ok: true, data }); + }) + .catch((error) => { + sendResponse({ + ok: false, + error: error instanceof Error ? error.message : "AI 分析失败" + }); }); + + return true; + } + + if (message.type === "YFB_PROCESS_DETAIL_IN_HIDDEN_TAB") { + void processDetailInHiddenTab(message.payload) + .then((data) => { + sendResponse({ ok: true, data }); + }) + .catch((error) => { + sendResponse({ + ok: false, + error: error instanceof Error ? error.message : "详情抓取失败" + }); + }); + + return true; + } + + return false; +}); + +async function processDetailInHiddenTab(payload) { + const detailUrl = String(payload?.detailUrl || "").trim(); + const rowMeta = payload?.rowMeta || {}; + + if (!detailUrl) { + throw new Error("缺少详情页地址"); + } + + if (!isSupportedDetailUrl(detailUrl)) { + throw new Error("详情页地址不受支持"); + } + + const tab = await chrome.tabs.create({ + url: detailUrl, + active: false + }); + + if (!tab?.id) { + throw new Error("隐藏详情标签页创建失败"); + } + + try { + await waitForTabComplete(tab.id, 20000); + await waitForDetailWorkerReady(tab.id, 12000); + const response = await sendMessageToTab(tab.id, { + type: "YFB_RUN_DETAIL_EXTRACTION", + payload: { + rowMeta, + detailUrl + } }); - return true; -}); + if (!response?.ok || !response.data) { + throw new Error(response?.error || "详情提取失败"); + } + + return response.data; + } finally { + await closeTabQuietly(tab.id); + } +} + +function isSupportedDetailUrl(url) { + try { + const parsed = new URL(url); + return /(^|\.)yfbzb\.com$/i.test(parsed.hostname) || /(^|\.)qianlima\.com$/i.test(parsed.hostname); + } catch (error) { + return false; + } +} + +async function waitForTabComplete(tabId, timeoutMs) { + const tab = await chrome.tabs.get(tabId); + if (tab?.status === "complete") { + return; + } + + await new Promise((resolve, reject) => { + const timeoutId = setTimeout(() => { + chrome.tabs.onUpdated.removeListener(handleUpdated); + reject(new Error("详情页加载超时")); + }, timeoutMs); + + const handleUpdated = (updatedTabId, changeInfo) => { + if (updatedTabId !== tabId || changeInfo.status !== "complete") { + return; + } + + clearTimeout(timeoutId); + chrome.tabs.onUpdated.removeListener(handleUpdated); + resolve(); + }; + + chrome.tabs.onUpdated.addListener(handleUpdated); + }); +} + +async function waitForDetailWorkerReady(tabId, timeoutMs) { + const startTime = Date.now(); + + while (Date.now() - startTime < timeoutMs) { + const response = await sendMessageToTab(tabId, { type: "YFB_DETAIL_WORKER_PING" }); + if (response?.ok) { + return; + } + await delay(200); + } + + throw new Error("详情页脚本未就绪"); +} + +async function sendMessageToTab(tabId, message) { + try { + return await chrome.tabs.sendMessage(tabId, message); + } catch (error) { + return { + ok: false, + error: error instanceof Error ? error.message : "标签页通信失败" + }; + } +} + +async function closeTabQuietly(tabId) { + try { + await chrome.tabs.remove(tabId); + } catch (error) { + void error; + } +} + +async function delay(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} async function analyzeCandidate(payload) { if (!CONFIG.apiKey) { diff --git a/content.js b/content.js index 32cc4d3..74cf494 100644 --- a/content.js +++ b/content.js @@ -11,8 +11,115 @@ const BANNER_ID = "yfb-bid-assistant-banner"; const KEYWORD_MARK_CLASS = "yfb-keyword-highlight"; const MAX_LOG_ENTRIES = Number(CONFIG.maxLogEntries) || 80; - const DEFAULT_MAX_PAGES = 10; - const LEGACY_DEFAULT_MAX_PAGES = 3; + + const DB_NAME = "YfbBidHistoryDB"; + const STORE_NAME = "CrawlHistory"; + + function openDB() { + return new Promise((resolve, reject) => { + const request = indexedDB.open(DB_NAME, 1); + request.onerror = () => reject(request.error); + request.onsuccess = () => resolve(request.result); + request.onupgradeneeded = (event) => { + const db = event.target.result; + if (!db.objectStoreNames.contains(STORE_NAME)) { + const store = db.createObjectStore(STORE_NAME, { keyPath: "id" }); + store.createIndex("publishTime", "publishTime", { unique: false }); + } + }; + }); + } + + async function getHistoryRecord(id) { + if(!id) return null; + const db = await openDB(); + return new Promise((resolve, reject) => { + const tx = db.transaction(STORE_NAME, "readonly"); + const store = tx.objectStore(STORE_NAME); + const req = store.get(id); + req.onsuccess = () => resolve(req.result); + req.onerror = () => reject(req.error); + }); + } + + async function saveHistoryRecord(record) { + if(!record.id) return; + const db = await openDB(); + return new Promise((resolve, reject) => { + const tx = db.transaction(STORE_NAME, "readwrite"); + const store = tx.objectStore(STORE_NAME); + const req = store.put({ + id: record.id, + url: record.url || record.id || "", + title: record.title || "", + publishTime: record.publishTime || "", + processedAt: Date.now() + }); + req.onsuccess = () => resolve(); + req.onerror = () => reject(req.error); + }); + } + + async function clearOldHistory() { + try { + const db = await openDB(); + const threeMonthsAgo = Date.now() - 3 * 30 * 24 * 60 * 60 * 1000; + return new Promise((resolve, reject) => { + const tx = db.transaction(STORE_NAME, "readwrite"); + const store = tx.objectStore(STORE_NAME); + const req = store.openCursor(); + req.onsuccess = (e) => { + const cursor = e.target.result; + if (cursor) { + if (cursor.value.processedAt < threeMonthsAgo) { + cursor.delete(); + } + cursor.continue(); + } else { + resolve(); + } + }; + req.onerror = () => reject(req.error); + }); + } catch(e) { + console.warn("清理历史记录失败", e); + } + } + + function parsePublishTimeToTs(timeStr) { + if (!timeStr) return 0; + timeStr = timeStr.trim(); + const now = Date.now(); + if (timeStr.includes("刚刚")) return now; + if (timeStr.includes("分钟前")) { + const min = parseInt(timeStr.replace(/\D/g, "")) || 0; + return now - min * 60000; + } + if (timeStr.includes("小时前")) { + const hr = parseInt(timeStr.replace(/\D/g, "")) || 0; + return now - hr * 3600000; + } + if (timeStr.includes("今天")) return new Date(new Date().setHours(0,0,0,0)).getTime(); + if (timeStr.includes("昨日") || timeStr.includes("昨天")) { + return new Date(new Date().setHours(0,0,0,0)).getTime() - 86400000; + } + const parsed = new Date(timeStr.replace(/\./g, "-")).getTime(); + if (!isNaN(parsed)) return parsed; + return 0; + } + + function isTimeInRange(timeTs, range) { + if (range === "all" || !range) return true; + if (!timeTs) return true; + const now = Date.now(); + const todayStart = new Date(new Date().setHours(0,0,0,0)).getTime(); + if (range === "today") return timeTs >= todayStart; + if (range === "24h") return timeTs >= (now - 24 * 3600000); + if (range === "3d") return timeTs >= (now - 3 * 24 * 3600000); + if (range === "1w") return timeTs >= (now - 7 * 24 * 3600000); + return true; + } + const LIST_ROW_SELECTOR = "tr.el-table__row"; const LIST_CARD_ROW_SELECTOR = ".list > div"; const LIST_TITLE_SELECTOR = ".color1879F7.pointer, .color1879F7.textEll.pointer"; @@ -146,10 +253,9 @@ stopRequested: false, panelCollapsed: false, panelHidden: false, - hasCustomMaxPages: false, statusText: "等待开始", settings: { - maxPages: DEFAULT_MAX_PAGES, + maxPages: 3, delayMs: 300 }, stats: { @@ -438,15 +544,8 @@ state.panelCollapsed = Boolean(saved.panelCollapsed); state.panelHidden = Boolean(saved.panelHidden); - state.hasCustomMaxPages = Boolean(saved.hasCustomMaxPages); state.statusText = saved.statusText || state.statusText; state.settings = { ...state.settings, ...(saved.settings || {}) }; - if ( - !state.hasCustomMaxPages && - (!Number.isFinite(Number(state.settings.maxPages)) || Number(state.settings.maxPages) === LEGACY_DEFAULT_MAX_PAGES) - ) { - state.settings.maxPages = DEFAULT_MAX_PAGES; - } state.stats = { ...state.stats, ...(saved.stats || {}) }; state.results = Array.isArray(saved.results) ? saved.results : []; state.rowStatusById = saved.rowStatusById || {}; @@ -475,6 +574,16 @@ +
+ + +
@@ -522,6 +631,16 @@
+
+ + +
@@ -550,6 +669,7 @@ ui.pageValue = panel.querySelector("[data-role='page']"); ui.rowValue = panel.querySelector("[data-role='row']"); ui.maxPagesInput = panel.querySelector("#yfb-max-pages"); + ui.timeRangeInput = panel.querySelector("#yfb-time-range"); ui.delayInput = panel.querySelector("#yfb-delay-ms"); ui.startButton = panel.querySelector("[data-role='start']"); ui.subscribeAllButton = panel.querySelector("[data-role='subscribe-all']"); @@ -559,9 +679,11 @@ ui.toggleButton = panel.querySelector(".yfb-panel-toggle"); ui.maxPagesInput.value = String(state.settings.maxPages); + if(ui.timeRangeInput && state.settings.timeRange) { ui.timeRangeInput.value = state.settings.timeRange; } ui.delayInput.value = String(state.settings.delayMs); ui.maxPagesInput.addEventListener("change", handleSettingsChange); + if (ui.timeRangeInput) { ui.timeRangeInput.addEventListener("change", handleSettingsChange); } ui.delayInput.addEventListener("change", handleSettingsChange); ui.startButton.addEventListener("click", () => { void startScan(); }); ui.subscribeAllButton.addEventListener("click", () => { void runSubscriptionOnly(); }); @@ -614,19 +736,65 @@ } function bindRuntimeMessages() { - chrome.runtime.onMessage.addListener((message) => { + chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { + void sender; + if (!message) { - return; + return false; } if (message.type === "YFB_TOGGLE_PANEL") { state.panelHidden = !state.panelHidden; refreshView(); void persistState(); + return false; } + + if (message.type === "YFB_DETAIL_WORKER_PING") { + sendResponse({ ok: true, isDetailPage: detectDetailPage() }); + return false; + } + + if (message.type === "YFB_RUN_DETAIL_EXTRACTION") { + void runDetailWorkerExtraction(message.payload) + .then((data) => { + sendResponse({ ok: true, data }); + }) + .catch((error) => { + sendResponse({ + ok: false, + error: error instanceof Error ? error.message : "详情提取失败" + }); + }); + return true; + } + + return false; }); } + async function runDetailWorkerExtraction(payload) { + const rowMeta = payload?.rowMeta || {}; + const detailUrl = normalizeUrl(payload?.detailUrl || location.href); + + await waitForDetailPage(); + await waitForUiSettled(false); + dismissKnownDialogs(); + await sleep(120); + + const detailRecord = extractDetailRecord({ + ...rowMeta, + detailUrl + }); + detailRecord.detailUrl = detailUrl || detailRecord.detailUrl; + + const decision = await analyzeRecord(detailRecord); + return { + detailRecord, + decision + }; + } + async function delay(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } @@ -646,6 +814,7 @@ } state.isRunning = true; + await clearOldHistory(); state.stopRequested = false; setStatus("正在处理订阅分组,请稍候..."); log("开始执行订阅分组全选测试。", "info"); @@ -723,13 +892,11 @@ } function handleSettingsChange() { - const previousMaxPages = state.settings.maxPages; const maxPages = clampNumber(ui.maxPagesInput.value, 1, 200, state.settings.maxPages); const delayMs = clampNumber(ui.delayInput.value, 200, 10000, state.settings.delayMs); state.settings.maxPages = maxPages; state.settings.delayMs = delayMs; - state.hasCustomMaxPages = state.hasCustomMaxPages || previousMaxPages !== maxPages; ui.maxPagesInput.value = String(maxPages); ui.delayInput.value = String(delayMs); void persistState(); @@ -771,6 +938,7 @@ await doSubscribeAll(); state.isRunning = true; + await clearOldHistory(); state.stopRequested = false; setStatus(`准备开始扫描,最多 ${state.settings.maxPages} 页。`); log(`开始扫描,最多 ${state.settings.maxPages} 页,步进延迟 ${state.settings.delayMs}ms。`, "info"); @@ -792,7 +960,24 @@ log(`第 ${state.stats.currentPage} 页识别到 ${rows.length} 条记录。`, "info"); + let outOfRangeCount = 0; for (let rowIndex = 0; rowIndex < rows.length; rowIndex += 1) { + const rMeta = rows[rowIndex]; + if (state.settings.timeRange && state.settings.timeRange !== 'all') { + const ts = parsePublishTimeToTs(rMeta.publishTime); + if (ts > 0 && !isTimeInRange(ts, state.settings.timeRange)) { + outOfRangeCount++; + if (outOfRangeCount >= 3) { + log("发现连续超出时间范围的记录,停止任务。", "warning"); + state.stopRequested = true; + updateRowStatus(rMeta.id, "skip", "超时跳过"); + break; + } + } else { + outOfRangeCount = 0; + } + } + throwIfStopped(); state.stats.currentIndex = rowIndex + 1; refreshView(); @@ -851,6 +1036,19 @@ return; } + try { + const hist = await getHistoryRecord(rowMeta.url || rowMeta.id); + if (hist) { + state.stats.scanned += 1; + updateRowStatus(rowMeta.id, "skip", "已在历史记录"); + log(`历史记录跳过:${rowMeta.title}`, "info"); + state.stats.hits = state.results.length; + refreshView(); + await persistState(); + return; + } + } catch(e) { console.warn(e); } + await clickElement(rowMeta.titleEl); await sleep(state.settings.delayMs); await waitForDetailPage(); @@ -859,6 +1057,7 @@ const detailRecord = extractDetailRecord(rowMeta); const decision = await analyzeRecord(detailRecord); state.stats.scanned += 1; + try { await saveHistoryRecord(detailRecord); } catch(e) { console.warn(e); } if (decision.isRelevant) { addResult(detailRecord, decision); @@ -1081,11 +1280,44 @@ type, region, publishTime, + url: deriveDetailUrlFromRow(rowEl, titleEl), previewText, previewKeywordHints: collectKeywordHints([title, type, region, publishTime, previewText].join("\n")) }; } + function deriveDetailUrlFromRow(rowEl, titleEl) { + const candidates = [ + titleEl, + titleEl?.closest?.("a[href]"), + rowEl?.querySelector?.("a[href]"), + rowEl?.querySelector?.("[href]") + ]; + + for (const candidate of candidates) { + const href = candidate?.href || candidate?.getAttribute?.("href") || ""; + const normalized = normalizeUrl(href); + if (normalized) { + return normalized; + } + } + + return ""; + } + + function normalizeUrl(url) { + const value = String(url || "").trim(); + if (!value) { + return ""; + } + + try { + return new URL(value, location.href).href; + } catch (error) { + return ""; + } + } + function isLikelyNoticeType(text) { return /(公告|采购|招标|中标|商机|项目)/.test(text) && text.length <= 12; } @@ -1192,68 +1424,6 @@ return normalizeText(result); } - function normalizeSummaryCompareText(text) { - return normalizeText(text).replace(/[\s,,。;、::\-()()[\]【】"'“”‘’《》]/g, ""); - } - - function sanitizeSummaryCandidate(text, detailRecord) { - let result = normalizeText(removeBoilerplateText(String(text || ""), detailRecord?.title || "")); - if (!result) { - return ""; - } - - const title = normalizeText(detailRecord?.title || ""); - if (title && result.startsWith(title)) { - result = normalizeText(result.slice(title.length)); - } - - const prefixPatterns = [ - /^(发布时间|发布日期|公告时间|时间|地区|项目编号|项目名称|项目概况|项目简介|招标编号|采购编号|预算金额|预估金额|招标单位|招标人|采购单位|业主单位|代理单位|代理机构|报名截止时间|投标截止时间|开标时间|开标日期|公告类型)\s*[::]?\s*/i, - /^(\d{4}[./-]\d{1,2}[./-]\d{1,2}|\d{1,2}[./-]\d{1,2})\s*/, - /^([一二三四五六七八九十]+、|\(?[一二三四五六七八九十]+\)|[0-9]+[、.])\s*/ - ]; - - let previous = ""; - while (result && result !== previous) { - previous = result; - prefixPatterns.forEach((pattern) => { - result = normalizeText(result.replace(pattern, "")); - }); - } - - return result; - } - - function isValidSummaryCandidate(text, detailRecord) { - const candidate = normalizeText(text); - if (!candidate || candidate.length < 12) { - return false; - } - - const candidateComparable = normalizeSummaryCompareText(candidate); - const titleComparable = normalizeSummaryCompareText(detailRecord?.title || ""); - if (!candidateComparable || candidateComparable === titleComparable) { - return false; - } - - if (/^[\d\s,,。;、::./\-]+$/.test(candidate)) { - return false; - } - - return !/^(发布时间|发布日期|公告时间|时间|地区|项目编号|项目名称|项目概况|项目简介|招标编号|采购编号|预算金额|预估金额|招标单位|招标人|采购单位|业主单位|代理单位|代理机构|报名截止时间|投标截止时间|开标时间|开标日期|公告类型)\b/i.test(candidate); - } - - function buildFieldFallbackSummary(detailRecord) { - const parts = [ - detailRecord?.bidder ? `招标单位:${detailRecord.bidder}` : "", - detailRecord?.agency ? `代理单位:${detailRecord.agency}` : "", - detailRecord?.signupDeadline ? `报名截止:${detailRecord.signupDeadline}` : "", - detailRecord?.bidDeadline ? `投标截止:${detailRecord.bidDeadline}` : "" - ].filter(Boolean); - - return parts.length > 0 ? limitLength(parts.join(";"), 60) : ""; - } - function collectKeywordHints(text) { const normalized = normalizeText(text); const institutions = collectHits(normalized, KEYWORDS.institutions); @@ -1555,15 +1725,8 @@ 置信度分数: item.confidence || 0 })); - const exportRows = state.results.map((item) => ({ - ["标题"]: item.title, - ["简述"]: item.summary || "", - ["AI分类"]: item.category || "", - ["置信度"]: item.confidence || 0 - })); - const workbook = window.XLSX.utils.book_new(); - const worksheet = window.XLSX.utils.json_to_sheet(exportRows); + const worksheet = window.XLSX.utils.json_to_sheet(rows); window.XLSX.utils.book_append_sheet(workbook, worksheet, "命中结果"); const fileBuffer = window.XLSX.write(workbook, { @@ -1679,6 +1842,16 @@
+
+ + +
@@ -1717,6 +1890,7 @@ ui.pageValue = panel.querySelector("[data-role='page']"); ui.rowValue = panel.querySelector("[data-role='row']"); ui.maxPagesInput = panel.querySelector("#yfb-max-pages"); + ui.timeRangeInput = panel.querySelector("#yfb-time-range"); ui.delayInput = panel.querySelector("#yfb-delay-ms"); ui.startButton = panel.querySelector("[data-role='start']"); ui.subscribeAllButton = panel.querySelector("[data-role='subscribe-all']"); @@ -1726,9 +1900,11 @@ ui.toggleButton = panel.querySelector(".yfb-panel-toggle"); ui.maxPagesInput.value = String(state.settings.maxPages); + if(ui.timeRangeInput && state.settings.timeRange) { ui.timeRangeInput.value = state.settings.timeRange; } ui.delayInput.value = String(state.settings.delayMs); ui.maxPagesInput.addEventListener("change", handleSettingsChange); + if (ui.timeRangeInput) { ui.timeRangeInput.addEventListener("change", handleSettingsChange); } ui.delayInput.addEventListener("change", handleSettingsChange); ui.startButton.addEventListener("click", () => { void startScan(); }); @@ -1759,6 +1935,7 @@ } state.isRunning = true; + await clearOldHistory(); state.stopRequested = false; setStatus(`准备开始扫描,最多 ${state.settings.maxPages} 页。`); log(`开始扫描,最多 ${state.settings.maxPages} 页,步进延迟 ${state.settings.delayMs}ms。`, "info"); @@ -1782,7 +1959,24 @@ log(`第 ${state.stats.currentPage} 页识别到 ${rows.length} 条记录。`, "info"); + let outOfRangeCount = 0; for (let rowIndex = 0; rowIndex < rows.length; rowIndex += 1) { + const rMeta = rows[rowIndex]; + if (state.settings.timeRange && state.settings.timeRange !== 'all') { + const ts = parsePublishTimeToTs(rMeta.publishTime); + if (ts > 0 && !isTimeInRange(ts, state.settings.timeRange)) { + outOfRangeCount++; + if (outOfRangeCount >= 3) { + log("发现连续超出时间范围的记录,停止任务。", "warning"); + state.stopRequested = true; + updateRowStatus(rMeta.id, "skip", "超时跳过"); + break; + } + } else { + outOfRangeCount = 0; + } + } + throwIfStopped(); state.stats.currentIndex = rowIndex + 1; refreshView(); @@ -2034,17 +2228,55 @@ return; } - await clickElement(rowMeta.titleEl); - await sleep(state.settings.delayMs); - await waitForDetailPage(); - await waitForUiSettled(); - dismissKnownDialogs(); - await sleep(120); + const detailUrl = await ensureDetailUrlForRow(rowMeta); + if (!detailUrl) { + state.stats.scanned += 1; + updateRowStatus(rowMeta.id, "error", "缺少详情地址"); + log(`处理失败:${rowMeta.title},未找到详情页地址`, "error"); + state.stats.hits = state.results.length; + refreshView(); + await persistState(); + restoreListHighlights(); + return; + } try { - const detailRecord = extractDetailRecord(rowMeta); - const decision = await analyzeRecord(detailRecord); + const hist = await getHistoryRecord(detailUrl || rowMeta.id); + if (hist) { + state.stats.scanned += 1; + updateRowStatus(rowMeta.id, "skip", "已在历史记录"); + log(`历史记录跳过:${rowMeta.title}`, "info"); + state.stats.hits = state.results.length; + refreshView(); + await persistState(); + return; + } + } catch(e) { console.warn(e); } + + try { + const response = await runtimeSendMessage({ + type: "YFB_PROCESS_DETAIL_IN_HIDDEN_TAB", + payload: { + detailUrl, + rowMeta: { + id: rowMeta.id, + title: rowMeta.title, + type: rowMeta.type, + region: rowMeta.region, + publishTime: rowMeta.publishTime, + url: detailUrl + } + } + }); + + if (!response?.ok || !response.data?.detailRecord || !response.data?.decision) { + throw new Error(response?.error || "记录处理失败"); + } + + const detailRecord = response.data.detailRecord; + const decision = response.data.decision; state.stats.scanned += 1; + try { await saveHistoryRecord(detailRecord); } catch(e) { console.warn(e); } if (decision.isRelevant) { addResult(detailRecord, decision); @@ -2065,18 +2297,52 @@ } } catch (error) { const message = error instanceof Error ? error.message : "记录处理失败"; + state.stats.scanned += 1; updateRowStatus(rowMeta.id, "error", "异常"); log(`处理失败:${rowMeta.title},${message}`, "error"); } finally { state.stats.hits = state.results.length; refreshView(); await persistState(); - await navigateBackToList(); - await sleep(state.settings.delayMs); restoreListHighlights(); } } + async function ensureDetailUrlForRow(rowMeta) { + const directUrl = normalizeUrl(rowMeta?.url || ""); + if (directUrl) { + return directUrl; + } + + return await captureDetailUrlViaNavigationFallback(rowMeta); + } + + async function captureDetailUrlViaNavigationFallback(rowMeta) { + if (!rowMeta?.titleEl) { + return ""; + } + + const listUrl = location.href; + await clickElement(rowMeta.titleEl); + await sleep(state.settings.delayMs); + await waitForDetailPage(); + const detailUrl = normalizeUrl(location.href); + await navigateBackToList(); + await sleep(state.settings.delayMs); + + const restoredRow = findRowById(rowMeta.id); + if (restoredRow) { + restoredRow.url = detailUrl; + rowMeta.url = detailUrl; + } + + if (!detailUrl || detailUrl === normalizeUrl(listUrl)) { + return ""; + } + + return detailUrl; + } + function extractDetailRecord(rowMeta) { const title = findTitleCandidate(rowMeta.title); const detailMeta = collectDetailMeta(); @@ -2591,17 +2857,9 @@ 置信度分数: item.confidence || 0 })); - const exportHeaders = ["标题", "简述", "AI分类", "置信度"]; - const exportRows = state.results.map((item) => ({ - ["标题"]: item.title, - ["简述"]: item.summary || "", - ["AI分类"]: item.category || "", - ["置信度"]: item.confidence || 0 - })); - const worksheet = window.XLSX.utils.json_to_sheet(exportRows, { header: exportHeaders }); + const worksheet = window.XLSX.utils.json_to_sheet(rows, { header: headers }); worksheet["!cols"] = [ { wch: 44 }, - { wch: 60 }, { wch: 18 }, { wch: 12 } ]; @@ -2669,7 +2927,6 @@ [STORAGE_KEY]: { panelCollapsed: state.panelCollapsed, panelHidden: state.panelHidden, - hasCustomMaxPages: state.hasCustomMaxPages, statusText: state.statusText, settings: state.settings, stats: state.stats, @@ -2780,92 +3037,6 @@ return parts.join(""); } - function isValidSummaryCandidate(text, detailRecord) { - const candidate = normalizeText(text); - if (!candidate || candidate.length < 12) { - return false; - } - - const title = normalizeText(detailRecord?.title || ""); - if (title && candidate.startsWith(title)) { - return false; - } - - const candidateComparable = normalizeSummaryCompareText(candidate); - const titleComparable = normalizeSummaryCompareText(title); - if (!candidateComparable || candidateComparable === titleComparable) { - return false; - } - - if (/^[\d\s,,。;、::./\-]+$/.test(candidate)) { - return false; - } - - return !/^(发布时间|发布日期|公告时间|时间|地区|项目编号|项目名称|项目概况|项目简介|招标编号|采购编号|预算金额|预估金额|招标单位|招标人|采购单位|业主单位|代理单位|代理机构|报名截止时间|投标截止时间|开标时间|开标日期|公告类型)\b/i.test(candidate); - } - - function buildLocalSummary(detailRecord) { - const candidateText = [detailRecord.announcementContent, detailRecord.detailText].filter(Boolean).join("\n"); - const candidateLines = candidateText - .split(/[\n。;]/) - .map((line) => sanitizeSummaryCandidate(line, detailRecord)) - .filter((line) => isValidSummaryCandidate(line, detailRecord)); - - return candidateLines.length > 0 ? limitLength(candidateLines[0], 60) : ""; - } - - function buildSummary(detailRecord, matchedKeywords = [], aiSummary = "") { - const normalizedAiSummary = normalizeText(removeBoilerplateText(String(aiSummary || ""), detailRecord?.title || "")); - if (isValidSummaryCandidate(normalizedAiSummary, detailRecord)) { - return limitLength(normalizedAiSummary, 60); - } - - const localSummary = buildLocalSummary(detailRecord); - if (localSummary) { - return localSummary; - } - - const fieldSummary = buildFieldFallbackSummary(detailRecord); - if (fieldSummary) { - return fieldSummary; - } - - return limitLength(detailRecord.title, 60); - } - - function addResult(detailRecord, decision) { - const result = { - id: detailRecord.id, - title: detailRecord.title, - summary: buildSummary(detailRecord, decision.matchedKeywords || [], decision.summary || decision.titleSummary || ""), - category: decision.category || "未命中", - institutionType: uniqueText(decision.institutionType || []), - matchedKeywords: uniqueText(decision.matchedKeywords || []), - confidence: Number(decision.confidence) || 0, - reason: decision.reason || "", - type: detailRecord.type, - region: detailRecord.region, - publishTime: detailRecord.publishTime, - detailUrl: detailRecord.detailUrl, - sourceUrl: detailRecord.sourceUrl, - attachmentNames: detailRecord.attachmentNames, - announcementContent: detailRecord.announcementContent, - projectNumber: detailRecord.projectNumber, - estimatedAmount: detailRecord.estimatedAmount, - bidder: detailRecord.bidder, - agency: detailRecord.agency, - signupDeadline: detailRecord.signupDeadline, - bidDeadline: detailRecord.bidDeadline - }; - - const existingIndex = state.results.findIndex((item) => item.id === result.id); - if (existingIndex >= 0) { - state.results.splice(existingIndex, 1, result); - } else { - state.results.push(result); - } - } - function escapeRegExp(text) { return String(text).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } diff --git a/manifest.json b/manifest.json index 97bf535..a3b5b5e 100644 --- a/manifest.json +++ b/manifest.json @@ -4,7 +4,8 @@ "version": "0.1.0", "description": "在乙方宝页面内自动翻页抓取、AI筛选、高亮并导出金融机构相关招标信息。", "permissions": [ - "storage" + "storage", + "tabs" ], "host_permissions": [ "*://*.yfbzb.com/*",