From 17edecceccefd77342516a3efc9128b942dfa51b Mon Sep 17 00:00:00 2001
From: hzm <934585316@qq.com>
Date: Tue, 24 Feb 2026 20:26:32 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20OCR=20=E6=94=AF=E6=8C=81=E6=90=9C?=
 =?UTF-8?q?=E7=B4=A2=E5=9F=BA=E9=87=91=E5=90=8D=E7=A7=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Let the OCR import find funds by name when a screenshot shows no fund code:

- add extractFundNamesWithLLM(), which asks the Zhipu GLM chat-completions
  API to pull fund names out of the OCR text;
- load the Tesseract worker with 'chi_sim+eng' instead of 'eng';
- when an image yields no 6-digit code, collect the names returned by the
  helper; when no code was found in any image, search each name and accept
  its code only if the search returns exactly one result.

---
Review notes:
- The API key is read from NEXT_PUBLIC_ZHIPU_API_KEY and must not be committed
  to the repository. Any key that was ever committed should be revoked.
- Without that variable the name-extraction step is skipped (returns []).

 app/api/fund.js | 83 ++++++++++++++++++++++++++++++++++++++++++++++++-
 app/page.jsx    | 42 ++++++++++++++++++++++++-
 2 files changed, 122 insertions(+), 3 deletions(-)

diff --git a/app/api/fund.js b/app/api/fund.js
index be41861..b5a3155 100644
--- a/app/api/fund.js
+++ b/app/api/fund.js
@@ -449,6 +449,87 @@ export const submitFeedback = async (formData) => {
   return response.json();
 };
 
+/**
+ * Extract fund names from OCR text with the Zhipu GLM chat-completions API.
+ *
+ * Best-effort helper: it never throws and resolves to [] when the API key is
+ * not configured, the input is empty, the request fails, or the reply cannot
+ * be parsed.
+ *
+ * @param {string} ocrText - Raw text recognised from a screenshot.
+ * @returns {Promise<string[]>} Unique fund names with all whitespace removed.
+ */
+export const extractFundNamesWithLLM = async (ocrText) => {
+  // Read the key from the environment rather than committing it. NEXT_PUBLIC_*
+  // values still reach the browser, so use a restricted key or a proxy.
+  const apiKey = process.env.NEXT_PUBLIC_ZHIPU_API_KEY;
+  if (!apiKey || !ocrText) return [];
+
+  try {
+    const models = ['glm-4.5-flash', 'glm-4.7-flash'];
+    const model = models[Math.floor(Math.random() * models.length)];
+
+    const resp = await fetch('https://open.bigmodel.cn/api/paas/v4/chat/completions', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: `Bearer ${apiKey}`,
+      },
+      body: JSON.stringify({
+        model,
+        messages: [
+          {
+            role: 'user',
+            content:
+              '你是一个基金 OCR 文本解析助手。' +
+              '从下面的 OCR 文本中抽取其中出现的「基金名称列表」。' +
+              '要求:1)基金名称一般为中文,中间不能有空字符串,可包含部分英文或括号' +
+              '2)名称后面通常会跟着金额或持有金额(数字,可能带千分位逗号和小数);' +
+              '3)忽略无关信息,只返回你判断为基金名称的字符串;' +
+              '4)去重后输出。输出格式:严格返回 JSON,如 {"fund_names": ["基金名称1","基金名称2"]},不要输出任何多余说明',
+          },
+          {
+            role: 'user',
+            content: String(ocrText),
+          },
+        ],
+        temperature: 0.2,
+        max_tokens: 1024,
+        thinking: {
+          type: 'disabled',
+        },
+      }),
+    });
+
+    if (!resp.ok) {
+      return [];
+    }
+
+    const data = await resp.json();
+    // The model may wrap the JSON in extra text; keep only the first {...} span.
+    const content = data?.choices?.[0]?.message?.content?.match(/\{[\s\S]*?\}/)?.[0];
+    if (!content || typeof content !== 'string') return [];
+
+    let parsed;
+    try {
+      parsed = JSON.parse(content);
+    } catch {
+      return [];
+    }
+
+    const names = parsed?.fund_names;
+    if (!Array.isArray(names)) return [];
+    // OCR can inject spaces (ASCII or full-width) inside names; strip them all.
+    const cleaned = names
+      .map((n) => (typeof n === 'string' ? n.replace(/\s+/g, '') : ''))
+      .filter(Boolean);
+    return [...new Set(cleaned)];
+  } catch (e) {
+    console.warn('extractFundNamesWithLLM failed', e);
+    return [];
+  }
+};
+
 let historyQueue = Promise.resolve();
 
 export const fetchFundHistory = async (code, range = '1m') => {
@@ -498,7 +579,7 @@ export const fetchFundHistory = async (code, range = '1m') => {
       // Fetch first page to get metadata
       const firstUrl = `https://fundf10.eastmoney.com/F10DataApi.aspx?type=lsjz&code=${code}&page=${page}&per=${per}&sdate=${sdate}&edate=${edate}`;
       await loadScript(firstUrl);
-      
+
       if (!window.apidata || !window.apidata.content || window.apidata.content.includes('暂无数据')) {
         resolve([]);
         return;
diff --git a/app/page.jsx b/app/page.jsx
index 354f51c..c95b488 100644
--- a/app/page.jsx
+++ b/app/page.jsx
@@ -39,7 +39,7 @@ import WeChatModal from "./components/WeChatModal";
 import githubImg from "./assets/github.svg";
 import { supabase, isSupabaseConfigured } from './lib/supabase';
 import { recordValuation, getAllValuationSeries, clearFund } from './lib/valuationTimeseries';
-import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFundNetValue, searchFunds } from './api/fund';
+import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFundNetValue, searchFunds, extractFundNamesWithLLM } from './api/fund';
 import packageJson from '../package.json';
 
 dayjs.extend(utc);
@@ -1043,7 +1043,7 @@ export default function HomePage() {
         for (const base of cdnBases) {
           for (const coreFile of coreCandidates) {
             try {
-              worker = await createWorker('eng', 1, {
+              worker = await createWorker('chi_sim+eng', 1, {
                 workerPath: `${base}/tesseract.js@v5.1.1/dist/worker.min.js`,
                 corePath: `${base}/tesseract.js-core@v5.1.1/${coreFile}`,
               });
@@ -1086,6 +1086,7 @@ export default function HomePage() {
     };
 
     const allCodes = new Set();
+    const allNames = new Set();
     for (let i = 0; i < files.length; i++) {
       if (abortScanRef.current) break;
 
@@ -1111,6 +1112,21 @@ export default function HomePage() {
       }
       const matches = text.match(/\b\d{6}\b/g) || [];
       matches.forEach(c => allCodes.add(c));
+
+      // If no fund code was recognised in this image, try to extract likely Chinese fund names from the text (via the GLM API)
+      if (!matches.length && text) {
+        let parsedNames = [];
+        try {
+          parsedNames = await extractFundNamesWithLLM(text);
+        } catch (e) {
+          parsedNames = [];
+        }
+        parsedNames.forEach((name) => {
+          if (name && typeof name === 'string') {
+            allNames.add(name.trim());
+          }
+        });
+      }
     }
 
     if (abortScanRef.current) {
@@ -1118,6 +1134,28 @@ export default function HomePage() {
       return;
     }
 
+    // If no fund code was recognised in any screenshot, search for funds by the recognised names
+    if (allCodes.size === 0 && allNames.size > 0) {
+      const names = Array.from(allNames);
+      setScanProgress({ stage: 'verify', current: 0, total: names.length });
+      for (let i = 0; i < names.length; i++) {
+        if (abortScanRef.current) break;
+        const name = names[i];
+        setScanProgress(prev => ({ ...prev, current: i + 1 }));
+        try {
+          const list = await searchFundsWithTimeout(name, 8000);
+          // Treat the name match as unique and valid only when the search returns exactly one result
+          if (Array.isArray(list) && list.length === 1) {
+            const found = list[0];
+            if (found && found.CODE) {
+              allCodes.add(found.CODE);
+            }
+          }
+        } catch (e) {
+        }
+      }
+    }
+
     const codes = Array.from(allCodes).sort();
     setScanProgress({ stage: 'verify', current: 0, total: codes.length });