feat: OCR 支持搜索基金名称

This commit is contained in:
hzm
2026-02-24 20:26:32 +08:00
parent b9ee4546b7
commit 17edeccecc
2 changed files with 107 additions and 3 deletions

View File

@@ -449,6 +449,72 @@ export const submitFeedback = async (formData) => {
return response.json();
};
// Extract fund names from OCR text with the Zhipu GLM chat-completions API.

// Upper bound for one GLM request so a stalled connection cannot block the caller indefinitely.
const GLM_REQUEST_TIMEOUT_MS = 20000;

// Instruction prompt sent ahead of the OCR text. It is runtime text consumed by the model,
// so it is kept verbatim (in Chinese) rather than translated.
const FUND_NAME_EXTRACTION_PROMPT =
  '你是一个基金 OCR 文本解析助手。' +
  '从下面的 OCR 文本中抽取其中出现的「基金名称列表」。' +
  '要求1基金名称一般为中文中间不能有空字符串,可包含部分英文或括号' +
  '2名称后面通常会跟着金额或持有金额数字可能带千分位逗号和小数' +
  '3忽略无关信息只返回你判断为基金名称的字符串' +
  '4去重后输出。输出格式严格返回 JSON如 {"fund_names": ["基金名称1","基金名称2"]},不要输出任何多余说明';

// Reads the GLM API key from build-time configuration instead of a literal committed to the repo.
// NOTE(review): assumes a Next.js-style `NEXT_PUBLIC_*` variable that the bundler inlines; adjust
// the name to the project's build setup. A key shipped to the browser is still visible to users,
// so proxying this call through a backend would be the more complete fix.
const readGlmApiKey = () => {
  try {
    return process.env.NEXT_PUBLIC_ZHIPU_API_KEY || '';
  } catch {
    // `process` is not defined in this runtime: treat the key as not configured.
    return '';
  }
};

// Strips every whitespace character from each name, then drops non-strings, empties and duplicates.
const normalizeFundNames = (names) => {
  const unique = new Set();
  for (const name of names) {
    if (typeof name !== 'string') continue;
    // `\s` also covers full-width (U+3000) and non-breaking spaces, not only ASCII ' '.
    const compact = name.replace(/\s+/g, '');
    if (compact) unique.add(compact);
  }
  return [...unique];
};

/**
 * Asks a GLM model to pick fund names out of raw OCR text.
 *
 * Best-effort: every failure (missing key, empty input, network error, timeout, non-2xx
 * response, unparsable reply) resolves to an empty array instead of rejecting.
 *
 * @param {string} ocrText - Text recognized from a screenshot.
 * @returns {Promise<string[]>} De-duplicated fund names with all whitespace removed.
 */
export const extractFundNamesWithLLM = async (ocrText) => {
  const apiKey = readGlmApiKey();
  if (!apiKey || !ocrText) return [];

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), GLM_REQUEST_TIMEOUT_MS);
  try {
    // One of the two models is chosen at random for each request.
    const models = ['glm-4.5-flash', 'glm-4.7-flash'];
    const model = models[Math.floor(Math.random() * models.length)];
    const resp = await fetch('https://open.bigmodel.cn/api/paas/v4/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'user', content: FUND_NAME_EXTRACTION_PROMPT },
          { role: 'user', content: String(ocrText) },
        ],
        temperature: 0.2,
        max_tokens: 1024,
        thinking: {
          type: 'disabled',
        },
      }),
      signal: controller.signal,
    });
    if (!resp.ok) {
      console.warn(`[extractFundNamesWithLLM] GLM request failed with HTTP ${resp.status}`);
      return [];
    }
    const data = await resp.json();
    // The reply may wrap the JSON in prose or a code fence; take the first {...} span.
    const jsonText = data?.choices?.[0]?.message?.content?.match(/\{[\s\S]*?\}/)?.[0];
    if (!jsonText) return [];
    const names = JSON.parse(jsonText)?.fund_names;
    return Array.isArray(names) ? normalizeFundNames(names) : [];
  } catch (err) {
    // Covers network errors, the timeout abort and malformed JSON; callers only need "no names".
    console.warn('[extractFundNamesWithLLM] fund name extraction failed:', err);
    return [];
  } finally {
    clearTimeout(timer);
  }
};
let historyQueue = Promise.resolve();
export const fetchFundHistory = async (code, range = '1m') => {

View File

@@ -39,7 +39,7 @@ import WeChatModal from "./components/WeChatModal";
import githubImg from "./assets/github.svg";
import { supabase, isSupabaseConfigured } from './lib/supabase';
import { recordValuation, getAllValuationSeries, clearFund } from './lib/valuationTimeseries';
import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFundNetValue, searchFunds } from './api/fund';
import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFundNetValue, searchFunds, extractFundNamesWithLLM } from './api/fund';
import packageJson from '../package.json';
dayjs.extend(utc);
@@ -1043,7 +1043,7 @@ export default function HomePage() {
for (const base of cdnBases) {
for (const coreFile of coreCandidates) {
try {
worker = await createWorker('eng', 1, {
worker = await createWorker('chi_sim+eng', 1, {
workerPath: `${base}/tesseract.js@v5.1.1/dist/worker.min.js`,
corePath: `${base}/tesseract.js-core@v5.1.1/${coreFile}`,
});
@@ -1086,6 +1086,7 @@ export default function HomePage() {
};
const allCodes = new Set();
const allNames = new Set();
for (let i = 0; i < files.length; i++) {
if (abortScanRef.current) break;
@@ -1111,6 +1112,21 @@ export default function HomePage() {
}
const matches = text.match(/\b\d{6}\b/g) || [];
matches.forEach(c => allCodes.add(c));
// When the current image's OCR text contains no 6-digit match, fall back to asking the GLM
// helper (extractFundNamesWithLLM) to pull candidate fund names out of the text.
if (!matches.length && text) {
let parsedNames = [];
try {
parsedNames = await extractFundNamesWithLLM(text);
} catch (e) {
// Name extraction is best-effort: any failure is treated as "no names found".
parsedNames = [];
}
// Collect trimmed, non-empty string names; allNames is a Set, so repeats across images collapse.
parsedNames.forEach((name) => {
if (name && typeof name === 'string') {
allNames.add(name.trim());
}
});
}
}
if (abortScanRef.current) {
@@ -1118,6 +1134,28 @@ export default function HomePage() {
return;
}
// Only when no fund code was collected at all (allCodes is empty) do we try to resolve the
// extracted names to codes by searching for each name.
if (allCodes.size === 0 && allNames.size > 0) {
const names = Array.from(allNames);
// Progress is reported under the 'verify' stage, one step per name.
setScanProgress({ stage: 'verify', current: 0, total: names.length });
for (let i = 0; i < names.length; i++) {
// Stop early once the abort flag is set.
if (abortScanRef.current) break;
const name = names[i];
setScanProgress(prev => ({ ...prev, current: i + 1 }));
try {
// NOTE(review): 8000 is presumably a timeout in milliseconds — confirm against searchFundsWithTimeout.
const list = await searchFundsWithTimeout(name, 8000);
// Accept a name only when the search returns exactly one result, i.e. the match is unambiguous.
if (Array.isArray(list) && list.length === 1) {
const found = list[0];
if (found && found.CODE) {
allCodes.add(found.CODE);
}
}
} catch (e) {
// Deliberately ignored: a failed search for one name is skipped and the loop moves on.
}
}
}
const codes = Array.from(allCodes).sort();
setScanProgress({ stage: 'verify', current: 0, total: codes.length });