feat: OCR 支持搜索基金名称
This commit is contained in:
@@ -449,6 +449,72 @@ export const submitFeedback = async (formData) => {
|
||||
return response.json();
|
||||
};
|
||||
|
||||
/**
 * Resolve the Zhipu (BigModel) API key from build-time configuration.
 *
 * SECURITY: the key used to be hard-coded in this file. A key committed to
 * source control must be treated as leaked and revoked.
 * NOTE(review): assumes a Next.js-style build that inlines
 * `process.env.NEXT_PUBLIC_*` references — confirm the variable name with the
 * deployment setup. A key shipped to the browser is still visible to users;
 * proxying this call through a server-side route is the safer long-term fix.
 *
 * @returns {string} The configured key, or '' when none is available.
 */
const readZhipuApiKey = () => {
  if (typeof process === 'undefined' || !process.env) return '';
  return process.env.NEXT_PUBLIC_ZHIPU_API_KEY || '';
};

/**
 * Pull the JSON object out of a model reply that may be wrapped in markdown
 * fences or surrounded by extra prose.
 *
 * @param {unknown} content - `choices[0].message.content` from the API reply.
 * @returns {any} The parsed object, or null when nothing parseable is found.
 */
const parseFundNamesPayload = (content) => {
  if (typeof content !== 'string') return null;

  const start = content.indexOf('{');
  if (start === -1) return null;

  const candidates = [];

  // Widest span first: first "{" to last "}". This survives a "}" inside a
  // fund name or a nested object, which a non-greedy match would cut short.
  const end = content.lastIndexOf('}');
  if (end > start) candidates.push(content.slice(start, end + 1));

  // Fallback: the first minimal "{...}" span, for replies that append extra
  // brace-containing text after the JSON object.
  const firstObject = content.match(/\{[\s\S]*?\}/)?.[0];
  if (firstObject && !candidates.includes(firstObject)) candidates.push(firstObject);

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch {
      // Not valid JSON; try the next candidate.
    }
  }
  return null;
};

/**
 * Clean a raw `fund_names` value: keep strings only, strip every whitespace
 * character (OCR output often contains tabs, newlines and full-width spaces,
 * not just ASCII spaces), drop empties and de-duplicate preserving order.
 *
 * @param {unknown} names - Value of the `fund_names` field.
 * @returns {string[]} Normalised, unique fund names.
 */
const normalizeFundNames = (names) => {
  if (!Array.isArray(names)) return [];
  const cleaned = names
    .filter((name) => typeof name === 'string')
    .map((name) => name.replace(/\s+/g, ''))
    .filter(Boolean);
  return [...new Set(cleaned)];
};

/**
 * Extract fund names from OCR text using the Zhipu GLM chat-completions API.
 *
 * Best-effort: every failure (missing key, empty input, network error,
 * non-2xx status, unparseable reply) resolves to an empty array and never
 * rejects, so callers can treat "no names" and "lookup failed" the same way.
 *
 * @param {string} ocrText - Raw text recognised from a screenshot.
 * @param {object} [options]
 * @param {string} [options.apiKey] - API key; defaults to the build-time
 *   configured key (see readZhipuApiKey).
 * @param {AbortSignal} [options.signal] - Optional signal to cancel the request.
 * @returns {Promise<string[]>} Unique fund names with all whitespace removed.
 */
export const extractFundNamesWithLLM = async (
  ocrText,
  { apiKey = readZhipuApiKey(), signal } = {},
) => {
  if (!apiKey || !ocrText) return [];

  try {
    // One of the two model ids is picked at random for each request.
    const models = ['glm-4.5-flash', 'glm-4.7-flash'];
    const model = models[Math.floor(Math.random() * models.length)];

    const resp = await fetch('https://open.bigmodel.cn/api/paas/v4/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model,
        messages: [
          {
            role: 'user',
            // Instruction prompt (runtime text sent to the model; kept as-is).
            content:
              '你是一个基金 OCR 文本解析助手。' +
              '从下面的 OCR 文本中抽取其中出现的「基金名称列表」。' +
              '要求:1)基金名称一般为中文,中间不能有空字符串,可包含部分英文或括号' +
              '2)名称后面通常会跟着金额或持有金额(数字,可能带千分位逗号和小数);' +
              '3)忽略无关信息,只返回你判断为基金名称的字符串;' +
              '4)去重后输出。输出格式:严格返回 JSON,如 {"fund_names": ["基金名称1","基金名称2"]},不要输出任何多余说明',
          },
          {
            role: 'user',
            content: String(ocrText),
          },
        ],
        temperature: 0.2,
        max_tokens: 1024,
        thinking: {
          type: 'disabled',
        },
      }),
      signal,
    });

    if (!resp.ok) {
      console.warn(`[extractFundNamesWithLLM] request failed with status ${resp.status}`);
      return [];
    }

    const data = await resp.json();
    const parsed = parseFundNamesPayload(data?.choices?.[0]?.message?.content);
    return normalizeFundNames(parsed?.fund_names);
  } catch (err) {
    // Deliberately best-effort: report the failure but never reject.
    console.warn('[extractFundNamesWithLLM] fund name extraction failed', err);
    return [];
  }
};
|
||||
|
||||
let historyQueue = Promise.resolve();
|
||||
|
||||
export const fetchFundHistory = async (code, range = '1m') => {
|
||||
|
||||
42
app/page.jsx
42
app/page.jsx
@@ -39,7 +39,7 @@ import WeChatModal from "./components/WeChatModal";
|
||||
import githubImg from "./assets/github.svg";
|
||||
import { supabase, isSupabaseConfigured } from './lib/supabase';
|
||||
import { recordValuation, getAllValuationSeries, clearFund } from './lib/valuationTimeseries';
|
||||
import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFundNetValue, searchFunds } from './api/fund';
|
||||
import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFundNetValue, searchFunds, extractFundNamesWithLLM } from './api/fund';
|
||||
import packageJson from '../package.json';
|
||||
|
||||
dayjs.extend(utc);
|
||||
@@ -1043,7 +1043,7 @@ export default function HomePage() {
|
||||
for (const base of cdnBases) {
|
||||
for (const coreFile of coreCandidates) {
|
||||
try {
|
||||
worker = await createWorker('eng', 1, {
|
||||
worker = await createWorker('chi_sim+eng', 1, {
|
||||
workerPath: `${base}/tesseract.js@v5.1.1/dist/worker.min.js`,
|
||||
corePath: `${base}/tesseract.js-core@v5.1.1/${coreFile}`,
|
||||
});
|
||||
@@ -1086,6 +1086,7 @@ export default function HomePage() {
|
||||
};
|
||||
|
||||
const allCodes = new Set();
|
||||
const allNames = new Set();
|
||||
for (let i = 0; i < files.length; i++) {
|
||||
if (abortScanRef.current) break;
|
||||
|
||||
@@ -1111,6 +1112,21 @@ export default function HomePage() {
|
||||
}
|
||||
const matches = text.match(/\b\d{6}\b/g) || [];
|
||||
matches.forEach(c => allCodes.add(c));
|
||||
|
||||
// 如果当前图片中没有识别出基金编码,尝试从文本中提取可能的中文基金名称(调用 GLM 接口)
|
||||
if (!matches.length && text) {
|
||||
let parsedNames = [];
|
||||
try {
|
||||
parsedNames = await extractFundNamesWithLLM(text);
|
||||
} catch (e) {
|
||||
parsedNames = [];
|
||||
}
|
||||
parsedNames.forEach((name) => {
|
||||
if (name && typeof name === 'string') {
|
||||
allNames.add(name.trim());
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (abortScanRef.current) {
|
||||
@@ -1118,6 +1134,28 @@ export default function HomePage() {
|
||||
return;
|
||||
}
|
||||
|
||||
// 如果所有截图中都没有识别出基金编码,尝试使用识别到的中文名称去搜索基金
|
||||
if (allCodes.size === 0 && allNames.size > 0) {
|
||||
const names = Array.from(allNames);
|
||||
setScanProgress({ stage: 'verify', current: 0, total: names.length });
|
||||
for (let i = 0; i < names.length; i++) {
|
||||
if (abortScanRef.current) break;
|
||||
const name = names[i];
|
||||
setScanProgress(prev => ({ ...prev, current: i + 1 }));
|
||||
try {
|
||||
const list = await searchFundsWithTimeout(name, 8000);
|
||||
// 只有当搜索结果「有且仅有一条」时,才认为名称匹配是唯一且有效的
|
||||
if (Array.isArray(list) && list.length === 1) {
|
||||
const found = list[0];
|
||||
if (found && found.CODE) {
|
||||
allCodes.add(found.CODE);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const codes = Array.from(allCodes).sort();
|
||||
setScanProgress({ stage: 'verify', current: 0, total: codes.length });
|
||||
|
||||
|
||||
Reference in New Issue
Block a user