feat: 提升截图基金名称识别准确率

This commit is contained in:
hzm
2026-03-05 20:42:02 +08:00
parent 9cfac48b59
commit 873728a6a2
5 changed files with 104968 additions and 0 deletions

View File

@@ -0,0 +1,204 @@
import { useCallback, useRef } from 'react';
import { cachedRequest, clearCachedRequest } from '../lib/cacheRequest';
const FUND_CODE_SEARCH_URL = 'https://fund.eastmoney.com/js/fundcode_search.js';
const FUND_LIST_CACHE_KEY = 'eastmoney_fundcode_search_list';
const FUND_LIST_CACHE_TIME = 24 * 60 * 60 * 1000;
/**
 * Converts the raw fund array published by the East Money fund-code script
 * into a list of `{ code, name }` records.
 *
 * Each raw entry is expected to be an array whose index 0 holds the fund code
 * and index 2 holds the fund name. Entries that are not arrays, or whose code
 * or name is empty after trimming, are dropped.
 *
 * @param {unknown} rawList - Raw value read from the loaded script.
 * @returns {{ code: string, name: string }[]} Cleaned records; `[]` when
 *   `rawList` is not an array.
 */
const formatEastMoneyFundList = (rawList) => {
  const funds = [];
  if (!Array.isArray(rawList)) {
    return funds;
  }
  for (const entry of rawList) {
    if (!Array.isArray(entry)) {
      continue;
    }
    const [rawCode, , rawName] = entry;
    const code = String(rawCode ?? '').trim();
    const name = String(rawName ?? '').trim();
    if (code && name) {
      funds.push({ code, name });
    }
  }
  return funds;
};
/**
 * Canonicalises a fund name for comparison: upper-cases it, maps full-width
 * parentheses to ASCII, and removes middle dots, whitespace and every character
 * that is not a CJK ideograph (U+4E00–U+9FA5), A–Z, 0–9 or a parenthesis.
 *
 * @param {unknown} value - Raw fund name (for example OCR output).
 * @returns {string} Normalised name, or `''` when `value` is not a string.
 */
const normalizeFundText = (value) => {
  if (typeof value !== 'string') return '';
  return value
    .toUpperCase()
    // Full-width parentheses (U+FF08 / U+FF09) must be converted before the
    // whitelist below, otherwise they are stripped instead of preserved.
    .replace(/[(\uFF08]/g, '(')
    .replace(/[)\uFF09]/g, ')')
    .replace(/[·•]/g, '')
    .replace(/\s+/g, '')
    .replace(/[^\u4e00-\u9fa5A-Z0-9()]/g, '');
};

/**
 * Extracts the matching signals used for re-ranking from a fund name.
 *
 * @param {unknown} rawName - Raw fund name.
 * @returns {{
 *   normalized: string,
 *   core: string,
 *   hasETF: boolean,
 *   hasLOF: boolean,
 *   hasLink: boolean,
 *   shareClass: (string|null)
 * }} `normalized` is the canonical name; `core` is the name with the
 *   "基金", "ETF联接", "联接", "ETF", "LOF" markers and a trailing share-class
 *   letter removed; `shareClass` is the trailing letter (optionally followed by
 *   "类") or `null`.
 */
const parseFundQuerySignals = (rawName) => {
  const normalized = normalizeFundText(rawName);
  // Note: this pattern matches ANY trailing letter, so a name that ends in
  // "ETF" or "LOF" reports share class "F". The ranking below relies on the
  // current behaviour, so it is intentionally left as is.
  const shareMatch = normalized.match(/([A-Z])(?:类)?$/i);
  const core = normalized
    .replace(/基金/g, '')
    .replace(/ETF联接/g, '')
    .replace(/联接[A-Z]?/g, '')
    .replace(/ETF/g, '')
    .replace(/LOF/g, '')
    .replace(/[A-Z](?:类)?$/g, '');
  return {
    normalized,
    core,
    hasETF: normalized.includes('ETF'),
    hasLOF: normalized.includes('LOF'),
    hasLink: normalized.includes('联接'),
    shareClass: shareMatch ? shareMatch[1].toUpperCase() : null,
  };
};

/**
 * Loads the full fund list by injecting the East Money fund-code script, which
 * publishes its data on the global `window.r`.
 *
 * The previous value of `window.r` is restored (or the property removed) and
 * the script element is detached once loading finishes, successfully or not.
 *
 * @returns {Promise<{ code: string, name: string }[]>} Parsed fund records.
 *   Rejects with `NO_BROWSER_ENV` outside a browser, `LOAD_ALL_FUND_FAILED`
 *   when the script fails to load, and `PARSE_ALL_FUND_FAILED` when no record
 *   could be parsed.
 */
const loadEastMoneyFundList = () =>
  new Promise((resolve, reject) => {
    if (typeof window === 'undefined' || typeof document === 'undefined' || !document.body) {
      reject(new Error('NO_BROWSER_ENV'));
      return;
    }
    const prevR = window.r;
    const script = document.createElement('script');
    // Timestamp query string defeats the HTTP cache for the script itself.
    script.src = `${FUND_CODE_SEARCH_URL}?_=${Date.now()}`;
    script.async = true;
    const cleanup = () => {
      if (document.body.contains(script)) {
        document.body.removeChild(script);
      }
      if (prevR === undefined) {
        try {
          delete window.r;
        } catch (e) {
          // `delete` throws in strict mode when the global is non-configurable.
          window.r = undefined;
        }
      } else {
        window.r = prevR;
      }
    };
    script.onload = () => {
      // Keep a plain reference: cleanup() only rebinds `window.r`, and
      // formatEastMoneyFundList builds fresh objects, so no deep copy is needed.
      const loaded = window.r;
      cleanup();
      const parsed = formatEastMoneyFundList(loaded);
      if (!parsed.length) {
        reject(new Error('PARSE_ALL_FUND_FAILED'));
        return;
      }
      resolve(parsed);
    };
    script.onerror = () => {
      cleanup();
      reject(new Error('LOAD_ALL_FUND_FAILED'));
    };
    document.body.appendChild(script);
  });

/**
 * Adjusts a Fuse search hit's score using the structural signals of the query.
 * Lower is better. A candidate is rewarded when it shares a marker the query
 * carries (ETF / LOF / 联接 / share class) and penalised when it lacks it; the
 * core-name containment check adds a further small bonus or penalty.
 *
 * @param {ReturnType<typeof parseFundQuerySignals>} querySignals - Signals of the query name.
 * @param {{ item?: { code?: string, name?: string }, score?: number }} hit - One Fuse search result.
 * @returns {object} The hit with an additional numeric `finalScore`.
 */
const scoreFundCandidate = (querySignals, hit) => {
  const candidateSignals = parseFundQuerySignals(hit?.item?.name || '');
  let finalScore = hit.score ?? 1;
  if (querySignals.hasETF) {
    finalScore += candidateSignals.hasETF ? -0.04 : 0.2;
  }
  if (querySignals.hasLOF) {
    finalScore += candidateSignals.hasLOF ? -0.04 : 0.2;
  }
  if (querySignals.hasLink) {
    finalScore += candidateSignals.hasLink ? -0.03 : 0.18;
  }
  if (querySignals.shareClass) {
    finalScore += candidateSignals.shareClass === querySignals.shareClass ? -0.03 : 0.18;
  }
  if (querySignals.core && candidateSignals.core) {
    if (candidateSignals.core.includes(querySignals.core)) {
      finalScore -= 0.06;
    } else if (!querySignals.core.includes(candidateSignals.core)) {
      finalScore += 0.06;
    }
  }
  return { ...hit, finalScore };
};

/**
 * React hook exposing a fuzzy "fund name -> fund code" resolver backed by a
 * lazily built Fuse index over the full fund list.
 *
 * @returns {{ resolveFundCodeByFuzzy: (name: string) => Promise<string|null> }}
 */
export const useFundFuzzyMatcher = () => {
  const allFundFuseRef = useRef(null);
  const allFundLoadPromiseRef = useRef(null);

  // Builds the Fuse index once; concurrent callers share the in-flight promise.
  const getAllFundFuse = useCallback(async () => {
    if (allFundFuseRef.current) return allFundFuseRef.current;
    if (allFundLoadPromiseRef.current) return allFundLoadPromiseRef.current;
    allFundLoadPromiseRef.current = (async () => {
      const [fuseModule, allFundList] = await Promise.all([
        import('fuse.js'),
        cachedRequest(loadEastMoneyFundList, FUND_LIST_CACHE_KEY, {
          cacheTime: FUND_LIST_CACHE_TIME,
        }),
      ]);
      const Fuse = fuseModule.default;
      const fuse = new Fuse(Array.isArray(allFundList) ? allFundList : [], {
        keys: ['name', 'code'],
        includeScore: true,
        threshold: 0.5,
        ignoreLocation: true,
        minMatchCharLength: 2,
      });
      allFundFuseRef.current = fuse;
      return fuse;
    })();
    try {
      return await allFundLoadPromiseRef.current;
    } catch (e) {
      // Drop the failed promise and the cached request so a later call retries.
      allFundLoadPromiseRef.current = null;
      clearCachedRequest(FUND_LIST_CACHE_KEY);
      throw e;
    }
  }, []);

  /**
   * Resolves a fund name to a fund code, or `null` when there is no confident,
   * unambiguous match. Rejects when the fund list or fuse.js cannot be loaded.
   */
  const resolveFundCodeByFuzzy = useCallback(async (name) => {
    const querySignals = parseFundQuerySignals(name);
    if (!querySignals.normalized) return null;

    // Shorter names get tighter thresholds.
    const len = querySignals.normalized.length;
    const strictThreshold = len <= 4 ? 0.16 : len <= 8 ? 0.22 : 0.28;
    const relaxedThreshold = Math.min(0.45, strictThreshold + 0.16);
    const scoreGapThreshold = len <= 5 ? 0.08 : 0.06;

    const fuse = await getAllFundFuse();
    const recalled = fuse.search(name, { limit: 50 });
    if (!recalled.length) return null;

    // Stage 1: keep hits within the relaxed threshold on the raw Fuse score.
    const stage1 = recalled.filter((item) => (item.score ?? 1) <= relaxedThreshold);
    if (!stage1.length) return null;

    // Stage 2: re-rank by structural signals and require a strict, clear winner.
    const ranked = stage1
      .map((item) => scoreFundCandidate(querySignals, item))
      .sort((a, b) => a.finalScore - b.finalScore);
    const [top1, top2] = ranked;
    if (!top1 || top1.finalScore > strictThreshold) return null;
    if (top2 && (top2.finalScore - top1.finalScore) < scoreGapThreshold) {
      return null;
    }
    return top1?.item?.code || null;
  }, [getAllFundFuse]);

  return {
    resolveFundCodeByFuzzy,
  };
};
export default useFundFuzzyMatcher;

View File

@@ -49,6 +49,7 @@ import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFu
import packageJson from '../package.json';
import PcFundTable from './components/PcFundTable';
import MobileFundTable from './components/MobileFundTable';
import { useFundFuzzyMatcher } from './hooks/useFundFuzzyMatcher';
dayjs.extend(utc);
dayjs.extend(timezone);
@@ -1173,6 +1174,7 @@ export default function HomePage() {
const abortScanRef = useRef(false); // 终止扫描标记
const fileInputRef = useRef(null);
const ocrWorkerRef = useRef(null);
const { resolveFundCodeByFuzzy } = useFundFuzzyMatcher();
const handleScanClick = () => {
setScanModalOpen(true);
@@ -1327,6 +1329,15 @@ export default function HomePage() {
if (found && found.CODE) {
allCodes.add(found.CODE);
}
} else {
// Fuzzy-match the name with fuse.js against the full fund list loaded by useFundFuzzyMatcher, to supplement the search API.
try {
const fuzzyCode = await resolveFundCodeByFuzzy(name);
if (fuzzyCode) {
allCodes.add(fuzzyCode);
}
} catch (e) {
}
}
} catch (e) {
}

10
package-lock.json generated
View File

@@ -19,6 +19,7 @@
"chart.js": "^4.5.1",
"dayjs": "^1.11.19",
"framer-motion": "^12.29.2",
"fuse.js": "^7.1.0",
"lodash": "^4.17.23",
"next": "^16.1.5",
"react": "18.3.1",
@@ -4149,6 +4150,15 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/fuse.js": {
"version": "7.1.0",
"resolved": "https://registry.npmmirror.com/fuse.js/-/fuse.js-7.1.0.tgz",
"integrity": "sha512-trLf4SzuuUxfusZADLINj+dE8clK1frKdmqiJNb1Es75fmI5oY6X2mxLVUciLLjxqw/xr72Dhy+lER6dGd02FQ==",
"license": "Apache-2.0",
"engines": {
"node": ">=10"
}
},
"node_modules/generator-function": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/generator-function/-/generator-function-2.0.1.tgz",

View File

@@ -22,6 +22,7 @@
"chart.js": "^4.5.1",
"dayjs": "^1.11.19",
"framer-motion": "^12.29.2",
"fuse.js": "^7.1.0",
"lodash": "^4.17.23",
"next": "^16.1.5",
"react": "18.3.1",

104742
public/allFund.json Normal file

File diff suppressed because it is too large Load Diff