feat: 提升截图基金名称识别准确率
This commit is contained in:
204
app/hooks/useFundFuzzyMatcher.js
Normal file
204
app/hooks/useFundFuzzyMatcher.js
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
import { useCallback, useRef } from 'react';
|
||||||
|
import { cachedRequest, clearCachedRequest } from '../lib/cacheRequest';
|
||||||
|
|
||||||
|
// East Money script URL. The hook below injects it as a <script> src and then reads the
// global `window.r` once the script has loaded.
const FUND_CODE_SEARCH_URL = 'https://fund.eastmoney.com/js/fundcode_search.js';
|
||||||
|
// Cache key passed to cachedRequest / clearCachedRequest for the parsed fund list.
const FUND_LIST_CACHE_KEY = 'eastmoney_fundcode_search_list';
|
||||||
|
// Passed to cachedRequest as `cacheTime`.
// NOTE(review): presumably milliseconds (i.e. 24 hours) — confirm against cachedRequest.
const FUND_LIST_CACHE_TIME = 24 * 60 * 60 * 1000;
|
||||||
|
|
||||||
|
/**
 * Converts the raw fund list into `{ code, name }` records.
 *
 * Each usable entry is an array whose index 0 holds the fund code and index 2
 * holds the fund name; both are stringified and trimmed. Entries that are not
 * arrays, or that end up with an empty code or name, are dropped.
 *
 * @param {unknown} rawList - Raw list; anything that is not an array yields `[]`.
 * @returns {{ code: string, name: string }[]} Cleaned fund records in input order.
 */
const formatEastMoneyFundList = (rawList) => {
  if (!Array.isArray(rawList)) return [];

  const funds = [];
  for (const entry of rawList) {
    if (!Array.isArray(entry)) continue;

    const code = String(entry[0] ?? '').trim();
    const name = String(entry[2] ?? '').trim();
    if (code && name) {
      funds.push({ code, name });
    }
  }
  return funds;
};
|
||||||
|
|
||||||
|
/**
 * Normalizes a fund name for comparison.
 *
 * Upper-cases the text, converts full-width parentheses to ASCII ones, removes
 * middle dots / bullets and whitespace, then strips every character that is not
 * a CJK ideograph in U+4E00–U+9FA5, an ASCII letter or digit, or a parenthesis.
 *
 * @param {unknown} value - Raw name, e.g. text recognised from a screenshot.
 * @returns {string} Normalized text, or `''` when `value` is not a string.
 */
const normalizeFundText = (value) => {
  if (typeof value !== 'string') return '';
  return value
    .toUpperCase()
    // Explicit escapes: full-width parentheses must be mapped to ASCII *before* the
    // allow-list below runs, otherwise they are deleted instead of converted.
    .replace(/[\uFF08(]/g, '(')
    .replace(/[\uFF09)]/g, ')')
    .replace(/[\u00B7\u2022]/g, '')
    .replace(/\s+/g, '')
    .replace(/[^\u4e00-\u9fa5A-Z0-9()]/g, '');
};

/**
 * Extracts the ranking signals used to compare a query name with a candidate name.
 *
 * @param {unknown} rawName - Fund name (query or candidate).
 * @returns {{ normalized: string, core: string, hasETF: boolean, hasLOF: boolean,
 *   hasLink: boolean, shareClass: (string|null) }}
 *   `normalized` is the cleaned name; `core` is the name with product markers
 *   (基金 / ETF联接 / 联接 / ETF / LOF) and a trailing share-class letter removed;
 *   `shareClass` is the trailing letter (optionally followed by 类), or `null`.
 */
const parseFundQuerySignals = (rawName) => {
  const normalized = normalizeFundText(rawName);

  // NOTE(review): a name that ends in "ETF" or "LOF" reports share class "F" here.
  // Query and candidate are parsed the same way, so the ranking currently relies on
  // this; left unchanged on purpose — revisit together with the score weights.
  const shareMatch = normalized.match(/([A-Z])(?:类)?$/);

  const core = normalized
    .replace(/基金/g, '')
    .replace(/ETF联接/g, '')
    .replace(/联接[A-Z]?/g, '')
    .replace(/ETF/g, '')
    .replace(/LOF/g, '')
    .replace(/[A-Z](?:类)?$/, '');

  return {
    normalized,
    core,
    hasETF: normalized.includes('ETF'),
    hasLOF: normalized.includes('LOF'),
    hasLink: normalized.includes('联接'),
    shareClass: shareMatch ? shareMatch[1] : null,
  };
};

/**
 * Picks score thresholds from the normalized query length; shorter queries get
 * tighter limits.
 *
 * @param {number} len - Length of the normalized query.
 * @returns {{ strictThreshold: number, relaxedThreshold: number, scoreGapThreshold: number }}
 */
const pickMatchThresholds = (len) => {
  let strictThreshold = 0.28;
  if (len <= 4) {
    strictThreshold = 0.16;
  } else if (len <= 8) {
    strictThreshold = 0.22;
  }
  return {
    strictThreshold,
    relaxedThreshold: Math.min(0.45, strictThreshold + 0.16),
    scoreGapThreshold: len <= 5 ? 0.08 : 0.06,
  };
};

/**
 * Adjusts a search score with name-signal bonuses and penalties (lower ranks first).
 *
 * A signal present in the query lowers the score when the candidate shares it and
 * raises it otherwise. Signals absent from the query do not affect the score.
 *
 * @param {number} baseScore - Score reported by the fuzzy search.
 * @param {ReturnType<typeof parseFundQuerySignals>} querySignals - Signals of the query.
 * @param {ReturnType<typeof parseFundQuerySignals>} candidateSignals - Signals of the candidate.
 * @returns {number} Adjusted score.
 */
const adjustCandidateScore = (baseScore, querySignals, candidateSignals) => {
  let finalScore = baseScore;

  if (querySignals.hasETF) {
    finalScore += candidateSignals.hasETF ? -0.04 : 0.2;
  }
  if (querySignals.hasLOF) {
    finalScore += candidateSignals.hasLOF ? -0.04 : 0.2;
  }
  if (querySignals.hasLink) {
    finalScore += candidateSignals.hasLink ? -0.03 : 0.18;
  }
  if (querySignals.shareClass) {
    finalScore += candidateSignals.shareClass === querySignals.shareClass ? -0.03 : 0.18;
  }

  if (querySignals.core && candidateSignals.core) {
    if (candidateSignals.core.includes(querySignals.core)) {
      finalScore -= 0.06;
    } else if (!querySignals.core.includes(candidateSignals.core)) {
      // Neither core contains the other.
      finalScore += 0.06;
    }
  }

  return finalScore;
};

/**
 * Loads the fund list by injecting the East Money script and reading `window.r`.
 *
 * The previous value of `window.r` is restored (or the property removed) once the
 * script has loaded or failed, and the script element is detached again.
 *
 * @returns {Promise<{ code: string, name: string }[]>} Parsed, non-empty fund list.
 *   Rejects with `NO_BROWSER_ENV` outside a browser, `LOAD_ALL_FUND_FAILED` when the
 *   script fails to load, and `PARSE_ALL_FUND_FAILED` when no usable entry is found.
 */
const loadEastMoneyFundList = () =>
  new Promise((resolve, reject) => {
    if (typeof window === 'undefined' || typeof document === 'undefined' || !document.body) {
      reject(new Error('NO_BROWSER_ENV'));
      return;
    }

    const prevR = window.r;
    const script = document.createElement('script');
    // Timestamp query parameter forces a fresh download of the script.
    script.src = `${FUND_CODE_SEARCH_URL}?_=${Date.now()}`;
    script.async = true;

    const cleanup = () => {
      if (document.body.contains(script)) {
        document.body.removeChild(script);
      }
      if (prevR === undefined) {
        try {
          delete window.r;
        } catch (e) {
          // The property could not be deleted; blank it instead.
          window.r = undefined;
        }
      } else {
        window.r = prevR;
      }
    };

    script.onload = () => {
      // Keep a reference only: formatEastMoneyFundList builds fresh records, so a
      // deep copy of the whole list is unnecessary, and cleanup() merely detaches
      // the global binding without touching the array itself.
      const rawList = window.r;
      cleanup();
      const parsed = formatEastMoneyFundList(rawList);
      if (!parsed.length) {
        reject(new Error('PARSE_ALL_FUND_FAILED'));
        return;
      }
      resolve(parsed);
    };

    script.onerror = () => {
      cleanup();
      reject(new Error('LOAD_ALL_FUND_FAILED'));
    };

    document.body.appendChild(script);
  });

/**
 * React hook that resolves a fund code from an (OCR-recognised) fund name by
 * fuzzy-matching it against the full East Money fund list.
 *
 * The Fuse index is built lazily on first use and kept in a ref for the lifetime
 * of the component.
 *
 * @returns {{ resolveFundCodeByFuzzy: (name: string) => Promise<string|null> }}
 */
export const useFundFuzzyMatcher = () => {
  const allFundFuseRef = useRef(null);
  const allFundLoadPromiseRef = useRef(null);

  // Returns the shared Fuse index, building it on first call. Concurrent callers
  // share the in-flight promise; a failed load is forgotten so a later call retries.
  const getAllFundFuse = useCallback(async () => {
    if (allFundFuseRef.current) return allFundFuseRef.current;
    if (allFundLoadPromiseRef.current) return allFundLoadPromiseRef.current;

    allFundLoadPromiseRef.current = (async () => {
      const [fuseModule, allFundList] = await Promise.all([
        import('fuse.js'),
        cachedRequest(() => loadEastMoneyFundList(), FUND_LIST_CACHE_KEY, {
          cacheTime: FUND_LIST_CACHE_TIME,
        }),
      ]);
      const Fuse = fuseModule.default;
      const fuse = new Fuse(Array.isArray(allFundList) ? allFundList : [], {
        keys: ['name', 'code'],
        includeScore: true,
        threshold: 0.5,
        ignoreLocation: true,
        minMatchCharLength: 2,
      });

      allFundFuseRef.current = fuse;
      return fuse;
    })();

    try {
      return await allFundLoadPromiseRef.current;
    } catch (e) {
      allFundLoadPromiseRef.current = null;
      clearCachedRequest(FUND_LIST_CACHE_KEY);
      throw e;
    }
  }, []);

  /**
   * Resolves the fund code that best matches `name`.
   *
   * Returns `null` when the name normalizes to an empty string, nothing is
   * recalled within the relaxed threshold, the best adjusted score exceeds the
   * strict threshold, or the runner-up is too close to the winner to choose safely.
   * Rejects when the fund list or Fuse cannot be loaded.
   */
  const resolveFundCodeByFuzzy = useCallback(async (name) => {
    const querySignals = parseFundQuerySignals(name);
    if (!querySignals.normalized) return null;

    const { strictThreshold, relaxedThreshold, scoreGapThreshold } = pickMatchThresholds(
      querySignals.normalized.length,
    );

    const fuse = await getAllFundFuse();
    const recalled = fuse.search(name, { limit: 50 });
    if (!recalled.length) return null;

    // Stage 1: keep only candidates within the relaxed threshold.
    const stage1 = recalled.filter((item) => (item.score ?? 1) <= relaxedThreshold);
    if (!stage1.length) return null;

    // Stage 2: re-rank with the name signals; lowest adjusted score first.
    const ranked = stage1
      .map((item) => ({
        ...item,
        finalScore: adjustCandidateScore(
          item.score ?? 1,
          querySignals,
          parseFundQuerySignals(item?.item?.name || ''),
        ),
      }))
      .sort((a, b) => a.finalScore - b.finalScore);

    const [top1, top2] = ranked;
    if (!top1 || top1.finalScore > strictThreshold) return null;

    // Ambiguous result: refuse to guess when the runner-up is nearly as good.
    if (top2 && top2.finalScore - top1.finalScore < scoreGapThreshold) {
      return null;
    }

    return top1.item?.code || null;
  }, [getAllFundFuse]);

  return {
    resolveFundCodeByFuzzy,
  };
};
|
||||||
|
|
||||||
|
export default useFundFuzzyMatcher;
|
||||||
11
app/page.jsx
11
app/page.jsx
@@ -49,6 +49,7 @@ import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFu
|
|||||||
import packageJson from '../package.json';
|
import packageJson from '../package.json';
|
||||||
import PcFundTable from './components/PcFundTable';
|
import PcFundTable from './components/PcFundTable';
|
||||||
import MobileFundTable from './components/MobileFundTable';
|
import MobileFundTable from './components/MobileFundTable';
|
||||||
|
import { useFundFuzzyMatcher } from './hooks/useFundFuzzyMatcher';
|
||||||
|
|
||||||
dayjs.extend(utc);
|
dayjs.extend(utc);
|
||||||
dayjs.extend(timezone);
|
dayjs.extend(timezone);
|
||||||
@@ -1173,6 +1174,7 @@ export default function HomePage() {
|
|||||||
const abortScanRef = useRef(false); // 终止扫描标记
|
const abortScanRef = useRef(false); // 终止扫描标记
|
||||||
const fileInputRef = useRef(null);
|
const fileInputRef = useRef(null);
|
||||||
const ocrWorkerRef = useRef(null);
|
const ocrWorkerRef = useRef(null);
|
||||||
|
const { resolveFundCodeByFuzzy } = useFundFuzzyMatcher();
|
||||||
|
|
||||||
const handleScanClick = () => {
|
const handleScanClick = () => {
|
||||||
setScanModalOpen(true);
|
setScanModalOpen(true);
|
||||||
@@ -1327,6 +1329,15 @@ export default function HomePage() {
|
|||||||
if (found && found.CODE) {
|
if (found && found.CODE) {
|
||||||
allCodes.add(found.CODE);
|
allCodes.add(found.CODE);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// Fallback: fuzzy-match the name with fuse.js against the East Money fund list loaded by useFundFuzzyMatcher, to cover cases the search API misses
|
||||||
|
try {
|
||||||
|
const fuzzyCode = await resolveFundCodeByFuzzy(name);
|
||||||
|
if (fuzzyCode) {
|
||||||
|
allCodes.add(fuzzyCode);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
}
|
}
|
||||||
|
|||||||
10
package-lock.json
generated
10
package-lock.json
generated
@@ -19,6 +19,7 @@
|
|||||||
"chart.js": "^4.5.1",
|
"chart.js": "^4.5.1",
|
||||||
"dayjs": "^1.11.19",
|
"dayjs": "^1.11.19",
|
||||||
"framer-motion": "^12.29.2",
|
"framer-motion": "^12.29.2",
|
||||||
|
"fuse.js": "^7.1.0",
|
||||||
"lodash": "^4.17.23",
|
"lodash": "^4.17.23",
|
||||||
"next": "^16.1.5",
|
"next": "^16.1.5",
|
||||||
"react": "18.3.1",
|
"react": "18.3.1",
|
||||||
@@ -4149,6 +4150,15 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/fuse.js": {
|
||||||
|
"version": "7.1.0",
|
||||||
|
"resolved": "https://registry.npmmirror.com/fuse.js/-/fuse.js-7.1.0.tgz",
|
||||||
|
"integrity": "sha512-trLf4SzuuUxfusZADLINj+dE8clK1frKdmqiJNb1Es75fmI5oY6X2mxLVUciLLjxqw/xr72Dhy+lER6dGd02FQ==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/generator-function": {
|
"node_modules/generator-function": {
|
||||||
"version": "2.0.1",
|
"version": "2.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/generator-function/-/generator-function-2.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/generator-function/-/generator-function-2.0.1.tgz",
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
"chart.js": "^4.5.1",
|
"chart.js": "^4.5.1",
|
||||||
"dayjs": "^1.11.19",
|
"dayjs": "^1.11.19",
|
||||||
"framer-motion": "^12.29.2",
|
"framer-motion": "^12.29.2",
|
||||||
|
"fuse.js": "^7.1.0",
|
||||||
"lodash": "^4.17.23",
|
"lodash": "^4.17.23",
|
||||||
"next": "^16.1.5",
|
"next": "^16.1.5",
|
||||||
"react": "18.3.1",
|
"react": "18.3.1",
|
||||||
|
|||||||
104742
public/allFund.json
Normal file
104742
public/allFund.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user