feat: OCR识别库cdn加速地址调整

This commit is contained in:
hzm
2026-02-18 23:14:32 +08:00
parent 67c36f65df
commit 6c2eef1533

View File

@@ -2,6 +2,7 @@
import { useEffect, useRef, useState, useMemo, useLayoutEffect, useCallback } from 'react'; import { useEffect, useRef, useState, useMemo, useLayoutEffect, useCallback } from 'react';
import { motion, AnimatePresence, Reorder } from 'framer-motion'; import { motion, AnimatePresence, Reorder } from 'framer-motion';
import { createWorker } from 'tesseract.js';
import { createAvatar } from '@dicebear/core'; import { createAvatar } from '@dicebear/core';
import { glass } from '@dicebear/collection'; import { glass } from '@dicebear/collection';
import dayjs from 'dayjs'; import dayjs from 'dayjs';
@@ -2478,12 +2479,7 @@ export default function HomePage() {
const [scanProgress, setScanProgress] = useState({ stage: 'ocr', current: 0, total: 0 }); // stage: ocr | verify const [scanProgress, setScanProgress] = useState({ stage: 'ocr', current: 0, total: 0 }); // stage: ocr | verify
const abortScanRef = useRef(false); // 终止扫描标记 const abortScanRef = useRef(false); // 终止扫描标记
const fileInputRef = useRef(null); const fileInputRef = useRef(null);
const ocrWorkerRef = useRef(null);
// 引入 Tesseract
const [Tesseract, setTesseract] = useState(null);
useEffect(() => {
import('tesseract.js').then(mod => setTesseract(mod.default));
}, []);
const handleScanClick = () => { const handleScanClick = () => {
setScanModalOpen(true); setScanModalOpen(true);
@@ -2499,16 +2495,18 @@ export default function HomePage() {
abortScanRef.current = true; abortScanRef.current = true;
setIsScanning(false); setIsScanning(false);
setScanProgress({ stage: 'ocr', current: 0, total: 0 }); setScanProgress({ stage: 'ocr', current: 0, total: 0 });
if (ocrWorkerRef.current) {
try {
ocrWorkerRef.current.terminate();
} catch (e) {}
ocrWorkerRef.current = null;
}
if (fileInputRef.current) fileInputRef.current.value = ''; if (fileInputRef.current) fileInputRef.current.value = '';
}; };
const handleFilesUpload = async (event) => { const handleFilesUpload = async (event) => {
const files = Array.from(event.target.files || []); const files = Array.from(event.target.files || []);
if (!files.length) return; if (!files.length) return;
if (!Tesseract) {
alert('OCR 组件加载中,请稍后重试');
return;
}
setIsScanning(true); setIsScanning(true);
setScanModalOpen(false); // 关闭选择弹窗 setScanModalOpen(false); // 关闭选择弹窗
@@ -2516,6 +2514,55 @@ export default function HomePage() {
setScanProgress({ stage: 'ocr', current: 0, total: files.length }); setScanProgress({ stage: 'ocr', current: 0, total: files.length });
try { try {
let worker = ocrWorkerRef.current;
if (!worker) {
const cdnBases = [
'https://cdn.jsdmirror.com/npm',
'https://cdn.jsdelivr.net/npm'
];
let lastErr = null;
for (const base of cdnBases) {
try {
worker = await createWorker('eng', 1, {
workerPath: `${base}/tesseract.js@v7.0.0/dist/worker.min.js`,
corePath: `${base}/tesseract.js-core@v7.0.0/tesseract-core-relaxedsimd-lstm.wasm.js`
});
lastErr = null;
break;
} catch (e) {
lastErr = e;
}
}
if (lastErr) throw lastErr;
ocrWorkerRef.current = worker;
}
/**
 * Run OCR on one file through the enclosing scope's `worker`, giving up
 * after a deadline.
 *
 * The recognition promise is raced against a timer. If the timer fires
 * first, the returned promise rejects with an Error whose message is
 * 'OCR_TIMEOUT'. This helper does not itself stop the underlying
 * `worker.recognize` call; it only stops waiting for it.
 *
 * @param {*} file - Value forwarded unchanged to `worker.recognize`.
 * @param {number} ms - Deadline in milliseconds.
 * @returns {Promise<*>} Whatever `worker.recognize(file)` resolves to.
 * @throws {Error} 'OCR_TIMEOUT' on deadline, or the recognizer's own error.
 */
const recognizeWithTimeout = async (file, ms) => {
  let timeoutHandle = null;
  const deadline = new Promise((_resolve, rejectDeadline) => {
    timeoutHandle = setTimeout(() => {
      rejectDeadline(new Error('OCR_TIMEOUT'));
    }, ms);
  });
  try {
    const contenders = [worker.recognize(file), deadline];
    const outcome = await Promise.race(contenders);
    return outcome;
  } finally {
    // Always release the timer, whichever contender settled first.
    if (timeoutHandle) {
      clearTimeout(timeoutHandle);
    }
  }
};
/**
 * Call `searchFunds(val)` with a deadline, never rejecting.
 *
 * The lookup is raced against a timer that resolves to an empty array.
 * Any error from `searchFunds` (thrown synchronously or via rejection)
 * is also turned into an empty array.
 *
 * @param {*} val - Value forwarded unchanged to `searchFunds`.
 * @param {number} ms - Deadline in milliseconds.
 * @returns {Promise<*>} The `searchFunds` result, or `[]` on timeout/error.
 */
const searchFundsWithTimeout = async (val, ms) => {
  let timeoutHandle = null;
  const emptyOnDeadline = new Promise((resolveEmpty) => {
    timeoutHandle = setTimeout(() => {
      resolveEmpty([]);
    }, ms);
  });
  try {
    const lookup = searchFunds(val);
    const winner = await Promise.race([lookup, emptyOnDeadline]);
    return winner;
  } catch (_err) {
    // Deliberate best-effort: a failed lookup is reported as "no results".
    return [];
  } finally {
    // Always release the timer, whichever contender settled first.
    if (timeoutHandle) {
      clearTimeout(timeoutHandle);
    }
  }
};
const allCodes = new Set(); const allCodes = new Set();
for (let i = 0; i < files.length; i++) { for (let i = 0; i < files.length; i++) {
if (abortScanRef.current) break; if (abortScanRef.current) break;
@@ -2524,7 +2571,22 @@ export default function HomePage() {
// 更新进度:正在处理第 i+1 张 // 更新进度:正在处理第 i+1 张
setScanProgress(prev => ({ ...prev, current: i + 1 })); setScanProgress(prev => ({ ...prev, current: i + 1 }));
const { data: { text } } = await Tesseract.recognize(f, 'eng'); // 这里使用英文解析能提升速度 let text = '';
try {
const res = await recognizeWithTimeout(f, 30000);
text = res?.data?.text || '';
} catch (e) {
if (String(e?.message || '').includes('OCR_TIMEOUT')) {
if (worker) {
try {
await worker.terminate();
} catch (err) {}
ocrWorkerRef.current = null;
}
throw e;
}
text = '';
}
const matches = text.match(/\b\d{6}\b/g) || []; const matches = text.match(/\b\d{6}\b/g) || [];
matches.forEach(c => allCodes.add(c)); matches.forEach(c => allCodes.add(c));
} }
@@ -2546,7 +2608,7 @@ export default function HomePage() {
let found = null; let found = null;
try { try {
const list = await searchFunds(code); const list = await searchFundsWithTimeout(code, 8000);
found = Array.isArray(list) ? list.find(d => d.CODE === code) : null; found = Array.isArray(list) ? list.find(d => d.CODE === code) : null;
} catch (e) { } catch (e) {
found = null; found = null;