feat: OCR 支持搜索基金名称

2026-02-24 20:26:32 +08:00
parent b9ee4546b7
commit 17edeccecc
2 changed files with 107 additions and 3 deletions
--- a/app/api/fund.js
+++ b/app/api/fund.js
@@ -449,6 +449,72 @@ export const submitFeedback = async (formData) => {
  return response.json();
 };

+// Extract fund names from OCR text via the Zhipu GLM chat-completions API. Resolves to an array of non-empty name strings; every failure path below (missing input, non-OK HTTP status, unparseable reply, thrown error) resolves to [] instead of throwing.
+export const extractFundNamesWithLLM = async (ocrText) => {
+  const apiKey = '8df8ccf74a174722847c83b7e222f2af.4A39rJvUeBVDmef1'; // NOTE(review): credential is hard-coded in source — consider loading it from configuration and rotating this key
+  if (!apiKey || !ocrText) return [];
+
+  try {
+    const models = ['glm-4.5-flash', 'glm-4.7-flash'];
+    const model = models[Math.floor(Math.random() * models.length)]; // one of the two models above, picked at random on each call
+
+    const resp = await fetch('https://open.bigmodel.cn/api/paas/v4/chat/completions', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: `Bearer ${apiKey}`,
+      },
+      body: JSON.stringify({
+        model,
+        messages: [
+          {
+            role: 'user', // NOTE(review): the instruction prompt is sent with the 'user' role, not 'system' — confirm this is intended
+            content:
+              '你是一个基金 OCR 文本解析助手。' +
+              '从下面的 OCR 文本中抽取其中出现的「基金名称列表」。' +
+              '要求：1）基金名称一般为中文，中间不能有空字符串,可包含部分英文或括号' +
+              '2）名称后面通常会跟着金额或持有金额（数字，可能带千分位逗号和小数）；' +
+              '3）忽略无关信息，只返回你判断为基金名称的字符串；' +
+              '4）去重后输出。输出格式：严格返回 JSON，如 {"fund_names": ["基金名称1","基金名称2"]}，不要输出任何多余说明',
+          },
+          {
+            role: 'user',
+            content: String(ocrText), // the OCR text itself, coerced to a string
+          },
+        ],
+        temperature: 0.2,
+        max_tokens: 1024,
+        thinking: {
+          type: 'disabled',
+        },
+      }),
+    });
+
+    if (!resp.ok) {
+      return []; // non-2xx response: give up without reporting the status
+    }
+
+    const data = await resp.json();
+    let content = data?.choices?.[0]?.message?.content?.match(/\{[\s\S]*?\}/)?.[0]; // lazy match: from the first '{' to the first '}' after it, so a '}' inside a name would truncate the JSON
+    if (!content || typeof content !== 'string') return [];
+
+    let parsed;
+    try {
+      parsed = JSON.parse(content);
+    } catch {
+      return []; // extracted span was not valid JSON
+    }
+
+    const names = parsed?.fund_names;
+    if (!Array.isArray(names)) return [];
+    return names
+      .map((n) => (typeof n === 'string' ? n.trim().replaceAll(' ','') : '')) // trim, then remove ASCII spaces only; non-strings become '' and are dropped by the filter below
+      .filter(Boolean);
+  } catch (e) {
+    return []; // any error thrown above (network failure, bad response body, ...) is swallowed without logging
+  }
+};
+
 let historyQueue = Promise.resolve();

 export const fetchFundHistory = async (code, range = '1m') => {
--- a/app/page.jsx
+++ b/app/page.jsx
@@ -39,7 +39,7 @@ import WeChatModal from "./components/WeChatModal";
 import githubImg from "./assets/github.svg";
 import { supabase, isSupabaseConfigured } from './lib/supabase';
 import { recordValuation, getAllValuationSeries, clearFund } from './lib/valuationTimeseries';
-import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFundNetValue, searchFunds } from './api/fund';
+import { fetchFundData, fetchLatestRelease, fetchShanghaiIndexDate, fetchSmartFundNetValue, searchFunds, extractFundNamesWithLLM } from './api/fund';
 import packageJson from '../package.json';

 dayjs.extend(utc);
@@ -1043,7 +1043,7 @@ export default function HomePage() {
        for (const base of cdnBases) {
          for (const coreFile of coreCandidates) {
            try {
-              worker = await createWorker('eng', 1, {
+              worker = await createWorker('chi_sim+eng', 1, {
                workerPath: `${base}/tesseract.js@v5.1.1/dist/worker.min.js`,
                corePath: `${base}/tesseract.js-core@v5.1.1/${coreFile}`,
              });
@@ -1086,6 +1086,7 @@ export default function HomePage() {
      };

      const allCodes = new Set();
+      const allNames = new Set();
      for (let i = 0; i < files.length; i++) {
        if (abortScanRef.current) break;

@@ -1111,6 +1112,21 @@ export default function HomePage() {
        }
        const matches = text.match(/\b\d{6}\b/g) || [];
        matches.forEach(c => allCodes.add(c));
+
+        // 如果当前图片中没有识别出基金编码，尝试从文本中提取可能的中文基金名称（调用 GLM 接口）
+        if (!matches.length && text) {
+          let parsedNames = [];
+          try {
+            parsedNames = await extractFundNamesWithLLM(text);
+          } catch (e) {
+            parsedNames = [];
+          }
+          parsedNames.forEach((name) => {
+            if (name && typeof name === 'string') {
+              allNames.add(name.trim());
+            }
+          });
+        }
      }

      if (abortScanRef.current) {
@@ -1118,6 +1134,28 @@ export default function HomePage() {
        return;
      }

+      // 如果所有截图中都没有识别出基金编码，尝试使用识别到的中文名称去搜索基金
+      if (allCodes.size === 0 && allNames.size > 0) {
+        const names = Array.from(allNames);
+        setScanProgress({ stage: 'verify', current: 0, total: names.length });
+        for (let i = 0; i < names.length; i++) {
+          if (abortScanRef.current) break;
+          const name = names[i];
+          setScanProgress(prev => ({ ...prev, current: i + 1 }));
+          try {
+            const list = await searchFundsWithTimeout(name, 8000);
+            // 只有当搜索结果「有且仅有一条」时，才认为名称匹配是唯一且有效的
+            if (Array.isArray(list) && list.length === 1) {
+              const found = list[0];
+              if (found && found.CODE) {
+                allCodes.add(found.CODE);
+              }
+            }
+          } catch (e) {
+          }
+        }
+      }
+
      const codes = Array.from(allCodes).sort();
      setScanProgress({ stage: 'verify', current: 0, total: codes.length });