xiongzhu 2 سال پیش
والد
کامیت
45c0923305
1 فایلهای تغییر یافته به همراه 60 افزوده شده و 60 حذف شده
  1. 60 60
      src/knowledge-base/knowledge-base.service.ts

+ 60 - 60
src/knowledge-base/knowledge-base.service.ts

@@ -246,72 +246,72 @@ export class KnowledgeBaseService implements OnModuleInit {
                 this.processExcelKnowledgeFile(knowledgeFile, buffer)
                 break
             case 'application/pdf':
-                // this.processPdfKnowledgeFile(knowledgeFile, buffer)
-                this.processFile(knowledgeFile, buffer)
+                this.processPdfKnowledgeFile(knowledgeFile, buffer)
+                // this.processFile(knowledgeFile, buffer)
                 break
         }
         return knowledgeFile
     }
 
-    // public async processPdfKnowledgeFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
-    //     knowledgeFile.status = FileStatus.PROCESSING
-    //     try {
-    //         await this.knowledgeFileRepository.save(knowledgeFile)
-    //         const pdf = await PdfParse(buffer)
-    //         const contents = []
-    //         let paragraph = ''
-    //         pdf.text
-    //             .trim()
-    //             .split('\n')
-    //             .forEach((line) => {
-    //                 line = line.trim()
-    //                 paragraph += line
-    //                 if (this.isFullSentence(line)) {
-    //                     contents.push(paragraph)
-    //                     paragraph = ''
-    //                 }
-    //             })
-    //         if (paragraph) {
-    //             contents.push(paragraph)
-    //         }
+    public async processPdfKnowledgeFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
+        knowledgeFile.status = FileStatus.PROCESSING
+        try {
+            await this.knowledgeFileRepository.save(knowledgeFile)
+            const pdf = await PdfParse(buffer)
+            const contents = []
+            let paragraph = ''
+            pdf.text
+                .trim()
+                .split('\n')
+                .forEach((line) => {
+                    line = line.trim()
+                    paragraph += line
+                    if (this.isFullSentence(line)) {
+                        contents.push(paragraph)
+                        paragraph = ''
+                    }
+                })
+            if (paragraph) {
+                contents.push(paragraph)
+            }
 
-    //         const embeddings = await this.createEmbeddings(
-    //             contents.map((i) => {
-    //                 return { text: i }
-    //             })
-    //         )
-    //         Logger.log(
-    //             `create embeddings finished, total token usage: ${embeddings.reduce((acc, cur) => acc + cur.token, 0)}`
-    //         )
-    //         await KnowledgeEmbedding.destroy({
-    //             where: {
-    //                 fileHash: knowledgeFile.fileHash
-    //             }
-    //         })
-    //         let i = 0
-    //         for (const item of embeddings) {
-    //             try {
-    //                 await KnowledgeEmbedding.create({
-    //                     orgId: knowledgeFile.orgId,
-    //                     knowledgeId: knowledgeFile.knowledgeId,
-    //                     fileId: knowledgeFile.id,
-    //                     fileHash: knowledgeFile.fileHash,
-    //                     text: item.text,
-    //                     embedding: formatEmbedding(item.embedding),
-    //                     index: i++
-    //                 })
-    //             } catch (error) {
-    //                 Logger.error(error.message)
-    //             }
-    //         }
-    //         knowledgeFile.status = FileStatus.DONE
-    //         await this.knowledgeFileRepository.save(knowledgeFile)
-    //     } catch (e) {
-    //         knowledgeFile.status = FileStatus.FAILED
-    //         knowledgeFile.error = e.message
-    //         await this.knowledgeFileRepository.save(knowledgeFile)
-    //     }
-    // }
+            const embeddings = await this.createEmbeddings(
+                contents.map((i) => {
+                    return { text: i }
+                })
+            )
+            Logger.log(
+                `create embeddings finished, total token usage: ${embeddings.reduce((acc, cur) => acc + cur.token, 0)}`
+            )
+            await KnowledgeEmbedding.destroy({
+                where: {
+                    fileHash: knowledgeFile.fileHash
+                }
+            })
+            let i = 0
+            for (const item of embeddings) {
+                try {
+                    await KnowledgeEmbedding.create({
+                        orgId: knowledgeFile.orgId,
+                        knowledgeId: knowledgeFile.knowledgeId,
+                        fileId: knowledgeFile.id,
+                        fileHash: knowledgeFile.fileHash,
+                        text: item.text,
+                        embedding: formatEmbedding(item.embedding),
+                        index: i++
+                    })
+                } catch (error) {
+                    Logger.error(error.message)
+                }
+            }
+            knowledgeFile.status = FileStatus.DONE
+            await this.knowledgeFileRepository.save(knowledgeFile)
+        } catch (e) {
+            knowledgeFile.status = FileStatus.FAILED
+            knowledgeFile.error = e.message
+            await this.knowledgeFileRepository.save(knowledgeFile)
+        }
+    }
 
     public async processFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
         knowledgeFile.status = FileStatus.PROCESSING