Răsfoiți Sursa

Merge branch 'master' of http://git.izouma.com/xiongzhu/chat-api

panhui 2 ani în urmă
părinte
comite
88faad58a3
2 fișiere modificate, cu 62 de adăugiri și 61 de ștergeri
  1. 2 1
      .env.production
  2. 60 60
      src/knowledge-base/knowledge-base.service.ts

+ 2 - 1
.env.production

@@ -49,6 +49,7 @@ ALIYUN_SMS_SIGN=走马信息
 ALIYUN_SMS_TEMPLATE_CODE=SMS_175485688
 
 AZURE_OPENAI_KEY=62dd8a1466524c64967810c692f0197e
+AZURE_OPENAI_INSTANCE=zouma1
 AZURE_OPENAI_ENDPOINT=https://zouma1.openai.azure.com
 AZURE_OPENAI_DEPLOYMENT=gpt-35-turbo
 AZURE_OPENAI_VERSION=2023-03-15-preview
@@ -56,7 +57,7 @@ AZURE_OPENAI_VERSION=2023-03-15-preview
 AZURE_EMBEDDING_KEY=beb32e4625a94b65ba8bc0ba1688c4d2
 AZURE_EMBEDDING_INSTANCE=zouma
 AZURE_EMBEDDING_DEPLOYMENT=embedding
-AZURE_OPENAI_VERSION=2023-03-15-preview
+AZURE_EMBEDDING_VERSION=2023-03-15-preview
 
 OPENAI_API_KEY=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
 

+ 60 - 60
src/knowledge-base/knowledge-base.service.ts

@@ -246,72 +246,72 @@ export class KnowledgeBaseService implements OnModuleInit {
                 this.processExcelKnowledgeFile(knowledgeFile, buffer)
                 break
             case 'application/pdf':
-                // this.processPdfKnowledgeFile(knowledgeFile, buffer)
-                this.processFile(knowledgeFile, buffer)
+                this.processPdfKnowledgeFile(knowledgeFile, buffer)
+                // this.processFile(knowledgeFile, buffer)
                 break
         }
         return knowledgeFile
     }
 
-    // public async processPdfKnowledgeFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
-    //     knowledgeFile.status = FileStatus.PROCESSING
-    //     try {
-    //         await this.knowledgeFileRepository.save(knowledgeFile)
-    //         const pdf = await PdfParse(buffer)
-    //         const contents = []
-    //         let paragraph = ''
-    //         pdf.text
-    //             .trim()
-    //             .split('\n')
-    //             .forEach((line) => {
-    //                 line = line.trim()
-    //                 paragraph += line
-    //                 if (this.isFullSentence(line)) {
-    //                     contents.push(paragraph)
-    //                     paragraph = ''
-    //                 }
-    //             })
-    //         if (paragraph) {
-    //             contents.push(paragraph)
-    //         }
+    public async processPdfKnowledgeFile(knowledgeFile: KnowledgeFile, buffer: Buffer) { // Splits a PDF's text into paragraphs, creates embeddings for them, and stores one KnowledgeEmbedding row per paragraph.
+        knowledgeFile.status = FileStatus.PROCESSING
+        try {
+            await this.knowledgeFileRepository.save(knowledgeFile) // persist the PROCESSING status before the parsing/embedding work starts
+            const pdf = await PdfParse(buffer)
+            const contents = [] // completed paragraphs, in document order
+            let paragraph = '' // accumulates trimmed lines until one is judged a full sentence
+            pdf.text
+                .trim()
+                .split('\n')
+                .forEach((line) => {
+                    line = line.trim()
+                    paragraph += line // lines are joined with no separator between them
+                    if (this.isFullSentence(line)) { // isFullSentence is defined elsewhere; presumably detects sentence-ending punctuation (TODO confirm)
+                        contents.push(paragraph)
+                        paragraph = ''
+                    }
+                })
+            if (paragraph) { // flush trailing text whose last line was never judged a full sentence
+                contents.push(paragraph)
+            }
 
+            const embeddings = await this.createEmbeddings(
+                contents.map((i) => {
+                    return { text: i } // wrap each paragraph as { text } for createEmbeddings
+                })
+            )
+            Logger.log(
+                `create embeddings finished, total token usage: ${embeddings.reduce((acc, cur) => acc + cur.token, 0)}`
+            )
+            await KnowledgeEmbedding.destroy({ // delete existing rows with the same file hash before inserting the new ones
+                where: {
+                    fileHash: knowledgeFile.fileHash
+                }
+            })
+            let i = 0 // running value written to each row's `index` field
+            for (const item of embeddings) {
+                try {
+                    await KnowledgeEmbedding.create({
+                        orgId: knowledgeFile.orgId,
+                        knowledgeId: knowledgeFile.knowledgeId,
+                        fileId: knowledgeFile.id,
+                        fileHash: knowledgeFile.fileHash,
+                        text: item.text,
+                        embedding: formatEmbedding(item.embedding),
+                        index: i++ // incremented while building the argument, so a failed create() still consumes an index
+                    })
+                } catch (error) {
+                    Logger.error(error.message) // a failed row is only logged; the loop continues and the file is still marked DONE below
+                }
+            }
+            knowledgeFile.status = FileStatus.DONE
+            await this.knowledgeFileRepository.save(knowledgeFile)
+        } catch (e) { // any error not caught by the per-row handler marks the file FAILED and records its message
+            knowledgeFile.status = FileStatus.FAILED
+            knowledgeFile.error = e.message
+            await this.knowledgeFileRepository.save(knowledgeFile) // not guarded: if this save rejects, the rejection propagates to the caller
+        }
+    }
 
     public async processFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
         knowledgeFile.status = FileStatus.PROCESSING