xiongzhu преди 2 години
родител
ревизия
493e447717
променени са 7 файла, в които са добавени 132 реда и са изтрити 14 реда
  1. 2 0
      package.json
  2. 2 1
      src/auth/auth.service.ts
  3. 77 11
      src/knowledge-base/knowledge-base.service.ts
  4. 9 0
      src/org/entities/org.entity.ts
  5. 7 1
      src/org/org.controller.ts
  6. 19 1
      src/org/org.service.ts
  7. 16 0
      yarn.lock

+ 2 - 0
package.json

@@ -63,10 +63,12 @@
     "ioredis": "^5.3.2",
     "isomorphic-fetch": "^3.0.0",
     "keyv": "^4.5.2",
+    "mime": "^3.0.0",
     "mongodb": "^5.2.0",
     "mongoose": "^7.0.4",
     "mysql2": "^3.1.2",
     "nestjs-typeorm-paginate": "^4.0.3",
+    "node-xlsx": "^0.23.0",
     "nodemailer": "^6.9.1",
     "p-timeout": "^6.1.1",
     "passport": "^0.6.0",

+ 2 - 1
src/auth/auth.service.ts

@@ -16,7 +16,8 @@ export class AuthService {
         }
         this.usersService.updateIat(user)
         return {
-            access_token: this.jwtService.sign(payload)
+            access_token: this.jwtService.sign(payload),
+            user
         }
     }
 

+ 77 - 11
src/knowledge-base/knowledge-base.service.ts

@@ -23,6 +23,8 @@ import { Pagination, paginate } from 'nestjs-typeorm-paginate'
 import { KnowledgeFile } from './entities/knowledge-file.entity'
 import { FileService } from 'src/file/file.service'
 import { FileStatus } from './enums/file-status.enum'
+import xlsx from 'node-xlsx'
+import * as mime from 'mime'
 
 function formatEmbedding(embedding: number[]) {
     return `[${embedding.join(', ')}]`
@@ -70,7 +72,7 @@ export class KnowledgeBaseService {
                     type: DataTypes.INTEGER
                 },
                 orgId: {
-                    type: DataTypes.INTEGER 
+                    type: DataTypes.INTEGER
                 },
                 knowledgeId: {
                     type: DataTypes.INTEGER
@@ -155,7 +157,7 @@ export class KnowledgeBaseService {
         const { url: fileUrl } = await this.fileService.uploadBuffer(
             buffer,
             mimetype.split('/')[1],
-            originalname.split('.').slice(-1)
+            mime.getExtension(mimetype)
         )
         knowledgeFile = new KnowledgeFile()
         knowledgeFile.orgId = knowledgeBase.orgId
@@ -165,12 +167,18 @@ export class KnowledgeBaseService {
         knowledgeFile.fileName = fileName
         knowledgeFile.size = size
         knowledgeFile.fileUrl = fileUrl
-        await this.knowledgeFileRepository.save(knowledgeFile)
-        this.processKnowledgeFile(knowledgeFile, buffer)
+        switch (mimetype) {
+            case 'application/pdf':
+                await this.processPdfKnowledgeFile(knowledgeFile, buffer)
+                break
+            case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
+                await this.processExcelKnowledgeFile(knowledgeFile, buffer)
+                break
+        }
         return knowledgeFile
     }
 
-    public async processKnowledgeFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
+    public async processPdfKnowledgeFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
         knowledgeFile.status = FileStatus.PROCESSING
         try {
             await this.knowledgeFileRepository.save(knowledgeFile)
@@ -192,7 +200,11 @@ export class KnowledgeBaseService {
                 contents.push(paragraph)
             }
 
-            const embeddings = await this.createEmbeddings(contents)
+            const embeddings = await this.createEmbeddings(
+                contents.map((i) => {
+                    return { text: i }
+                })
+            )
             Logger.log(
                 `create embeddings finished, total token usage: ${embeddings.reduce((acc, cur) => acc + cur.token, 0)}`
             )
@@ -226,6 +238,57 @@ export class KnowledgeBaseService {
         }
     }
 
+    /**
+     * Parses an uploaded Excel workbook and stores one embedding per usable row.
+     *
+     * Every row is read as a pair: the first cell is the text that gets embedded
+     * and the second cell is the detail stored together with it. Rows whose first
+     * or second cell is missing, empty, or not a string are skipped. Rows from all
+     * sheets are collected first, so embeddings already stored under the same
+     * `fileHash` are replaced exactly once and `index` runs continuously across
+     * the whole workbook.
+     *
+     * Processing errors are not rethrown; they are recorded on `knowledgeFile`
+     * (status FAILED plus the error message) and persisted.
+     *
+     * @param knowledgeFile file record being processed; its status/error fields are updated and saved
+     * @param buffer raw contents of the uploaded workbook
+     */
+    public async processExcelKnowledgeFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
+        knowledgeFile.status = FileStatus.PROCESSING
+        try {
+            await this.knowledgeFileRepository.save(knowledgeFile)
+            const contents: { text: string; detail: string }[] = []
+            for (const sheet of xlsx.parse(buffer)) {
+                for (const row of sheet.data) {
+                    // Rows can be empty or shorter than two cells, so guard before
+                    // reading `.length` instead of letting one row fail the whole file.
+                    const text = row?.[0]
+                    const detail = row?.[1]
+                    if (typeof text === 'string' && text.length && typeof detail === 'string' && detail.length) {
+                        contents.push({ text, detail })
+                    }
+                }
+            }
+            const embeddings = await this.createEmbeddings(contents)
+            Logger.log(
+                `create embeddings finished, total token usage: ${embeddings.reduce(
+                    (acc, cur) => acc + cur.token,
+                    0
+                )}`
+            )
+            // Drop previously stored embeddings only once, after the new ones are
+            // ready; doing this per sheet would wipe the rows of earlier sheets.
+            await KnowledgeEmbedding.destroy({
+                where: {
+                    fileHash: knowledgeFile.fileHash
+                }
+            })
+            let index = 0
+            for (const item of embeddings) {
+                try {
+                    await KnowledgeEmbedding.create({
+                        orgId: knowledgeFile.orgId,
+                        knowledgeId: knowledgeFile.knowledgeId,
+                        fileId: knowledgeFile.id,
+                        fileHash: knowledgeFile.fileHash,
+                        text: item.text + '\n' + item.detail,
+                        embedding: formatEmbedding(item.embedding),
+                        index: index++
+                    })
+                } catch (error) {
+                    // A single failed insert is logged and skipped (best effort).
+                    Logger.error(error instanceof Error ? error.message : String(error))
+                }
+            }
+            knowledgeFile.status = FileStatus.DONE
+            await this.knowledgeFileRepository.save(knowledgeFile)
+        } catch (e) {
+            knowledgeFile.status = FileStatus.FAILED
+            knowledgeFile.error = e instanceof Error ? e.message : String(e)
+            await this.knowledgeFileRepository.save(knowledgeFile)
+        }
+    }
+
     /**
      * Reports whether `str` ends with one or more characters from a fixed set of
      * sentence-ending punctuation, closing quotes and closing brackets
      * (both ASCII and CJK forms, as listed in the character class below).
      *
      * @param str text to inspect
      * @returns `true` when the final character(s) of `str` belong to that set
      */
     isFullSentence(str) {
         return /[.!?。!?…;;::”’)】》」』〕〉》〗〞〟»"'\])}]+$/.test(str)
     }
@@ -236,22 +299,25 @@ export class KnowledgeBaseService {
         return hash.digest('hex')
     }
 
-    async createEmbeddings(content: string[]) {
+    async createEmbeddings(content: { text: string }[]) {
         const self = this
         const result = Array(content.length)
         async function worker(arg) {
-            result[arg.index] = await self.getEmbedding(arg.text)
+            result[arg.index] = {
+                ...arg,
+                ...(await self.getEmbedding(arg.text))
+            }
             Logger.log(`create embedding for ${arg.index + 1}/${content.length}`)
         }
         const q = queue.promise(worker, 32)
-        content.forEach((text, index) => {
+        content.forEach((item, index) => {
             q.push({
-                text,
+                ...item,
                 index
             })
         })
         await q.drained()
-        return result.filter((i) => i && i.text)
+        return result.filter((i) => i && i.embedding)
     }
 
     async getEmbedding(content: string, retry = 0) {

+ 9 - 0
src/org/entities/org.entity.ts

@@ -25,4 +25,13 @@ export class Org {
 
     @Column({ type: 'text' })
     contextTemplate: string
+
+    @Column({ unique: true, length: 120 })
+    subdomain: string
+    
+    @Column({ nullable: true })
+    customDomain: string
+
+    @Column({ nullable: true })
+    disclaimer: string
 }

+ 7 - 1
src/org/org.controller.ts

@@ -1,4 +1,4 @@
-import { Body, Controller, ForbiddenException, Get, Param, Post, Put, Req, Res, Sse } from '@nestjs/common'
+import { Body, Controller, ForbiddenException, Get, Param, Post, Put, Req, Res, Sse, Headers } from '@nestjs/common'
 import { Org } from './entities/org.entity'
 import { PageRequest } from '../common/dto/page-request'
 import { OrgService } from './org.service'
@@ -10,6 +10,12 @@ import { Public } from 'src/auth/public.decorator'
 export class OrgController {
     constructor(private readonly orgService: OrgService) {}
 
+    /**
+     * Public endpoint that resolves the organisation for the site a request
+     * originates from, using the request's `Origin` header.
+     *
+     * The scheme (`http://` / `https://`) and one trailing slash are stripped
+     * before the remaining host is handed to `OrgService.findByUrl`.
+     *
+     * @param origin value of the `Origin` request header, if any
+     * @throws ForbiddenException when the request carries no `Origin` header
+     */
+    @Get()
+    @Public()
+    async getOrgFromUrl(@Headers('origin') origin) {
+        // The header is optional; without this guard `origin.replace` would
+        // throw a TypeError and surface as an unhandled server error.
+        if (typeof origin !== 'string' || origin.length === 0) {
+            throw new ForbiddenException('Missing Origin header')
+        }
+        const host = origin.replace(/^http(s?):\/\//, '').replace(/\/$/, '')
+        return await this.orgService.findByUrl(host)
+    }
+
     @Get('/my')
     async my(@Req() req) {
         if (!req.user.orgId) {

+ 19 - 1
src/org/org.service.ts

@@ -38,6 +38,24 @@ export class OrgService {
         )
     }
 
+    /**
+     * Resolves the organisation that is served under the given host name.
+     *
+     * Hosts of the form `<subdomain>.org.gpt.izouma.com` are looked up by their
+     * `subdomain`; every other host is looked up as a `customDomain`.
+     *
+     * @param url host name to resolve (no scheme, no trailing slash)
+     * @returns the matching organisation
+     * @throws rejects (via `findOneOrFail`) when no organisation matches
+     */
+    async findByUrl(url: string): Promise<Org> {
+        // `\w+` accepts subdomains of any length; the pattern has a single
+        // capture group, so the subdomain is `match[1]`.
+        const match = /^(\w+)\.org\.gpt\.izouma\.com$/.exec(url)
+        if (match) {
+            const subdomain = match[1]
+            return await this.orgRepository.findOneOrFail({
+                where: {
+                    subdomain
+                }
+            })
+        }
+        return await this.orgRepository.findOneOrFail({
+            where: {
+                customDomain: url
+            }
+        })
+    }
+
     async findById(orgId: number): Promise<Org> {
         return await this.orgRepository.findOneOrFail({
             where: {
@@ -173,7 +191,7 @@ export class OrgService {
                     onMessage: (msg: string) => {
                         if (msg === '[DONE]') return
                         const response = JSON.parse(msg)
-                      
+
                         result.id = response.id
                         const delta = response.choices[0].delta
                         result.delta = delta.content

+ 16 - 0
yarn.lock

@@ -5125,6 +5125,11 @@ mime@2.6.0, mime@^2.4.5, mime@^2.5.2:
   resolved "https://registry.npmmirror.com/mime/-/mime-2.6.0.tgz#a2a682a95cd4d0cb1d6257e28f83da7e35800367"
   integrity sha512-USPkMeET31rOMiarsBNIHZKLGgvKc/LrjofAnBlOttf5ajRvqiRA8QsenbcooctK6d6Ts6aqZXBA+XbkKthiQg==
 
+mime@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.npmmirror.com/mime/-/mime-3.0.0.tgz#b374550dca3a0c18443b0c950a6a58f1931cf7a7"
+  integrity sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A==
+
 mimic-fn@^2.1.0:
   version "2.1.0"
   resolved "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz"
@@ -5437,6 +5442,13 @@ node-releases@^2.0.8:
   resolved "https://registry.npmjs.org/node-releases/-/node-releases-2.0.10.tgz"
   integrity sha512-5GFldHPXVG/YZmFzJvKK2zDSzPKhEp0+ZR5SVaoSag9fsL5YgHbUHDfnG5494ISANDcK4KwPXAx2xqVEydmd7w==
 
+node-xlsx@^0.23.0:
+  version "0.23.0"
+  resolved "https://registry.npmmirror.com/node-xlsx/-/node-xlsx-0.23.0.tgz#0c4b642f9457712d68f30e1e30351d640cc37e90"
+  integrity sha512-r3KaSZSsSrK92rbPXnX/vDdxURmPPik0rjJ3A+Pybzpjyrk4G6WyGfj8JIz5dMMEpCmWVpmO4qoVPBxnpLv/8Q==
+  dependencies:
+    xlsx "https://cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz"
+
 nodemailer-express-handlebars@^6.0.0:
   version "6.0.0"
   resolved "https://registry.npmjs.org/nodemailer-express-handlebars/-/nodemailer-express-handlebars-6.0.0.tgz"
@@ -7491,6 +7503,10 @@ write-file-atomic@^4.0.2:
     imurmurhash "^0.1.4"
     signal-exit "^3.0.7"
 
+"xlsx@https://cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz":
+  version "0.19.3"
+  resolved "https://cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz#f804c1850e2da5260165db0a059dc2a6099d55f3"
+
 xml2js@^0.4.16, xml2js@^0.4.22, xml2js@^0.4.23:
   version "0.4.23"
   resolved "https://registry.npmmirror.com/xml2js/-/xml2js-0.4.23.tgz#a0c69516752421eb2ac758ee4d4ccf58843eac66"