|
|
@@ -1,4 +1,4 @@
|
|
|
-import { Injectable, InternalServerErrorException, Logger } from '@nestjs/common'
|
|
|
+import { BadRequestException, Injectable, InternalServerErrorException, Logger } from '@nestjs/common'
|
|
|
import * as PdfParse from '@cyber2024/pdf-parse-fixed'
|
|
|
import { createHash } from 'crypto'
|
|
|
import { Tiktoken, get_encoding } from '@dqbd/tiktoken'
|
|
|
@@ -11,6 +11,7 @@ import { ConfigService } from '@nestjs/config'
|
|
|
import { ChatEmbedding } from './entities/chat-embedding.entity'
|
|
|
import { VECTOR } from './pgvector'
|
|
|
import { SysConfigService } from '../sys-config/sys-config.service'
|
|
|
+import { ApiUserService } from '../api-users/api-user.service'
|
|
|
|
|
|
function formatEmbedding(embedding: number[]) {
|
|
|
return `[${embedding.join(', ')}]`
|
|
|
@@ -21,7 +22,11 @@ export class ChatPdfService {
|
|
|
private readonly tokenizer: Tiktoken
|
|
|
private readonly openai: OpenAIApi
|
|
|
private readonly sequelize: Sequelize
|
|
|
- constructor(private readonly sysConfigService: SysConfigService, private readonly configService: ConfigService) {
|
|
|
+ constructor(
|
|
|
+ private readonly sysConfigService: SysConfigService,
|
|
|
+ private readonly configService: ConfigService,
|
|
|
+ private readonly apiUserService: ApiUserService
|
|
|
+ ) {
|
|
|
this.tokenizer = get_encoding('cl100k_base')
|
|
|
this.openai = new OpenAIApi(
|
|
|
new Configuration({
|
|
|
@@ -252,6 +257,10 @@ export class ChatPdfService {
|
|
|
}
|
|
|
|
|
|
async customerAsk(q: string, name: string) {
|
|
|
+ let apiUser = this.apiUserService.findByCode(name)
|
|
|
+ if(!apiUser) {
|
|
|
+ throw new BadRequestException("not a enabled api user")
|
|
|
+ }
|
|
|
const defSysMsg = (await this.sysConfigService.findByName('customer_system_message'))?.value
|
|
|
const keywords = await this.getKeywords(q)
|
|
|
const { embedding: keywordEmbedding } = await this.getEmbedding(keywords)
|
|
|
@@ -289,4 +298,63 @@ export class ChatPdfService {
|
|
|
throw new InternalServerErrorException(error.message)
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+  /**
+   * Ingests a PDF uploaded by an API user: splits the extracted text into
+   * paragraphs, creates an embedding for each one and stores them as
+   * `ChatEmbedding` rows named after the MD5 of the file contents.
+   *
+   * If rows with that name already exist, the PDF is not processed again and
+   * the existing name is returned immediately.
+   *
+   * @param file - Multer upload; only `file.buffer` is read.
+   * @param userId - Id of the API user performing the upload.
+   * @returns `{ name }`, where `name` is the MD5 used as `ChatEmbedding.name`.
+   * @throws BadRequestException when no API user matches `userId`, or when the
+   *   upload carries no in-memory buffer.
+   */
+  public async apiUpload(file: Express.Multer.File, userId: number) {
+    const apiUser = await this.apiUserService.findById(userId)
+    if (!apiUser) {
+      throw new BadRequestException("Can't find api user")
+    }
+    // Reject a missing upload explicitly instead of failing with a TypeError below.
+    if (!file?.buffer) {
+      throw new BadRequestException('No file content uploaded')
+    }
+    const { buffer } = file
+    const md5 = this.calculateMD5(buffer)
+
+    // NOTE(review): on this early return `apiUser.code` is left untouched,
+    // exactly as before — confirm that is intended for already-known PDFs.
+    const existing = await ChatEmbedding.findAll({ where: { name: md5 } })
+    if (existing.length) {
+      return { name: md5 }
+    }
+
+    const pdf = await PdfParse(buffer)
+
+    // Glue consecutive trimmed lines together until `isFullSentence` accepts
+    // the current line; each flushed chunk becomes one paragraph to embed.
+    const contents: string[] = []
+    let paragraph = ''
+    for (const rawLine of pdf.text.trim().split('\n')) {
+      const line = rawLine.trim()
+      paragraph += line
+      if (this.isFullSentence(line)) {
+        contents.push(paragraph)
+        paragraph = ''
+      }
+    }
+    // Keep trailing text that never reached a sentence end.
+    if (paragraph) {
+      contents.push(paragraph)
+    }
+
+    const embeddings = await this.createEmbeddings(contents)
+    const totalTokens = embeddings.reduce((acc, cur) => acc + cur.token, 0)
+    Logger.log(`create embeddings finished, total token usage: ${totalTokens}`)
+
+    // `num` is the paragraph's position; it advances even when an insert fails.
+    for (const [num, item] of embeddings.entries()) {
+      try {
+        await ChatEmbedding.create({
+          name: md5,
+          text: item.text,
+          num,
+          embedding: formatEmbedding(item.embedding)
+        })
+      } catch (error) {
+        // Best effort: a row that cannot be stored is logged and skipped.
+        Logger.error(error instanceof Error ? error.message : String(error))
+      }
+    }
+
+    apiUser.code = md5
+    // Await the update so the response is only sent once it has completed and
+    // a failure reaches the caller instead of becoming an unhandled rejection.
+    await this.apiUserService.update(userId, apiUser)
+    return { name: md5 }
+  }
|
|
|
}
|