|
|
@@ -246,72 +246,72 @@ export class KnowledgeBaseService implements OnModuleInit {
|
|
|
this.processExcelKnowledgeFile(knowledgeFile, buffer)
|
|
|
break
|
|
|
case 'application/pdf':
|
|
|
- // this.processPdfKnowledgeFile(knowledgeFile, buffer)
|
|
|
- this.processFile(knowledgeFile, buffer)
|
|
|
+ this.processPdfKnowledgeFile(knowledgeFile, buffer)
|
|
|
+ // this.processFile(knowledgeFile, buffer)
|
|
|
break
|
|
|
}
|
|
|
return knowledgeFile
|
|
|
}
|
|
|
|
|
|
- // public async processPdfKnowledgeFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
|
|
|
- // knowledgeFile.status = FileStatus.PROCESSING
|
|
|
- // try {
|
|
|
- // await this.knowledgeFileRepository.save(knowledgeFile)
|
|
|
- // const pdf = await PdfParse(buffer)
|
|
|
- // const contents = []
|
|
|
- // let paragraph = ''
|
|
|
- // pdf.text
|
|
|
- // .trim()
|
|
|
- // .split('\n')
|
|
|
- // .forEach((line) => {
|
|
|
- // line = line.trim()
|
|
|
- // paragraph += line
|
|
|
- // if (this.isFullSentence(line)) {
|
|
|
- // contents.push(paragraph)
|
|
|
- // paragraph = ''
|
|
|
- // }
|
|
|
- // })
|
|
|
- // if (paragraph) {
|
|
|
- // contents.push(paragraph)
|
|
|
- // }
|
|
|
/**
 * Extracts the text of a PDF, groups it into paragraphs, creates an embedding
 * for each paragraph and replaces the embeddings stored for this file's hash.
 *
 * Status handling: the file is saved as PROCESSING before any work starts and
 * as DONE once the insert loop has finished. Any error thrown outside the
 * per-row insert switches the file to FAILED and stores the error text in
 * `knowledgeFile.error`. If persisting the FAILED state itself rejects, the
 * returned promise rejects.
 *
 * @param knowledgeFile file record whose status/error fields are updated and
 *   whose orgId, knowledgeId, id and fileHash are copied onto every embedding row
 * @param buffer raw PDF bytes handed to `PdfParse`
 */
public async processPdfKnowledgeFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
  knowledgeFile.status = FileStatus.PROCESSING
  try {
    await this.knowledgeFileRepository.save(knowledgeFile)
    const pdf = await PdfParse(buffer)

    // Accumulate trimmed lines (joined without a separator) until
    // `isFullSentence` reports that the current line closes the paragraph.
    const contents: string[] = []
    let paragraph = ''
    for (const rawLine of pdf.text.trim().split('\n')) {
      const line = rawLine.trim()
      paragraph += line
      if (this.isFullSentence(line)) {
        contents.push(paragraph)
        paragraph = ''
      }
    }
    // Keep trailing text that never reached a sentence end.
    if (paragraph) {
      contents.push(paragraph)
    }

    const embeddings = await this.createEmbeddings(contents.map((text) => ({ text })))
    Logger.log(
      `create embeddings finished, total token usage: ${embeddings.reduce((acc, cur) => acc + cur.token, 0)}`
    )

    // Drop every embedding previously stored under the same file hash before
    // inserting the fresh set.
    await KnowledgeEmbedding.destroy({
      where: {
        fileHash: knowledgeFile.fileHash
      }
    })

    // Inserts are best-effort: a failing row is logged and skipped, and it
    // still consumes its index so the remaining rows keep their position.
    let index = 0
    for (const item of embeddings) {
      try {
        await KnowledgeEmbedding.create({
          orgId: knowledgeFile.orgId,
          knowledgeId: knowledgeFile.knowledgeId,
          fileId: knowledgeFile.id,
          fileHash: knowledgeFile.fileHash,
          text: item.text,
          embedding: formatEmbedding(item.embedding),
          index: index++
        })
      } catch (error) {
        // A thrown value is not guaranteed to be an Error; never log `undefined`.
        Logger.error(error instanceof Error ? error.message : String(error))
      }
    }

    knowledgeFile.status = FileStatus.DONE
    await this.knowledgeFileRepository.save(knowledgeFile)
  } catch (e) {
    knowledgeFile.status = FileStatus.FAILED
    // Fall back to the stringified value so the failure reason is always recorded.
    knowledgeFile.error = e instanceof Error ? e.message : String(e)
    await this.knowledgeFileRepository.save(knowledgeFile)
  }
}
|
|
|
|
|
|
public async processFile(knowledgeFile: KnowledgeFile, buffer: Buffer) {
|
|
|
knowledgeFile.status = FileStatus.PROCESSING
|