xiongzhu 2 years ago
parent
commit
8b5020671c

+ 3 - 2
src/paper/paper-gen/abstract.ts

@@ -2,8 +2,9 @@ import { HumanMessage, SystemMessage } from 'langchain/schema'
 import { StructuredOutputParser } from 'langchain/output_parsers'
 import { z } from 'zod'
 import { uploadUml } from './upload'
+import { LLMTools } from './types'
 
-export async function _createAbstract(tools, title, desc) {
+export async function _createAbstract(tools: LLMTools, title, desc) {
     const { llm, usage, conversation } = tools
     let { content } = await llm.call([
         new HumanMessage(`我正在给我的毕业设计撰写一篇论文
@@ -26,7 +27,7 @@ ${desc}
     return `# 摘要\n\n${abstract}\n\n# Abstract\n\n${translated}`
 }
 
-export async function createAbstract(tools, title, desc) {
+export async function createAbstract(tools: LLMTools, title, desc) {
     const pRetry = (await eval("import('p-retry')")).default
     return await pRetry(() => _createAbstract(tools, title, desc), { retries: 5 })
 }
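
Every generator touched by this commit loads p-retry the same way: the eval("import('p-retry')") wrapper keeps the CommonJS build of tsc from rewriting the dynamic import of the ESM-only p-retry package, and the worker is then retried up to 5 times. A minimal sketch of that recurring pattern, assuming a hypothetical withRetries helper and a fetchAbstract stand-in (neither exists in the repo):

// Hypothetical helper mirroring the eval + pRetry pattern used above.
async function withRetries<T>(task: () => Promise<T>, retries = 5): Promise<T> {
    // eval() stops the CommonJS output from turning this ESM import into require()
    const pRetry = (await eval("import('p-retry')")).default
    return await pRetry(task, { retries })
}

// Stand-in for a real generator such as _createAbstract.
async function fetchAbstract(): Promise<string> {
    return '# 摘要\n\n...'
}

// usage: const abstract = await withRetries(() => fetchAbstract())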

+ 3 - 2
src/paper/paper-gen/arch.ts

@@ -2,8 +2,9 @@ import { HumanMessage } from 'langchain/schema'
 import { StructuredOutputParser } from 'langchain/output_parsers'
 import { z } from 'zod'
 import { uploadUml } from './upload'
+import { LLMTools } from './types'
 
-export async function _createArch(tools, title, desc) {
+export async function _createArch(tools: LLMTools, title, desc) {
     const { llm, usage, conversation } = tools
     let { content: code } = await llm.call([
         new HumanMessage(`我的毕业设计项目是${title}
@@ -50,7 +51,7 @@ rectangle 租客管理
     return { url }
 }
 
-export async function createArch(tools, title, desc) {
+export async function createArch(tools: LLMTools, title, desc) {
     const pRetry = (await eval("import('p-retry')")).default
     return await pRetry(() => _createArch(tools, title, desc), { retries: 5 })
 }

+ 4 - 30
src/paper/paper-gen/chapter.ts

@@ -2,8 +2,9 @@ import { HumanMessage, SystemMessage } from 'langchain/schema'
 import { StructuredOutputParser } from 'langchain/output_parsers'
 import { z } from 'zod'
 import { uploadUml } from './upload'
+import { LLMTools } from './types'
 
-async function _genChapters(tools, major, title, desc) {
+async function _genChapters(tools: LLMTools, major: string, title: string, desc: string) {
     const { llm, usage, conversation } = tools
     const scheme = StructuredOutputParser.fromZodSchema(
         z.array(
@@ -48,36 +49,9 @@ ${scheme.getFormatInstructions()}
     return await scheme.parse(response)
 }
 
-async function genSections(tools, major, title, desc, chapters, i) {
-    const { llm, usage, conversation } = tools
-
-    const scheme = StructuredOutputParser.fromZodSchema(
-        z.array(
-            z.object({
-                sectionName: z.string().describe('小节名称'),
-                sectionDesc: z.string().describe('小节描述')
-            })
-        )
-    )
-
-    const { content } = await llm.call([
-        new SystemMessage(`你是一个擅长写${major}专业毕业论文的专家,你的任务是帮我完成我的论文。
-    你要根据我的要求,帮我完成我的论文`),
-        new HumanMessage(`${scheme.getFormatInstructions()}
-------
-我的论文标题是${title}
-目前我们已经确定了论文的粗略大纲:
-${chapters.map((i) => `- ${i.chapterName}\n    ${i.chapterDesc}`).join('\n')}
-
-现在我们来完善每章的小节
-首先是第${i + 1}章${chapters[i].chapterName}:`)
-    ])
-    chapters[i].sections = await scheme.parse(content)
-}
-
-export async function genChapters(tools, major, title, desc) {
+export async function genChapters(tools: LLMTools, major, title, desc) {
     const pRetry = (await eval("import('p-retry')")).default
-    const chapters = await pRetry(() => _genChapters(major, tools, title, desc), {
+    const chapters = await pRetry(() => _genChapters(tools, major, title, desc), {
         retries: 5,
         onFailedAttempt: (e) => console.log(e.stack)
     })

+ 3 - 2
src/paper/paper-gen/er.ts

@@ -2,8 +2,9 @@ import { HumanMessage } from 'langchain/schema'
 import { StructuredOutputParser } from 'langchain/output_parsers'
 import { z } from 'zod'
 import { uploadUml } from './upload'
+import { LLMTools } from './types'
 
-async function genErNames(tools, title, desc) {
+async function genErNames(tools: LLMTools, title, desc) {
     const { llm, usage, conversation } = tools
     const scheme = StructuredOutputParser.fromZodSchema(
         z.array(
@@ -66,7 +67,7 @@ price
     }
 }
 
-export async function createER(tools, title, desc) {
+export async function createER(tools: LLMTools, title, desc) {
     const pRetry = (await eval("import('p-retry')")).default
     const list = await pRetry(() => genErNames(tools, title, desc), { retries: 5 })
     return await Promise.all(list.map((item) => pRetry(() => plotER(tools, title, desc, item.title), { retries: 5 })))

+ 3 - 2
src/paper/paper-gen/flow.ts

@@ -2,8 +2,9 @@ import { HumanMessage } from 'langchain/schema'
 import { StructuredOutputParser } from 'langchain/output_parsers'
 import { z } from 'zod'
 import { uploadUml } from './upload'
+import { LLMTools } from './types'
 
-async function genFlowNames(tools, title, desc) {
+async function genFlowNames(tools: LLMTools, title, desc) {
     const { llm, usage, conversation } = tools
     const scheme = StructuredOutputParser.fromZodSchema(
         z.array(
@@ -63,7 +64,7 @@ stop
     }
 }
 
-export async function createFlow(tools, title, desc) {
+export async function createFlow(tools: LLMTools, title, desc) {
     const pRetry = (await eval("import('p-retry')")).default
     const list = await pRetry(() => genFlowNames(tools, title, desc), { retries: 5 })
     return await Promise.all(list.map((item) => pRetry(() => plotFlow(tools, title, desc, item.title), { retries: 5 })))

+ 31 - 12
src/paper/paper-gen/general.ts

@@ -16,7 +16,7 @@ export async function genGeneralPaper(major: string, title: string, desc: string
     const { llm, usage, conversation } = tools
     const pRetry = (await eval("import('p-retry')")).default
 
-    const chapters = await genChapters(major, tools, title, desc)
+    const chapters = await genChapters(tools, major, title, desc)
     Logger.log(JSON.stringify(chapters, null, 4))
 
     let paper = new WritableStreamBuffer()
@@ -83,7 +83,7 @@ export async function genGeneralPaper1(major: string, title: string, desc: strin
     const { llm, usage, conversation } = tools
     const pRetry = (await eval("import('p-retry')")).default
 
-    const chapters = await genChapters(major, tools, title, desc)
+    const chapters = await genChapters(tools, major, title, desc)
     Logger.log(JSON.stringify(chapters, null, 4))
 
     let paper = new WritableStreamBuffer()
@@ -93,20 +93,23 @@ export async function genGeneralPaper1(major: string, title: string, desc: strin
 
     for (let i = 0; i < chapters.length; i++) {
         const sysPrompt = `你是一个擅长写${major}专业毕业论文的专家。
-你的任务是写一篇标题为"${title}"的论文。以下是本论文的大纲,请你根据大纲分章节帮我生成文章:
-${chapters.map((i) => `- ${i.chapterName}\n    ${i.chapterDesc}`).join('\n')}`
-        let prompt = `请开始完成第${i + 1}章:${chapters[i].chapterName},本章主要内容是${chapters[i].chapterDesc}`
+你的任务是写一篇标题为"${title}"的论文。
+以下是论文的大纲:
+${chapters.map((i, index) => `${index + 1}. ${i.chapterName}\n    ${i.chapterDesc}`).join('\n')}
+
+你要根据大纲分章节帮我生成文章。
+你的语气应正式,内容应适合和吸引一般读者。
+你要严格按照markdown格式返回内容。
+你只需要输出能够直接出现在论文中的内容,不需要输出你对该内容的总结或者介绍等其他文字,不需要出现介绍下一章或其他承上启下的文字。`
+        let prompt = `第${i + 1}章是:${chapters[i].chapterName},本章主要内容是${chapters[i].chapterDesc}`
         if (chapters[i].sections && chapters[i].sections.length) {
-            prompt += `\n本章分为多个小节,分别是:
+            prompt += `\n本章分为多个小节,每小节的主要论述内容应采用五段式结构进行论述,论述内容应尽量详细且逻辑清晰。
+以下是各小节的简介:
 ${chapters[i].sections.map((e) => `- ${e.sectionName}\n    ${e.sectionDesc}`).join('\n')}
 
-请按照此结构完成本章内容`
+请按照此结构一次性完成本章内容`
         }
         prompt += `\n
-每小节的主要论述内容应采用五段式结构进行论述,论述内容应尽量详细且逻辑清晰。
-语气应正式,内容应适合和吸引一般读者。
-请严格按照markdown格式返回内容。
-你只需要输出能够直接出现在论文中的内容,不需要输出你对该内容的总结或者介绍等其他文字,不需要出现介绍下一章或其他承上启下的文字。
 第${i + 1}章:# ${chapters[i].chapterName}:`
         const { content } = await llm.call([new SystemMessage(sysPrompt), new HumanMessage(prompt)])
         if (!content.trim().split('\n')[0].includes(chapters[i].chapterName)) {
@@ -115,7 +118,23 @@ ${chapters[i].sections.map((e) => `- ${e.sectionName}\n    ${e.sectionDesc}`).jo
         paper.write('\n\n' + content)
     }
 
-    const content = paper.getContentsAsString('utf8')
+    let content = paper.getContentsAsString('utf8').split('\n')
+    for (let i = 0; i < content.length; i++) {
+        const match = content[i].match(/(^#+)([^ #]+)/)
+        if (match) {
+            content[i] = match[1] + ' ' + match[2]
+        }
+        if (/^#+.*$/.test(content[i])) {
+            if (content[i - 1] && content[i - 1].length) {
+                content[i] = '\n' + content[i]
+            }
+            if (content[i + 1] && content[i + 1].length) {
+                content[i] += '\n'
+            }
+        }
+    }
+    content = content.join('\n')
+
     const duration = (Date.now() - startTime) / 1000
     const fileUrl = await uploadDoc(title, content)
     return {
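
The new post-processing loop above tidies the markdown the model returns before uploading: it inserts the space that belongs after the leading # run and pads heading lines with blank lines. A standalone sketch of the same idea with a hypothetical normalizeHeadings helper (not in the repo); unlike the ([^ #]+) capture above, it keeps heading text that contains spaces intact:

// Hypothetical helper: same clean-up as the loop in genGeneralPaper1.
function normalizeHeadings(markdown: string): string {
    const lines = markdown.split('\n')
    for (let i = 0; i < lines.length; i++) {
        // '##标题' -> '## 标题'; headings that already have the space are untouched
        lines[i] = lines[i].replace(/^(#+)(?=[^\s#])/, '$1 ')
        if (/^#+\s/.test(lines[i])) {
            // surround the heading with blank lines when its neighbours have text
            if (lines[i - 1] && lines[i - 1].length) lines[i] = '\n' + lines[i]
            if (lines[i + 1] && lines[i + 1].length) lines[i] += '\n'
        }
    }
    return lines.join('\n')
}

// usage: const cleaned = normalizeHeadings(paper.getContentsAsString('utf8'))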

+ 2 - 1
src/paper/paper-gen/llm.ts

@@ -7,8 +7,9 @@ import { BufferMemory, BufferWindowMemory } from 'langchain/memory'
 import { Logger } from '@nestjs/common'
 import { writeFile, mkdirSync, appendFileSync } from 'fs'
 import path = require('path')
+import { LLMTools } from './types'
 require('dotenv').config()
-export function createLLM(model = 'gpt-3.5-turbo-16k') {
+export function createLLM(model = 'gpt-3.5-turbo-16k') : LLMTools {
     const usage = { completionTokens: 0, promptTokens: 0, totalTokens: 0 }
     const llm = new ChatOpenAI({
         openAIApiKey: process.env.OPENAI_API_KEY,

+ 3 - 2
src/paper/paper-gen/table.ts

@@ -2,8 +2,9 @@ import { HumanMessage } from 'langchain/schema'
 import { StructuredOutputParser } from 'langchain/output_parsers'
 import { z } from 'zod'
 import pRetry from 'p-retry'
+import { LLMTools } from './types'
 
-async function listTables(tools, title, desc) {
+async function listTables(tools: LLMTools, title, desc) {
     const { llm, usage, conversation } = tools
     const scheme = StructuredOutputParser.fromZodSchema(z.array(z.string().describe('表名')))
     const res = await llm.call([
@@ -37,7 +38,7 @@ ${list.map((i) => `- ${i}`).join('\n')}
     }
 }
 
-export async function createTable(tools, title, desc) {
+export async function createTable(tools: LLMTools, title, desc) {
     const pRetry = (await eval("import('p-retry')")).default
     const list = await pRetry(() => listTables(tools, title, desc), { retries: 5 })
 

+ 19 - 0
src/paper/paper-gen/types.ts

@@ -0,0 +1,19 @@
+import { ConversationChain } from 'langchain/chains'
+import { ChatOpenAI, ChatOpenAICallOptions } from 'langchain/chat_models/openai'
+import { BufferWindowMemory } from 'langchain/memory'
+
+export declare type LLMTools = {
+    llm: ChatOpenAI<ChatOpenAICallOptions>
+    usage: {
+        completionTokens: number
+        promptTokens: number
+        totalTokens: number
+    }
+    conversation(
+        system: any,
+        k?: number
+    ): {
+        memory: BufferWindowMemory
+        chain: ConversationChain
+    }
+}
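
The new types.ts centralises the shape that createLLM() builds and that every generator now accepts. A hedged usage sketch of how the pieces fit after this commit; the demo function, title, and description strings are placeholders, and the call shapes follow the signatures shown in the hunks above:

// Hypothetical wiring example; not part of the commit.
import { createLLM } from './llm'
import { createAbstract } from './abstract'
import { LLMTools } from './types'

async function demo(): Promise<void> {
    const tools: LLMTools = createLLM('gpt-3.5-turbo-16k')
    const abstract = await createAbstract(tools, 'Example thesis title', 'Short description of the project')
    console.log(abstract)
    // token counts accumulate on the shared usage object
    console.log(tools.usage.totalTokens)
}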

+ 0 - 1
src/paper/paper-gen/upload.ts

@@ -58,7 +58,6 @@ export async function uploadUml(plantUml) {
 
 export async function uploadDoc(title, content) {
     Logger.log(`title: ${title}`, '生成docx')
-    Logger.log(`content: ${content}`, '生成docx')
     const s3 = new S3Client({
         region: process.env.ALIYUN_OSS_REGION,
         endpoint: `https://${process.env.ALIYUN_OSS_ENDPOINT}`,

+ 3 - 3
src/paper/paper-gen/useCase.ts

@@ -4,6 +4,7 @@ import { z } from 'zod'
 import { uploadUml } from './upload'
 import { createLLM } from './llm'
 import { Logger } from '@nestjs/common'
+import { LLMTools } from './types'
 async function genRoles(tools, title, desc) {
     const { llm, usage, conversation } = tools
 
@@ -25,7 +26,7 @@ ${desc}
     ])
     return await scheme.parse(content)
 }
-export async function _createUseCase(tools, title, desc, roles, i) {
+export async function _createUseCase(tools: LLMTools, title, desc, roles, i) {
     const { llm } = tools
 
     let { content: code } = await llm.call([
@@ -44,7 +45,7 @@ ${roles.map((e, i) => `${i + 1}. ${e.roleName}: ${e.roleDesc}`).join('\n')}
     return { name: `${roles[i].roleName}`, url, desc: `${roles[i].roleDesc}` }
 }
 
-export async function createUseCase(tools, title, desc) {
+export async function createUseCase(tools: LLMTools, title, desc) {
     const pRetry = (await eval("import('p-retry')")).default
     const roles = await pRetry(() => genRoles(tools, title, desc), {
         retries: 5,
@@ -63,4 +64,3 @@ export async function createUseCase(tools, title, desc) {
         )
     )
 }
-