paper.service.ts 13 KB


  1. import { HttpException, Injectable, InternalServerErrorException, Logger, OnModuleInit } from '@nestjs/common'
  2. import { InjectRepository } from '@nestjs/typeorm'
  3. import { PaperOrder, PaperOrderStatus } from './entities/paper-order.entity'
  4. import { Repository, Like, Between } from 'typeorm'
  5. import { PaperGenResult } from './entities/paper-gen-result.entity'
  6. import { Pagination, paginate } from 'nestjs-typeorm-paginate'
  7. import { PageRequest } from 'src/common/dto/page-request'
  8. import { CreatePaperOrderDto } from './dto/create-order.dto'
  9. import { genPaper } from './paper-gen'
  10. import { genGeneralPaper1 } from './paper-gen/general'
  11. import { createLLM } from './paper-gen/llm'
  12. import { genChapters } from './paper-gen/chapter'
  13. import { HumanMessage } from 'langchain/schema'
  14. import axios from 'axios'
  15. import * as qs from 'qs'
  16. import { load } from 'cheerio'
  17. import { parse } from 'date-fns'
  18. import { RefSearchRecord } from './entities/ref-search-record.entity'
  19. import { MembershipService } from 'src/membership/membership.service'
  20. import { startOfDay, endOfDay } from 'date-fns'
  21. import { MemberType } from 'src/membership/entities/membership.entity'
  22. import { PaperOrderAttachment } from './entities/paper-order-attachment.entity'
  /**
   * Service behind the paper-generation feature: paper orders, generated
   * results, order attachments, and the reference search against kns.cnki.net.
   */
  @Injectable()
  export class PaperService implements OnModuleInit {
    constructor(
      @InjectRepository(PaperOrder)
      private readonly paperOrderRepository: Repository<PaperOrder>,
      @InjectRepository(PaperGenResult)
      private readonly paperGenResultRepository: Repository<PaperGenResult>,
      @InjectRepository(RefSearchRecord)
      private readonly refSearchRecordRepository: Repository<RefSearchRecord>,
      private readonly membershipService: MembershipService,
      @InjectRepository(PaperOrderAttachment)
      private readonly paperOrderAttachmentRepository: Repository<PaperOrderAttachment>
    ) {}

    /**
     * Runs when the module starts: every order still marked `Generating` is
     * moved to `Pending` when it has no stored result, or to `Complete` when at
     * least one result row exists for it.
     *
     * NOTE(review): presumably these are orders whose generation was interrupted
     * by a restart — confirm.
     */
    async onModuleInit() {
      const stuckOrders = await this.paperOrderRepository.findBy({
        status: PaperOrderStatus.Generating
      })
      for (const order of stuckOrders) {
        const resultCount = await this.paperGenResultRepository.countBy({
          orderId: order.id
        })
        order.status = resultCount === 0 ? PaperOrderStatus.Pending : PaperOrderStatus.Complete
        await this.paperOrderRepository.save(order)
      }
    }

    /**
     * Returns a page of paper orders.
     *
     * Callers whose roles do not include `paperGen` only see their own orders;
     * a `title` filter is turned into a `LIKE %title%` match.
     *
     * @param req Request object; `req.user.roles` and `req.user.id` are read.
     * @param pageReq Page and search options. `search.where` is normalised and
     *   modified in place before being handed to `paginate`.
     */
    async findAllOrders(req, pageReq: PageRequest<PaperOrder>) {
      const request = pageReq as any
      // A request without `search` used to crash on `search.where`.
      if (!request.search) {
        request.search = {}
      }
      const search = request.search
      const emptyBranchList = Array.isArray(search.where) && search.where.length === 0
      if (!search.where || emptyBranchList) {
        search.where = {}
      }

      // TypeORM also accepts an array of conditions (OR-ed together). Setting
      // `userId` on the array itself would not restrict anything, so the user
      // scope and the title match are applied to every branch.
      const branches = Array.isArray(search.where) ? search.where : [search.where]
      const seesAllOrders = req.user.roles.includes('paperGen')
      for (const where of branches) {
        if (!seesAllOrders) {
          where.userId = req.user.id
        }
        if (where.title) {
          where.title = Like(`%${where.title}%`)
        }
      }

      return await paginate<PaperOrder>(this.paperOrderRepository, pageReq.page, pageReq.search)
    }

    /**
     * Loads one order by primary key.
     *
     * Uses `findOneByOrFail`, so the returned promise rejects when no row matches.
     */
    async findOrderById(id: number): Promise<PaperOrder> {
      return await this.paperOrderRepository.findOneByOrFail({
        id
      })
    }

    /** Returns a page of generation results using the request's page and search options. */
    async findAllResults(req: PageRequest<PaperGenResult>) {
      return await paginate<PaperGenResult>(this.paperGenResultRepository, req.page, req.search)
    }

    /** Persists a new paper order built from the DTO. */
    async createOrder(dto: CreatePaperOrderDto) {
      return await this.paperOrderRepository.save(dto)
    }

    /** Applies a partial update to the order with the given id. */
    async updateOrder(id: number, data) {
      return await this.paperOrderRepository.update(id, data)
    }

    /**
     * Marks the order as `Generating` and starts generation in the background.
     *
     * The call returns once the status has been saved; it does not wait for the
     * paper to be generated.
     *
     * @throws InternalServerErrorException when the order is already `Generating`.
     */
    async gen(orderId: number) {
      const order = await this.findOrderById(orderId)
      if (order.status === PaperOrderStatus.Generating) {
        throw new InternalServerErrorException('正在生成中,请稍后再试')
      }
      order.status = PaperOrderStatus.Generating
      await this.paperOrderRepository.save(order)

      // Deliberately not awaited. The catch keeps an unexpected rejection from
      // surfacing as an unhandled promise rejection.
      void this.genPaper(order).catch((error) => {
        Logger.error('生成论文失败', error instanceof Error ? error.stack : String(error))
      })
    }

    /**
     * Generates the chapter outline for an order and stores it, together with
     * the description, on the order.
     *
     * @returns The value produced by the imported `genChapters` helper.
     */
    async genChapters(id: number, description: string) {
      const order = await this.findOrderById(id)
      const tools = createLLM()
      const chapters = await genChapters(tools, order.major, order.title, description)
      order.chapters = chapters
      order.description = description
      await this.paperOrderRepository.save(order)
      return chapters
    }

    /**
     * Generates the paper for an order and records the outcome.
     *
     * Orders whose `major` contains "计算机" go through `genPaper`; all others
     * go through `genGeneralPaper1`. On success a result row is saved and the
     * order becomes `Complete`. On any failure the error is logged and the order
     * goes back to `Pending`. This method does not reject: a failure while
     * resetting the status is logged as well.
     */
    async genPaper(order: PaperOrder) {
      try {
        const generated = /计算机/.test(order.major)
          ? await genPaper(order.title, order.description)
          : await genGeneralPaper1(order.major, order.title, order.description, order.chapters)
        const { content, duration, tokenUsage, fileUrl } = generated

        await this.paperGenResultRepository.save({
          orderId: order.id,
          content,
          duration,
          tokenUsage,
          fileUrl
        })
        order.status = PaperOrderStatus.Complete
        await this.paperOrderRepository.save(order)
      } catch (error) {
        Logger.error('生成论文失败', error instanceof Error ? error.stack : String(error))
        order.status = PaperOrderStatus.Pending
        try {
          await this.paperOrderRepository.save(order)
        } catch (resetError) {
          // This method runs in the background, so nobody awaits its rejection.
          Logger.error(
            'Failed to reset paper order status',
            resetError instanceof Error ? resetError.stack : String(resetError)
          )
        }
      }
    }

    /**
     * Searches kns.cnki.net for references matching a paper title.
     *
     * Up to 20 Chinese and 10 foreign-language hits are kept, de-duplicated and
     * sorted by date, and each one gets a `ref` array with its exported citation
     * strings. A search record is stored for the user after a successful search.
     *
     * @param userId User performing the search.
     * @param title Paper title; it is searched as a single subject keyword.
     * @returns `{ zh, en }`, each an array of `{ filename, name, date, ref }`.
     * @throws InternalServerErrorException `USAGE_LIMIT_EXCEEDED` when the user
     *   already has a search record created today and has no membership, an
     *   expired membership, or a trial membership.
     */
    async searchReferences(userId: number, title: string) {
      const now = new Date()
      const searchedToday = await this.refSearchRecordRepository.findOneBy({
        userId,
        createdAt: Between(startOfDay(now), endOfDay(now))
      })
      if (searchedToday) {
        const member = await this.membershipService.getMembership(userId, false)
        if (!member || member.isExpired || member.memberType === MemberType.Trial) {
          throw new InternalServerErrorException('USAGE_LIMIT_EXCEEDED')
        }
      }

      // An earlier version asked the LLM to extract 2-3 comma-separated keywords
      // from the title. That step is disabled; the whole title is the only keyword.
      const keywords = [title]
      const http = this.createCnkiClient()

      let zh: any[] = []
      let en: any[] = []
      for (const kw of keywords) {
        // JSON.stringify yields a quoted, escaped JSON string, so a title that
        // contains `"` or `\` can no longer break or alter the query document.
        const subject = JSON.stringify(kw)

        zh = zh.concat(
          await this.searchCnkiGrid(
            http,
            kw,
            `{"Platform":"","Resource":"CROSSDB","Classid":"WD0FTY92","Products":"","QNode":{"QGroup":[{"Key":"Subject","Title":"","Logic":0,"Items":[{"Field":"SU","Value":${subject},"Operator":"TOPRANK","Logic":0}],"ChildItems":[]}]},"ExScope":1,"SearchType":2,"Rlang":"CHINESE","KuaKuCode":"YSTT4HG0,LSTPFY1C,JUP3MUPD,MPMFIG1A,EMRPGLPA,WQ0UVIAA,BLZOG7CK,PWFIRAGL,NN3FJMUV,NLBO1Z6R"}`,
            'YSTT4HG0,LSTPFY1C,RMJLXHZ3,JQIRZIYA,JUP3MUPD,1UR4K4HZ,BPBAFJ5S,R79MZMCB,MPMFIG1A,EMRPGLPA,J708GVCE,ML4DRIDX,WQ0UVIAA,NB3BWEHK,XVLO76FD,HR1YT1Z9,BLZOG7CK,PWFIRAGL,NN3FJMUV,NLBO1Z6R,'
          )
        )
        en = en.concat(
          await this.searchCnkiGrid(
            http,
            kw,
            `{"Platform":"","Resource":"CROSSDB","Classid":"WD0FTY92","Products":"","QNode":{"QGroup":[{"Key":"Subject","Title":"","Logic":0,"Items":[{"Field":"SU","Value":${subject},"Operator":"TOPRANK","Logic":0}],"ChildItems":[]},{"Key":"SCDBGroup","Title":"","Logic":0,"Items":[],"ChildItems":[{"Key":"LG","Title":"","Logic":0,"Items":[{"Key":"EN","Title":"英语","Logic":1,"Field":"LG","Operator":"DEFAULT","Value":"EN","Value2":"","Name":"LG","ExtendType":0}],"ChildItems":[]}]}]},"ExScope":1,"SearchType":6,"Rlang":"FOREIGN","KuaKuCode":"YSTT4HG0,LSTPFY1C,JUP3MUPD,MPMFIG1A,WQ0UVIAA,BLZOG7CK,EMRPGLPA,PWFIRAGL,NLBO1Z6R,NN3FJMUV","View":""}`,
            'YSTT4HG0,LSTPFY1C,RMJLXHZ3,JQIRZIYA,JUP3MUPD,1UR4K4HZ,BPBAFJ5S,R79MZMCB,MPMFIG1A,WQ0UVIAA,NB3BWEHK,XVLO76FD,HR1YT1Z9,BLZOG7CK,EMRPGLPA,J708GVCE,ML4DRIDX,PWFIRAGL,NLBO1Z6R,NN3FJMUV,'
          )
        )
      }
      zh = this.sortDeduplication(zh.slice(0, 20))
      en = this.sortDeduplication(en.slice(0, 10))

      // Fetch the exported citation strings for every hit; the requests of one
      // list run concurrently and any failure rejects the whole search.
      const attachCitations = (items: any[]) =>
        Promise.all(
          items.map(async (item) => {
            const res = await http.post(
              'https://kns.cnki.net/dm8/API/GetExport',
              qs.stringify({
                filename: item.filename,
                displaymode: 'GBTREFER,MLA,APA',
                uniplatform: 'NZKPT'
              })
            )
            // Drop the leading "[n]" index from the first line of each format.
            item.ref = res.data.data.map((i) => i.value[0].replace(/^\[\d+\]/, ''))
          })
        )
      await attachCitations(zh)
      await attachCitations(en)

      await this.refSearchRecordRepository.save({
        userId,
        title
      })
      return { zh, en }
    }

    /**
     * Builds the axios instance used for all kns.cnki.net requests, with
     * browser-like headers and a session cookie.
     */
    private createCnkiClient() {
      // NOTE(review): a logged-in session cookie, including an account name, is
      // embedded in source and carries `c_m_expire=2024-02-14`. It should live
      // in configuration. CNKI_COOKIE, when set, takes precedence; the literal
      // is kept as the fallback so existing deployments behave as before.
      const cookie =
        process.env.CNKI_COOKIE ||
        'Ecp_notFirstLogin=8TNR2E; KNS3COOKIE=1705297590.529.38190.813537|b25e41a932fd162af3b8c5cff4059fc3; Ecp_ClientId=m240115134600532586; Ecp_IpLoginFail=240115180.111.242.2; SID_sug=128008; SID_kns_new=kns25128005; Ecp_ClientIp=180.111.242.2; SID_restapi=018105; LID=WEEvREcwSlJHSldTTEYyTE5mZ3VrVmtob3VPdjBpS29QOWl1NG1pRVJqWT0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!; Ecp_session=1; Ecp_showrealname=1; Ecp_LoginStuts={"IsAutoLogin":false,"UserName":"15077886171","ShowName":"15077886171","UserType":"jf","BUserName":"","BShowName":"","BUserType":"","r":"8TNR2E","Members":[]}; Ecp_loginuserjf=15077886171; c_m_LinID=LinID=WEEvREcwSlJHSldTTEYyTE5mZ3VrVmtob3VPdjBpS29QOWl1NG1pRVJqWT0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!&ot=02%2F14%2F2024%2013%3A52%3A01; c_m_expire=2024-02-14%2013%3A52%3A01; dblang=both'

      return axios.create({
        headers: {
          Host: 'kns.cnki.net',
          'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
          'X-Requested-With': 'XMLHttpRequest',
          'sec-ch-ua-mobile': '?0',
          'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'sec-ch-ua-platform': 'macOS',
          Origin: 'https://kns.cnki.net',
          'Sec-Fetch-Site': 'same-origin',
          'Sec-Fetch-Mode': 'cors',
          'Sec-Fetch-Dest': 'empty',
          Referer:
            'https://kns.cnki.net/kns8s/defaultresult/index?crossids=YSTT4HG0%2CLSTPFY1C%2CJUP3MUPD%2CMPMFIG1A%2CWQ0UVIAA%2CBLZOG7CK%2CEMRPGLPA%2CPWFIRAGL%2CNLBO1Z6R%2CNN3FJMUV&korder=SU&kw=%E9%98%BF%E6%96%AF%E9%A1%BF',
          'Accept-Encoding': 'gzip, deflate, br',
          'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,und;q=0.6',
          Cookie: cookie
        }
      })
    }

    /**
     * Posts one first-page search (20 rows, list mode) to the CNKI result grid
     * and returns the rows parsed by `parseRefHtml`.
     */
    private async searchCnkiGrid(
      http: ReturnType<typeof axios.create>,
      kw: string,
      queryJson: string,
      productStr: string
    ) {
      const res = await http.post(
        'https://kns.cnki.net/kns8s/brief/grid',
        qs.stringify({
          boolSearch: true,
          QueryJson: queryJson,
          pageNum: 1,
          pageSize: 20,
          dstyle: 'listmode',
          boolSortSearch: false,
          productStr,
          aside: `主题:${kw}`,
          searchFrom: '资源范围:总库',
          CurPage: 1
        })
      )
      return this.parseRefHtml(res.data)
    }

    /**
     * Extracts result rows from a result-grid HTML fragment.
     *
     * A `<tr>` counts as a result when it contains `td.seq .cbItem` with a
     * `value` attribute; that value becomes `filename`. `name` and `date` are
     * the raw text of `td.name a` and `td.date`.
     *
     * @returns Array of `{ filename, name, date }` in document order.
     */
    parseRefHtml(html: any) {
      const $ = load(html)
      const rows = []
      $('tr').each((_index, tr) => {
        // Query inside the row directly instead of re-loading it as a document.
        const row = $(tr)
        const filename = row.find('td.seq .cbItem').attr('value')
        if (!filename) {
          return
        }
        rows.push({
          filename,
          name: row.find('td.name a').text(),
          date: row.find('td.date').text()
        })
      })
      return rows
    }

    /**
     * Removes items with a repeated `filename` (first occurrence wins) and
     * orders the rest by `date`, newest first.
     *
     * Dates are read as `yyyy-MM-dd`; surrounding whitespace and any text after
     * the date are tolerated. Items whose date cannot be read are placed last
     * in their original relative order, so the comparator never yields `NaN`.
     *
     * @param arr Items carrying `filename` and `date`; the array is not modified.
     * @returns A new array.
     */
    sortDeduplication(arr: any[]) {
      const seen = new Set()
      const unique = []
      for (const item of arr) {
        if (!seen.has(item.filename)) {
          seen.add(item.filename)
          unique.push(item)
        }
      }

      const reference = new Date()
      const toTime = (raw: unknown): number => {
        const text = typeof raw === 'string' ? raw.trim() : ''
        if (!text) {
          return NaN
        }
        const exact = parse(text, 'yyyy-MM-dd', reference).getTime()
        if (!Number.isNaN(exact)) {
          return exact
        }
        // Fall back to the first date-shaped run, e.g. when a time follows it.
        const match = /\d{4}-\d{1,2}-\d{1,2}/.exec(text)
        return match ? parse(match[0], 'yyyy-MM-dd', reference).getTime() : NaN
      }

      return unique
        .map((item) => ({ item, time: toTime(item.date) }))
        .sort((a, b) => {
          const aUnknown = Number.isNaN(a.time)
          const bUnknown = Number.isNaN(b.time)
          if (aUnknown || bUnknown) {
            if (aUnknown === bUnknown) {
              return 0
            }
            return aUnknown ? 1 : -1
          }
          return b.time - a.time
        })
        .map((entry) => entry.item)
    }

    /** Returns a page of order attachments using the request's page and search options. */
    async attachments(req: PageRequest<PaperOrderAttachment>) {
      return await paginate<PaperOrderAttachment>(this.paperOrderAttachmentRepository, req.page, req.search)
    }

    /** Persists an order attachment. */
    async createAttachment(dto: PaperOrderAttachment) {
      return await this.paperOrderAttachmentRepository.save(dto)
    }

    /** Deletes the order attachment with the given id. */
    async deleteAttachment(id: number) {
      return await this.paperOrderAttachmentRepository.delete(id)
    }
  }