| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- import PdfParse from '@cyber2024/pdf-parse-fixed'
- import { readFileSync } from 'fs'
- import cld from 'cld'
- import { Configuration, OpenAIApi } from 'azure-openai'
- import pg from 'pg'
/**
 * Extract the text of a PDF file and regroup its lines into paragraphs.
 *
 * Every line of the extracted text is trimmed and appended, with no
 * separator, to the paragraph being built. When a line ends with
 * sentence-closing punctuation (as decided by `isFullSentence`) the paragraph
 * is emitted and a new one is started. Text left over after the last line is
 * emitted as a final paragraph.
 *
 * @param {string} path - Path of the PDF file; it is read synchronously.
 * @returns {Promise<{contents: string[], lang: object | null}>} The paragraphs
 *   and whatever `cld.detect` resolved to for them. `lang` is `null` when
 *   there is no text to analyse or when detection fails.
 */
async function pdf2text(path) {
  const pdf = await PdfParse(readFileSync(path))
  const contents = []
  let paragraph = ''

  for (const rawLine of pdf.text.trim().split('\n')) {
    const line = rawLine.trim()
    paragraph += line
    // Only the current line is inspected: a line that closes a sentence also
    // closes the paragraph accumulated so far.
    if (isFullSentence(line)) {
      contents.push(paragraph)
      paragraph = ''
    }
  }
  if (paragraph) {
    contents.push(paragraph)
  }

  // Language detection is best-effort: it is skipped for empty documents and
  // a failure is reported without discarding the extracted paragraphs.
  let lang = null
  if (contents.length > 0) {
    try {
      lang = await cld.detect(contents.join('\n'))
    } catch (err) {
      console.warn(`language detection failed for ${path}: ${err?.message ?? err}`)
    }
  }

  console.log(contents.length)
  return { contents, lang }
}
/**
 * Tell whether a string ends like a finished sentence.
 *
 * A string qualifies when its last character(s) are sentence-ending
 * punctuation or a closing quote/bracket, in either Western or CJK form.
 * The full-width forms of `! ? ; : )` are written as `\u` escapes so they
 * cannot be silently folded into their ASCII look-alikes by an editor or a
 * normalising tool.
 *
 * @param {string} str - Text to inspect; callers are expected to trim it,
 *   since trailing whitespace makes the test fail.
 * @returns {boolean} `true` when `str` ends with at least one such character,
 *   `false` otherwise (including for the empty string).
 */
function isFullSentence(str) {
  return /[.!?。…;:”’)】》」』〕〉〗〞〟»"'\])}\uFF01\uFF1F\uFF1B\uFF1A\uFF09]+$/.test(str)
}
// Extract the PDF named as the first command-line argument; with no argument
// the original sample document is used, so a bare run behaves as before.
await pdf2text(process.argv[2] ?? '/Users/drew/Downloads/《Python 3学习笔记(上卷)》_1-50.pdf')
// Azure OpenAI client used for embeddings.
// The key and endpoint are taken from AZURE_OPENAI_API_KEY and
// AZURE_OPENAI_ENDPOINT when set; the literals are the previously hard-coded
// values, kept only so the script keeps running unchanged.
// SECURITY FIXME: an API key committed to source must be treated as leaked —
// rotate it and delete the fallback literal.
const azureApiKey = process.env.AZURE_OPENAI_API_KEY ?? 'beb32e4625a94b65ba8bc0ba1688c4d2'
const azureEndpoint = process.env.AZURE_OPENAI_ENDPOINT ?? 'https://zouma.openai.azure.com/'
const openai = new OpenAIApi(
  new Configuration({
    apiKey: azureApiKey,
    // add azure info into configuration
    azure: {
      apiKey: azureApiKey,
      endpoint: azureEndpoint
    }
  })
)
- // const response = await openai.createEmbedding({
- // model: 'embedding',
- // input: 'The food was delicious and the waiter...'
- // })
- // console.log(JSON.stringify(response.data, null, 4))
// Postgres connection for the embedding store.
// Each setting can be overridden with the conventional PGHOST / PGPORT /
// PGUSER / PGPASSWORD / PGDATABASE environment variables; the literals are the
// previously hard-coded values and act as fallbacks. Note that a PG* variable
// already present in the environment now takes precedence over them.
// SECURITY FIXME: the database password is committed to source — rotate it
// and delete the fallback literal.
const client = new pg.Client({
  host: process.env.PGHOST ?? '47.97.42.229',
  // An unset, empty or non-numeric PGPORT falls back to the default port.
  port: Number(process.env.PGPORT) || 5432,
  user: process.env.PGUSER ?? 'postgres',
  password: process.env.PGPASSWORD ?? 'D$&g3a9BCJH&$Nzh',
  database: process.env.PGDATABASE ?? 'gpt_test',
  connectionTimeoutMillis: 5000
})
// Open the connection, make sure the embedding table exists, print its rows,
// and always close the connection afterwards.
await client.connect()
try {
  // Awaited so that a failing DDL statement surfaces here instead of becoming
  // an unhandled rejection.
  // NOTE(review): the statement references the sequence `embedding_id_seq`
  // and the `vector` type; neither is created in this file — confirm both
  // exist in the target database.
  await client.query(`create table if not exists public.chat_embedding (
    id integer primary key not null default nextval('embedding_id_seq'::regclass),
    name character varying,
    text character varying,
    embedding vector(1536)
  );`)
  const res = await client.query('SELECT * FROM chat_embedding')
  console.log(res.rows)
} finally {
  // Runs on success and on failure, so the connection is never left open.
  await client.end()
}
|