| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
- const fs = require('fs')
- const path = require('path')
- const Canvas = require('canvas')
- const exec = require('child_process').execSync
- const sharp = require('sharp')
// Make ./pubg.ttf available to the canvas package under the family name 'pubg'.
Canvas.registerFont('./pubg.ttf', { family: 'pubg' });

// Shared scratch canvas: its 2D context is configured with the 30px pubg font
// and is used by the generators below to measure text.
const canvas0 = Canvas.createCanvas(320, 320);
const ctx0 = canvas0.getContext('2d');
ctx0.font = '30px pubg';

// Running index that genGtData uses to name the ground-truth files it writes.
let count = 0;
/**
 * Renders `str` in the 30px pubg font as black text on a white background and
 * writes one ground-truth pair under ./data/ground-truth/:
 *   - `<count>.tif`    — the rendered image, LZW-compressed TIFF
 *   - `<count>.gt.txt` — the text itself followed by a newline
 * The module-level `count` is incremented afterwards.
 *
 * @param {string} str - Text to render.
 * @returns {Promise<void>} Settles once the TIFF write has finished. A failed
 *   write is logged and flagged through `process.exitCode` instead of being
 *   left as an unhandled rejection.
 */
const genGtData = (str) => {
  // Blank margin (in pixels) kept on every side of the rendered text.
  const PADDING = 5;

  const metrics = ctx0.measureText(str);
  const width = metrics.width + 2 * PADDING;
  const height =
    metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent + 2 * PADDING;

  const canvas = Canvas.createCanvas(width, height);
  const ctx = canvas.getContext('2d');
  ctx.fillStyle = '#ffffff';
  ctx.fillRect(0, 0, width, height);
  ctx.font = '30px pubg';
  ctx.fillStyle = '#000000';
  // The y argument is the text baseline, so offset it by the ascent.
  ctx.fillText(str, PADDING, PADDING + metrics.actualBoundingBoxAscent);

  // Capture the index now: the TIFF write completes after `count` has moved on.
  const index = count;

  // canvas.toBuffer() already holds the encoded image bytes, so it is handed
  // to sharp directly (no base64 round trip, no deprecated `new Buffer`).
  const tiffWritten = sharp(canvas.toBuffer())
    .tiff({
      compression: 'lzw',
      squash: true,
    })
    .toFile(`./data/ground-truth/${index}.tif`)
    .then((res) => {
      console.log(res);
    })
    .catch((err) => {
      console.error(`Failed to write ./data/ground-truth/${index}.tif:`, err);
      process.exitCode = 1;
    });

  fs.writeFileSync(`./data/ground-truth/${index}.gt.txt`, `${str}\n`);
  count++;

  return tiffWritten;
};
/**
 * Generates 1000 random text lines and passes each one to genGtData.
 *
 * Every line consists of 1 to 11 segments, each followed by a single space.
 * A segment is one of:
 *   - `<minutes>分钟` — a random value in [0, 40] with at most one decimal
 *   - `第<rank>`      — a random integer in [1, 100]
 *   - `<index>`       — the index of the line being generated
 *   - `/<rank>`       — a random integer in [1, 100]
 */
const genTiffs = () => {
  // Drop a trailing ".0" so whole numbers are rendered without a decimal part.
  const trimWholeNumber = (text) => (text.endsWith('.0') ? text.slice(0, -2) : text);
  const randomRank = () => Math.floor(Math.random() * 100 + 1);

  for (let i = 0; i < 1000; i++) {
    // Roll the segment count once per line; re-rolling it in the loop
    // condition would compare against a new bound on every iteration.
    const segmentCount = 1 + Math.round(Math.random() * 10);
    let str = '';

    for (let j = 0; j < segmentCount; j++) {
      switch (Math.round(Math.random() * 7)) {
        case 0:
          str += `${trimWholeNumber((Math.random() * 40).toFixed(1))}分钟 `;
          break;
        case 1:
          str += `第${randomRank()} `;
          break;
        case 2:
        case 3:
        case 4:
          // NOTE(review): this emits the line index rather than a random
          // number — confirm that is the intended training text.
          str += `${trimWholeNumber(i.toFixed(1))} `;
          break;
        case 5:
        case 6:
        case 7:
          str += `/${randomRank()} `;
          break;
      }
    }

    genGtData(str);
  }
};
/**
 * Builds a training page for the pubg font and runs the Tesseract training
 * tools on it.
 *
 * Steps:
 *   1. Delete previous `pubg*` artefacts in the working directory (the font
 *      file pubg.ttf is kept).
 *   2. Generate 200 lines of 50 random characters drawn from a fixed alphabet.
 *   3. Draw them character by character on a white canvas and record one box
 *      entry per character, plus one tab entry per line end, in
 *      pubg.font.exp0.box. Vertical box coordinates are flipped so that they
 *      count up from the bottom edge of the image.
 *   4. Write font_properties and pubg.font.exp0.tif, then run the training
 *      commands in order, logging the output of each.
 *
 * @returns {Promise<void>} Settles when the pipeline has finished. A failure
 *   is logged and flagged through `process.exitCode`.
 */
const genTiff = () => {
  exec("find . -name 'pubg*' ! -name 'pubg.ttf' -d 1 -exec rm -f {} +");

  const chars = '1234567890./分钟第剩余淘汰';
  const LINE_COUNT = 200;
  const CHARS_PER_LINE = 50;
  const LINE_HEIGHT = 40;
  const MARGIN = 5;
  const FIRST_BASELINE = 35;

  // Build the random text and track the widest line to size the canvas.
  const lines = [];
  let width = 0;
  for (let i = 0; i < LINE_COUNT; i++) {
    let line = '';
    for (let j = 0; j < CHARS_PER_LINE; j++) {
      // floor(random * length) gives every character the same probability;
      // round(random * (length - 1)) would halve the odds of the first and
      // last characters.
      line += chars[Math.floor(Math.random() * chars.length)];
    }
    lines.push(line);
    width = Math.max(width, ctx0.measureText(line).width);
  }
  const height = LINE_COUNT * LINE_HEIGHT;
  const pageWidth = width + 2 * MARGIN;
  const pageHeight = height + 2 * MARGIN;

  const canvas = Canvas.createCanvas(pageWidth, pageHeight);
  const ctx = canvas.getContext('2d');
  ctx.fillStyle = '#ffffff';
  ctx.fillRect(0, 0, pageWidth, pageHeight);
  ctx.font = '30px pubg';
  ctx.fillStyle = '#000000';

  // One box-file row: "<symbol> <left> <bottom> <right> <top> <page>".
  const boxRow = (symbol, left, right, baseline, metrics) => {
    const bottom = Math.round(pageHeight - baseline - metrics.actualBoundingBoxDescent);
    const top = Math.round(pageHeight - baseline + metrics.actualBoundingBoxAscent);
    return `${symbol} ${Math.round(left)} ${bottom} ${Math.round(right)} ${top} 0\n`;
  };

  const boxRows = [];
  let y = FIRST_BASELINE;
  for (const line of lines) {
    let x = MARGIN;
    for (const ch of line) {
      ctx.fillText(ch, x, y);
      const metrics = ctx0.measureText(ch);
      boxRows.push(boxRow(ch, x, x + metrics.width, y, metrics));
      x += metrics.width;
    }
    // End of line: a tab entry that spans the whole line.
    const lineMetrics = ctx0.measureText(line);
    boxRows.push(boxRow('\t', MARGIN, lineMetrics.width + 2 * MARGIN, y, lineMetrics));
    y += LINE_HEIGHT;
  }

  fs.writeFileSync('pubg.font.exp0.box', boxRows.join(''));
  fs.writeFileSync('font_properties', 'pubg 0 0 0 0 0');

  // Executed strictly in this order once the TIFF exists.
  const trainingCommands = [
    'tesseract pubg.font.exp0.tif pubg.font.exp0 nobatch box.train',
    'unicharset_extractor pubg.font.exp0.box',
    'shapeclustering -F font_properties -U unicharset -O pubg.unicharset pubg.font.exp0.tr',
    'mftraining -F font_properties -U unicharset -O pubg.unicharset pubg.font.exp0.tr',
    'cntraining pubg.font.exp0.tr',
    'mv inttemp pubg.inttemp',
    'mv pffmtable pubg.pffmtable',
    'mv shapetable pubg.shapetable',
    'mv normproto pubg.normproto',
    'combine_tessdata pubg.',
  ];

  // canvas.toBuffer() already holds the encoded image bytes, so it is handed
  // to sharp directly (no base64 round trip, no deprecated `new Buffer`).
  return sharp(canvas.toBuffer())
    .tiff({
      compression: 'lzw',
      squash: true,
    })
    .toFile(`pubg.font.exp0.tif`)
    .then(() => {
      for (const command of trainingCommands) {
        console.log(exec(command).toString());
      }
    })
    .catch((err) => {
      console.error('Font training failed:', err);
      process.exitCode = 1;
    });
};
// Script entry point: running this file invokes genTiff.
genTiff();
|