// index.js (5.4 KB)
  1. const fs = require('fs')
  2. const path = require('path')
  3. const Canvas = require('canvas')
  4. const exec = require('child_process').execSync
  5. const sharp = require('sharp')
  6. Canvas.registerFont('./pubg.ttf', { family: 'pubg' })
  7. var canvas0 = Canvas.createCanvas(320, 320)
  8. var ctx0 = canvas0.getContext('2d')
  9. ctx0.font = '30px pubg'
  10. let count = 0;
  11. const genGtData = (str) => {
  12. let metrics = ctx0.measureText(str)
  13. var canvas = Canvas.createCanvas(metrics.width + 10, metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent + 10)
  14. var ctx = canvas.getContext('2d')
  15. ctx.fillStyle = '#ffffff'
  16. ctx.fillRect(0, 0, metrics.width + 10, metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent + 10)
  17. ctx.font = '30px pubg'
  18. ctx.fillStyle = '#000000'
  19. ctx.fillText(str, 5, 5 + metrics.actualBoundingBoxAscent)
  20. var img_src = canvas.toBuffer().toString('base64')
  21. const base64Data = new Buffer(img_src.replace(/^data:image\/\w+;base64,/, ""), 'base64')
  22. sharp(base64Data)
  23. .tiff({
  24. compression: 'lzw',
  25. squash: true
  26. })
  27. .toFile(`./data/ground-truth/${count}.tif`)
  28. .then(res => {
  29. console.log(res)
  30. })
  31. fs.writeFileSync(`./data/ground-truth/${count}.gt.txt`, str + '\n');
  32. count++
  33. }
  34. const genTiffs = () => {
  35. for (i = 0; i < 1000; i++) {
  36. let str = ''
  37. for (let j = 0; j < 1 + Math.round(Math.random() * 10); j++) {
  38. switch (Math.round(Math.random() * 7)) {
  39. case 0:
  40. let num = (Math.random() * 40).toFixed(1);
  41. if (num.endsWith('.0')) {
  42. num = num.replace('.0', '')
  43. }
  44. str += `${num}分钟 `
  45. break
  46. case 1:
  47. str += `第${parseInt(Math.random() * 100 + 1)} `
  48. break
  49. case 2:
  50. case 3:
  51. case 4:
  52. let s = i.toFixed(1);
  53. if (s.endsWith('.0')) {
  54. s = s.replace('.0', '')
  55. }
  56. str += `${s} `
  57. break
  58. case 5:
  59. case 6:
  60. case 7:
  61. str += `/${parseInt(Math.random() * 100 + 1)} `
  62. break
  63. }
  64. }
  65. genGtData(str)
  66. }
  67. }
  68. const genTiff = () => {
  69. exec("find . -name 'pubg*' ! -name 'pubg.ttf' -d 1 -exec rm -f {} +")
  70. let chars = "1234567890./分钟第剩余淘汰"
  71. let trainingText = ''
  72. let width = 0, height = 0
  73. for (i = 0; i < 200; i++) {
  74. let line = ''
  75. for (j = 0; j < 50; j++) {
  76. line += chars[Math.round(Math.random() * (chars.length - 1))]
  77. }
  78. trainingText += line + '\n'
  79. let metrics = ctx0.measureText(line)
  80. if (metrics.width > width) {
  81. width = metrics.width
  82. }
  83. height += 40
  84. }
  85. var canvas = Canvas.createCanvas(width + 10, height + 10)
  86. var ctx = canvas.getContext('2d')
  87. ctx.fillStyle = '#ffffff'
  88. ctx.fillRect(0, 0, width + 10, height + 10)
  89. ctx.font = '30px pubg'
  90. ctx.fillStyle = '#000000'
  91. let boxInfo = ''
  92. let x = 5, y = 35, cx = 0, cy = 0, lastAscent = 0, lastDecent = 0, line = ''
  93. for (i in trainingText) {
  94. if (trainingText[i] == '\n') {
  95. let metrics = ctx0.measureText(line)
  96. boxInfo += `\t 5 ${Math.round(height + 10 - y - metrics.actualBoundingBoxDescent)} ${Math.round(metrics.width + 10)} ${Math.round(height + 10 - y + metrics.actualBoundingBoxAscent)} 0\n`
  97. y += 40
  98. x = 5
  99. line = ''
  100. } else {
  101. line += trainingText[i]
  102. ctx.fillText(trainingText[i], x, y)
  103. let metrics = ctx0.measureText(trainingText[i])
  104. boxInfo += `${trainingText[i]} ${Math.round(x)} ${Math.round(height + 10 - y - metrics.actualBoundingBoxDescent)} ${Math.round(x + metrics.width)} ${Math.round(height + 10 - y + metrics.actualBoundingBoxAscent)} 0\n`
  105. lastAscent = metrics.actualBoundingBoxAscent
  106. lastDecent = metrics.actualBoundingBoxDescent
  107. x += metrics.width
  108. }
  109. }
  110. fs.writeFileSync('pubg.font.exp0.box', boxInfo)
  111. fs.writeFileSync('font_properties', 'pubg 0 0 0 0 0')
  112. var img_src = canvas.toBuffer().toString('base64')
  113. const base64Data = new Buffer(img_src.replace(/^data:image\/\w+;base64,/, ""), 'base64')
  114. sharp(base64Data)
  115. .tiff({
  116. compression: 'lzw',
  117. squash: true
  118. })
  119. .toFile(`pubg.font.exp0.tif`)
  120. .then(res => {
  121. console.log(exec('tesseract pubg.font.exp0.tif pubg.font.exp0 nobatch box.train').toString())
  122. console.log(exec('unicharset_extractor pubg.font.exp0.box').toString())
  123. console.log(exec('shapeclustering -F font_properties -U unicharset -O pubg.unicharset pubg.font.exp0.tr').toString())
  124. console.log(exec('mftraining -F font_properties -U unicharset -O pubg.unicharset pubg.font.exp0.tr').toString())
  125. console.log(exec('cntraining pubg.font.exp0.tr').toString())
  126. console.log(exec('mv inttemp pubg.inttemp').toString())
  127. console.log(exec('mv pffmtable pubg.pffmtable').toString())
  128. console.log(exec('mv shapetable pubg.shapetable').toString())
  129. console.log(exec('mv normproto pubg.normproto').toString())
  130. console.log(exec('combine_tessdata pubg.').toString())
  131. })
  132. }
  133. genTiff()