// generate.js
  1. const fs = require('fs')
  2. const isEqual = require('lodash.isequal')
  // Registry of language ids whose component script was already requested (id -> true).
  const SCRIPTS = {};
  /**
   * Evaluates a script's source text in the global scope.
   *
   * Calling `eval` through an alias makes it an *indirect* eval, so the code
   * runs as global code and its top-level `var`/function declarations become
   * globals. Source of the trick: https://stackoverflow.com/a/23699187/17140794
   *
   * NOTE(review): this executes whatever text it is given; only pass sources
   * you trust.
   *
   * @param {string|Buffer} src - Script source; converted with `toString()`.
   */
  const include = function (src) {
    const runInGlobalScope = eval;
    const code = src.toString();
    runInGlobalScope(code);
  };
  /**
   * Downloads a script and evaluates it in the global scope via `include`.
   *
   * @param {string} src - URL of the script to fetch.
   * @returns {Promise<void>}
   * @throws {Error} When the server answers with a non-2xx status, so that an
   *   error page is never evaluated as JavaScript.
   */
  async function loadScript(src) {
    const response = await fetch(src);
    if (!response.ok) {
      throw new Error(`Failed to load ${src}: HTTP ${response.status} ${response.statusText}`);
    }
    include(await response.text());
  }
  /**
   * Loads one or several languages, strictly one after another.
   *
   * @param {string|string[]|undefined} lngs - A language id, a list of ids, or
   *   a falsy value (in which case nothing happens).
   * @returns {Promise<void>}
   */
  async function loadLanguages(lngs) {
    if (!lngs) {
      return;
    }
    const ids = Array.isArray(lngs) ? lngs : [lngs];
    for (const id of ids) {
      await loadLanguage(id);
    }
  }
  // Running count of language scripts requested so far (used for progress output).
  let langNumber = 0;

  /**
   * Loads a single language component after the languages listed in its
   * `optional`, `require` and `modify` entries.
   *
   * Entries of `components.languages` without a `title` are skipped. A language
   * already recorded in `SCRIPTS` is not downloaded again.
   *
   * @param {string} lng - Key into `components.languages`.
   * @returns {Promise<void>}
   */
  async function loadLanguage(lng) {
    const { title, optional, require: required, modify } = components.languages[lng];
    if (!title) {
      return;
    }

    for (const dependencies of [optional, required, modify]) {
      await loadLanguages(dependencies);
    }

    if (SCRIPTS[lng]) {
      return;
    }
    SCRIPTS[lng] = true;
    langNumber += 1;
    console.log(`${langNumber} | Loading ${lng}`);
    // TODO: version should probably not be hardcoded
    await loadScript(`https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-${lng}.min.js`);
  }
  /**
   * Evaluates a grammar script from the local file system and registers it in
   * `components.languages`.
   *
   * @param {string} path - File to read and evaluate via `include`.
   * @param {string} code - Key under which the language is registered.
   * @param {string} title - Value stored as the entry's `title`.
   * @param {string|string[]} alias - Value stored as the entry's `alias`.
   */
  function loadLocalLanguage(path, code, title, alias) {
    const source = fs.readFileSync(path);
    include(source);
    components.languages[code] = { title, alias };
  }
  /**
   * Removes duplicates from `a` in place, keeping the LAST occurrence of every
   * group of equal elements and preserving the relative order of the survivors.
   *
   * The array is scanned from the back: everything to the right of `i` is
   * already duplicate-free, and removing index `i` never shifts an element that
   * still has to be examined. This makes a single call sufficient.
   *
   * @param {Array} a - Array to deduplicate (mutated).
   * @param {function(*, *): boolean} [fn] - Equality predicate, called as
   *   `fn(earlier, later)`. Without it the array is returned untouched.
   * @returns {Array} The same array instance `a`.
   */
  function unique(a, fn) {
    if (a.length <= 1 || !fn) {
      return a;
    }
    for (let i = a.length - 2; i >= 0; i--) {
      for (let j = i + 1; j < a.length; j++) {
        if (fn(a[i], a[j])) {
          a.splice(i, 1);
          break;
        }
      }
    }
    return a;
  }
  /**
   * Applies `unique` repeatedly until a pass no longer shrinks the array.
   *
   * @param {Array} a - Array to deduplicate.
   * @param {function(*, *): boolean} [fn] - Equality predicate forwarded to `unique`.
   * @returns {Array} Result of the last `unique` call.
   */
  function uniqlo(a, fn) {
    let current = a;
    for (;;) {
      const lengthBefore = current.length;
      current = unique(current, fn);
      if (current.length >= lengthBefore) {
        return current;
      }
    }
  }
  /**
   * Downloads Prism core, the component index and every listed language,
   * flattens the loaded grammars, deduplicates patterns / tokens / grammars and
   * serializes the result into a binary Blob.
   *
   * Output layout (16-bit values are written through `Uint16Array`, i.e. in the
   * byte order of the machine running this script):
   *
   *   uint16 patternCount
   *     string  "<regex>,<alias>,<insideGrammarIndex>"   (alias / index may be empty)
   *   uint16 grammarCount
   *     uint8   tokenCount
   *       string  tokenName
   *       uint8   patternCount
   *         uint16  patternIndex
   *   uint16 languageCount                               (languages and their aliases)
   *     string  id
   *     string  title                                    (empty when unknown)
   *     uint16  grammarIndex
   *
   * A string is a length prefix (one byte when shorter than 253 characters,
   * otherwise the byte 254 followed by three length bytes, least significant
   * first) followed by one byte per UTF-16 code unit.
   *
   * @returns {Promise<Blob>} The serialized grammar table.
   * @throws {Error} When a token holds something that cannot be serialized or
   *   when a deduplicated structure cannot be found again.
   * @throws {RangeError} When a count or index does not fit its integer field.
   */
  async function generate() {
    const tempPatterns = [];
    const tempLanguages = {};
    const tempTokens = [];
    const tempGrammars = [];
    // grammar object -> flattened result; registered before recursing so that
    // grammars referencing themselves through `inside` terminate.
    const flattened = new WeakMap();

    // Rewrites regex constructs the consumer of the table does not support.
    function sanitize(pattern) {
      // Unsupported:
      //   UTF-16 ranges
      //   [^] => [\s\S]  (matches any character, including new line)
      //   []  => ??      (matches the _empty_ string)
      // Per the original author, the whitelisted languages only use 0xFFFF as
      // the upper end of a range; other grammars may use different values.
      pattern = pattern.replaceAll("\\uFFFF", "\\xFF");
      pattern = pattern.replaceAll("[^]", "[\\s\\S]");
      // TODO: This just brute-forces the regex to work; results may differ
      // from the original expression. A general \uXXXX -> \xFF replacement was
      // considered but is not implemented.
      // TODO: None of the whitelisted languages use [], but others do, and it
      // is unclear how [] is supposed to behave.
      pattern = pattern.replaceAll("|[])", ")");
      pattern = pattern.replaceAll(":[]", ":");
      return pattern;
    }

    // Converts a Prism grammar into { tokenName: [pattern, ...] }, where a
    // pattern is either a string or { pattern, alias?, inside? }.
    function flatten(grammar) {
      const cached = flattened.get(grammar);
      if (cached !== undefined) {
        return cached;
      }
      const keys = {};
      flattened.set(grammar, keys);

      // Merge the tokens of `rest` into a shallow copy of the grammar.
      let copy = grammar;
      const rest = copy.rest;
      if (rest) {
        copy = {};
        Object.keys(grammar).forEach(name => {
          copy[name] = grammar[name];
        });
        for (const token in rest) {
          copy[token] = rest[token];
        }
        delete copy.rest;
      }

      for (const token in copy) {
        if (!Object.hasOwn(copy, token) || !copy[token]) {
          continue;
        }
        const patterns = Array.isArray(copy[token]) ? copy[token] : [copy[token]];
        const indexes = [];
        for (const patternObj of patterns) {
          const inside = patternObj.inside;
          const lookbehind = !!patternObj.lookbehind;
          const greedy = !!patternObj.greedy;
          // Only the first alias is kept.
          const alias = Array.isArray(patternObj.alias) ? patternObj.alias[0] : patternObj.alias;
          const pattern = patternObj.pattern || patternObj;

          // "l" (lookbehind) and "y" (greedy) are appended after the regex
          // literal's own flags.
          let patternStr = sanitize(pattern.toString());
          if (lookbehind) {
            patternStr += "l";
          }
          if (greedy) {
            patternStr += "y";
          }

          let np;
          if (alias || inside) {
            np = { pattern: patternStr };
            if (alias) {
              np.alias = alias;
            }
            if (inside) {
              np.inside = flatten(inside);
            }
          } else if (pattern instanceof RegExp) {
            np = patternStr;
          } else {
            throw new Error(`Token "${token}" holds a value that is neither a RegExp nor a pattern object`);
          }
          tempPatterns.push(np);
          indexes.push(np);
        }
        keys[token] = indexes;
        tempTokens.push(indexes);
      }
      tempGrammars.push(keys);
      return keys;
    }

    // Returns the element of `pool` that is deep-equal to `value`.
    function canonical(pool, value, what) {
      const found = pool.find(x => isEqual(x, value));
      if (found === undefined) {
        throw new Error(`Internal error: no deduplicated ${what} matches`);
      }
      return found;
    }

    const unsupported = [
      "bsl",
      "coq",
      "gherkin",
      "jexl",
      "kumir",
      "pure",
      "purescript",
      "turtle",
      "sparql" // requires turtle
    ];

    await loadScript("https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-core.min.js");
    await loadScript("https://prismjs.com/components.js");
    await loadLanguages(Object.keys(components.languages));
    console.log(`\nLoaded all ${langNumber} languages`);
    console.log("Processing...");

    // Manually add local definitions
    loadLocalLanguage('./components/prism-tl.js', 'typelanguage', 'TypeLanguage', 'tl');

    Object.keys(Prism.languages).forEach(lng => {
      if (unsupported.includes(lng) || !components.languages[lng]) {
        return;
      }
      tempLanguages[lng] = flatten(Prism.languages[lng]);
    });

    const allTokens = uniqlo(tempTokens, isEqual);
    const allGrammars = uniqlo(tempGrammars, isEqual);
    const allPatterns = uniqlo(tempPatterns, isEqual);

    // Step 1: make every reference point at the surviving (deduplicated) object.
    Object.keys(tempLanguages).forEach(name => {
      tempLanguages[name] = canonical(allGrammars, tempLanguages[name], `grammar for language "${name}"`);
    });
    for (const pattern of allPatterns) {
      if (pattern.inside) {
        pattern.inside = canonical(allGrammars, pattern.inside, 'inside grammar');
      }
    }
    for (const token of allTokens) {
      for (let j = 0; j < token.length; j++) {
        token[j] = canonical(allPatterns, token[j], 'pattern');
      }
    }
    for (const grammar of allGrammars) {
      Object.keys(grammar).forEach(name => {
        grammar[name] = canonical(allTokens, grammar[name], `token list "${name}"`);
      });
    }

    // Step 2: replace object references by indexes. Tokens must be converted
    // before the patterns are turned into strings below.
    for (const pattern of allPatterns) {
      if (pattern.inside) {
        pattern.inside = allGrammars.indexOf(pattern.inside);
      }
    }
    for (const token of allTokens) {
      for (let j = 0; j < token.length; j++) {
        token[j] = allPatterns.indexOf(token[j]);
      }
    }

    // Step 3: serialize every pattern as "<regex>,<alias>,<inside>".
    for (let i = 0; i < allPatterns.length; i++) {
      const entry = allPatterns[i];
      if (entry.pattern) {
        let serialized = entry.pattern + ",";
        if (entry.alias) {
          serialized += entry.alias;
        }
        serialized += ",";
        // `inside` is a grammar index at this point and 0 is a valid index,
        // so test for presence rather than truthiness.
        if (entry.inside !== undefined) {
          serialized += entry.inside;
        }
        allPatterns[i] = serialized;
      } else {
        allPatterns[i] += ",,";
      }
    }

    const allLanguages = {};
    const languageNames = {};
    Object.keys(tempLanguages).forEach(name => {
      const grammar = canonical(allGrammars, tempLanguages[name], `grammar for language "${name}"`);
      const grammarIndex = allGrammars.indexOf(grammar);
      const info = components.languages[name];

      allLanguages[name] = grammarIndex;
      languageNames[name] = info.title;

      if (info.alias) {
        const aliases = Array.isArray(info.alias) ? info.alias : [info.alias];
        for (const lng of aliases) {
          allLanguages[lng] = grammarIndex;
          languageNames[lng] = info.aliasTitles ? info.aliasTitles[lng] : info.title;
        }
      }
    });

    const chunks = [];
    // Refuse values that would silently wrap around in the typed arrays.
    const checked = (value, max, what) => {
      if (!Number.isInteger(value) || value < 0 || value > max) {
        throw new RangeError(`${what} ${value} is out of range (0..${max})`);
      }
      return value;
    };
    const writeUint16 = i => chunks.push(new Uint16Array([checked(i, 0xFFFF, 'uint16 value')]));
    const writeUint8 = i => chunks.push(new Uint8Array([checked(i, 0xFF, 'uint8 value')]));
    const writeString = str => {
      if (str.length < 253) {
        writeUint8(str.length);
      } else {
        writeUint8(254 & 0xFF);
        writeUint8(str.length & 0xFF);
        writeUint8((str.length >> 8) & 0xFF);
        writeUint8((str.length >> 16) & 0xFF);
      }
      chunks.push(new Uint8Array(str.split('').map(char => char.charCodeAt(0))));
    };

    // Patterns
    writeUint16(allPatterns.length);
    allPatterns.forEach(pattern => {
      writeString(pattern);
    });

    // Grammars
    writeUint16(allGrammars.length);
    for (const grammar of allGrammars) {
      const names = Object.keys(grammar);
      writeUint8(names.length);
      names.forEach(name => {
        writeString(name);
        writeUint8(grammar[name].length);
        grammar[name].forEach(id => {
          writeUint16(id);
        });
      });
    }

    // Languages
    writeUint16(Object.keys(allLanguages).length);
    Object.keys(allLanguages).forEach(name => {
      writeString(name);
      writeString(languageNames[name] ? languageNames[name] : "");
      writeUint16(allLanguages[name]);
    });

    const blob = new Blob(chunks, {type: 'application/octet-binary'});
    console.log(blob);
    return blob;
  }
  /**
   * Writes the contents of a Blob to a file, synchronously once the bytes are read.
   *
   * @param {Blob} blob - Data to store.
   * @param {string} filename - Destination path.
   * @returns {Promise<void>}
   */
  async function saveBlob(blob, filename) {
    console.log(`Saving ${blob} to ${filename}`);
    const bytes = await blob.arrayBuffer();
    fs.writeFileSync(filename, Buffer.from(bytes));
  }
  // Entry point: build the grammar table and store it next to the library sources.
  const filepath = "libprisma/grammars.dat";
  generate()
    // Return the inner promise so that a failed write reaches the handler below.
    .then(blob => saveBlob(blob, filepath))
    .then(() => {
      console.log("Done! Saved to " + filepath);
    })
    .catch(error => {
      console.error("Failed to generate " + filepath, error);
      // Signal the failure to the calling shell / build script.
      process.exitCode = 1;
    });
  340. })