| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424 |
- const fs = require('fs')
- const isEqual = require('lodash.isequal')
- const SCRIPTS = {}
/**
 * Evaluates JavaScript source text in the global scope, so that top-level
 * declarations made by the script become globals.
 *
 * NOTE(review): this executes whatever text it is given with full privileges;
 * only pass sources that are trusted.
 *
 * @param {string|Buffer} src Script source; converted with `toString()`.
 */
const include = function (src) {
    // Calling eval through an alias makes it an *indirect* eval, which runs the
    // code in global scope instead of this function's scope.
    // Source: https://stackoverflow.com/a/23699187/17140794
    const globalEval = eval;
    globalEval(src.toString());
};
/**
 * Downloads a script and evaluates it in the global scope via `include`.
 *
 * @param {string} src URL of the script to fetch.
 * @returns {Promise<void>} Resolves once the script has been evaluated.
 * @throws {Error} When the server answers with a non-2xx status, so that an
 *   error page is never evaluated as if it were JavaScript.
 */
async function loadScript(src) {
    const response = await fetch(src);
    if (!response.ok) {
        throw new Error(`Failed to load ${src}: HTTP ${response.status} ${response.statusText}`);
    }
    include(await response.text());
}
/**
 * Loads one language id or a list of ids, strictly one after another.
 *
 * @param {string|string[]|undefined} lngs A single id, an array of ids, or a
 *   falsy value (in which case nothing is loaded).
 * @returns {Promise<void>}
 */
async function loadLanguages(lngs) {
    if (!lngs) {
        return;
    }
    const queue = Array.isArray(lngs) ? lngs : [lngs];
    // Sequential on purpose: each language is awaited before the next starts.
    for (let index = 0; index < queue.length; index += 1) {
        await loadLanguage(queue[index]);
    }
}
// Running count of language scripts downloaded so far (used for progress output).
let langNumber = 0;

/**
 * Loads a single Prism language after the languages it lists under
 * `optional`, `require` and `modify` in the `components` table.
 *
 * Entries without a `title` are skipped entirely. A language script is
 * downloaded at most once; `SCRIPTS` records which ids were already fetched.
 *
 * @param {string} lng Language id, a key of `components.languages`.
 * @returns {Promise<void>}
 */
async function loadLanguage(lng) {
    if (!components.languages[lng].title) {
        return;
    }

    // Related languages are always walked, even when `lng` itself is already loaded.
    for (const relation of ['optional', 'require', 'modify']) {
        await loadLanguages(components.languages[lng][relation]);
    }

    if (SCRIPTS[lng]) {
        return;
    }
    SCRIPTS[lng] = true;
    langNumber += 1;
    console.log(`${langNumber} | Loading ${lng}`);
    // TODO: version should probably not be hardcoded
    await loadScript(`https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-${lng}.min.js`);
}
/**
 * Evaluates a grammar script from the local file system and registers it in
 * the `components.languages` table.
 *
 * @param {string} path Path of the script file to read and evaluate.
 * @param {string} code Language id to register the entry under.
 * @param {string} title Title stored on the entry.
 * @param {string|string[]} alias Alias (or aliases) stored on the entry.
 */
function loadLocalLanguage(path, code, title, alias) {
    const source = fs.readFileSync(path);
    include(source);
    components.languages[code] = { title, alias };
}
/**
 * Removes duplicates from `a` in place, keeping the LAST occurrence of every
 * group of equal elements and preserving the relative order of the survivors.
 *
 * An element is dropped exactly when some later element is equal to it. After
 * a removal the same index is examined again, so a single call is enough to
 * leave the array duplicate-free.
 *
 * @param {Array} a Array to de-duplicate; it is mutated.
 * @param {(x: *, y: *) => boolean} [fn] Equality predicate. When omitted the
 *   array is returned untouched.
 * @returns {Array} The same array instance `a`.
 */
function unique(a, fn) {
    if (a.length <= 1 || !fn) {
        return a;
    }
    let i = 0;
    while (i < a.length) {
        let hasLaterDuplicate = false;
        for (let j = i + 1; j < a.length; j++) {
            if (fn(a[i], a[j])) {
                hasLaterDuplicate = true;
                break;
            }
        }
        if (hasLaterDuplicate) {
            // Do not advance: the element that slid into slot `i` is still unchecked.
            a.splice(i, 1);
        } else {
            i++;
        }
    }
    return a;
}
/**
 * Applies `unique` repeatedly until a pass no longer shrinks the array.
 *
 * @param {Array} a Array to de-duplicate.
 * @param {(x: *, y: *) => boolean} fn Equality predicate forwarded to `unique`.
 * @returns {Array} Whatever the final `unique` pass returned.
 */
function uniqlo(a, fn) {
    let current = a;
    let lengthBeforePass;
    do {
        lengthBeforePass = current.length;
        current = unique(current, fn);
    } while (current.length < lengthBeforePass);
    return current;
}
/**
 * Downloads Prism core, the components table and every listed language,
 * flattens the resulting grammars into three de-duplicated tables (patterns,
 * token pattern-lists, grammars) and serialises them into a binary blob.
 *
 * Blob layout, in order:
 *   - uint16 pattern count, then each pattern as a string
 *     "<regex source+flags>[l][y],<alias>,<inside grammar index>"
 *     (`l` = lookbehind, `y` = greedy; alias / inside may be empty);
 *   - uint16 grammar count, then per grammar: uint8 token count and, per
 *     token, its name, a uint8 pattern count and one uint16 pattern index each;
 *   - uint16 language count, then per language (aliases included): its id,
 *     its display title and the uint16 index of its grammar.
 *
 * @returns {Promise<Blob>} The serialised grammar table.
 * @throws {Error} When a pattern definition is not a RegExp, or when a value
 *   cannot be found in its de-duplicated table (an internal invariant).
 */
async function generate() {
    const tempPatterns = [];
    const tempLanguages = {};
    const tempTokens = [];
    const tempGrammars = [];
    // Memoises flattened grammars; also what makes self-referencing grammars terminate.
    const flattened = new WeakMap();

    /** Rewrites regex constructs the consumer of the table does not support. */
    function sanitize(pattern) {
        // Unsupported:
        // UTF-16 ranges
        // [^] => [\s\S] <- matches any character, including new line
        // [] => ?? <- matches _empty_ string
        // All the whitelisted languages have 0xFFFF as maximum range.
        // This is not the case for all the available grammars.
        pattern = pattern.replaceAll("\\uFFFF", "\\xFF");
        pattern = pattern.replaceAll("[^]", "[\\s\\S]");
        // TODO: This just brute-forces the regex to work, but of course the
        // result may vary from the original one (other \uXXXX escapes are untouched).
        // TODO: Again, none of the whitelisted languages use [], but others do.
        // However, it is unclear how [] is supposed to work.
        pattern = pattern.replaceAll("|[])", ")");
        pattern = pattern.replaceAll(":[]", ":");
        return pattern;
    }

    /**
     * Converts a Prism grammar into `{ tokenName: [pattern, ...] }`, where a
     * pattern is either a plain string or `{ pattern, alias?, inside? }`,
     * recording every pattern, token list and grammar it encounters.
     */
    function flatten(grammar) {
        const cached = flattened.get(grammar);
        if (cached !== undefined) {
            return cached;
        }
        const keys = {};
        // Registered before recursing so that cyclic `inside` references resolve to `keys`.
        flattened.set(grammar, keys);

        let copy = grammar;
        const rest = grammar.rest;
        if (rest) {
            // Merge the tokens of `rest` into a shallow copy and drop the `rest` key itself.
            copy = {};
            Object.keys(grammar).forEach(name => {
                copy[name] = grammar[name];
            });
            for (const name in rest) {
                copy[name] = rest[name];
            }
            delete copy.rest;
        }

        for (const token of Object.keys(copy)) {
            if (!copy[token]) {
                continue;
            }
            const definitions = Array.isArray(copy[token]) ? copy[token] : [copy[token]];
            const indexes = [];
            for (const patternObj of definitions) {
                const inside = patternObj.inside;
                const lookbehind = !!patternObj.lookbehind;
                const greedy = !!patternObj.greedy;
                // Only the first alias is kept.
                const alias = Array.isArray(patternObj.alias) ? patternObj.alias[0] : patternObj.alias;
                // A token may be a bare RegExp or an object wrapping one.
                const pattern = patternObj.pattern || patternObj;

                let patternStr = sanitize(pattern.toString());
                if (lookbehind) {
                    patternStr += "l";
                }
                if (greedy) {
                    patternStr += "y";
                }

                let np;
                if (alias || inside) {
                    np = { pattern: patternStr };
                    if (alias) {
                        np.alias = alias;
                    }
                    if (inside) {
                        np.inside = flatten(inside);
                    }
                } else if (pattern instanceof RegExp) {
                    np = patternStr;
                } else {
                    throw new Error(`Token "${token}" has a pattern that is not a RegExp`);
                }
                tempPatterns.push(np);
                indexes.push(np);
            }
            keys[token] = indexes;
            tempTokens.push(indexes);
        }
        tempGrammars.push(keys);
        return keys;
    }

    /** Returns the entry of `list` deep-equal to `value`; throws if there is none. */
    function canonical(list, value, what) {
        const found = list.find(candidate => isEqual(candidate, value));
        if (found === undefined) {
            throw new Error(`No canonical ${what} found after de-duplication`);
        }
        return found;
    }

    const unsupported = [
        "bsl",
        "coq",
        "gherkin",
        "jexl",
        "kumir",
        "pure",
        "purescript",
        "turtle",
        "sparql" // requires turtle
    ];

    await loadScript("https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-core.min.js");
    await loadScript("https://prismjs.com/components.js");
    await loadLanguages(Object.keys(components.languages));
    console.log(`\nLoaded all ${langNumber} languages`);
    console.log("Processing...");

    // Manually add local definitions
    loadLocalLanguage('./components/prism-tl.js', 'typelanguage', 'TypeLanguage', 'tl');

    Object.keys(Prism.languages).forEach(lng => {
        if (unsupported.includes(lng) || !components.languages[lng]) {
            return;
        }
        tempLanguages[lng] = flatten(Prism.languages[lng]);
    });

    const allTokens = uniqlo(tempTokens, isEqual);
    const allGrammars = uniqlo(tempGrammars, isEqual);
    const allPatterns = uniqlo(tempPatterns, isEqual);

    // Step 1: re-point every reference at the single surviving (canonical) instance.
    Object.keys(tempLanguages).forEach(name => {
        tempLanguages[name] = canonical(allGrammars, tempLanguages[name], `grammar for language "${name}"`);
    });
    for (const entry of allPatterns) {
        if (entry.inside) {
            entry.inside = canonical(allGrammars, entry.inside, "inside grammar");
        }
    }
    for (const token of allTokens) {
        for (let j = 0; j < token.length; j++) {
            token[j] = canonical(allPatterns, token[j], "pattern");
        }
    }
    for (const grammar of allGrammars) {
        Object.keys(grammar).forEach(name => {
            grammar[name] = canonical(allTokens, grammar[name], `token list "${name}"`);
        });
    }

    // Step 2: replace object references with indices into the tables.
    for (const entry of allPatterns) {
        if (entry.inside) {
            entry.inside = allGrammars.indexOf(entry.inside);
        }
    }
    for (const token of allTokens) {
        for (let j = 0; j < token.length; j++) {
            token[j] = allPatterns.indexOf(token[j]);
        }
    }

    // Step 3: turn every pattern into its "pattern,alias,inside" string form.
    for (let i = 0; i < allPatterns.length; i++) {
        const entry = allPatterns[i];
        if (entry.pattern) {
            let patternStr = entry.pattern + ",";
            if (entry.alias) {
                patternStr += entry.alias;
            }
            patternStr += ",";
            // `inside` is a numeric index by now and 0 is a valid grammar index,
            // so test for presence rather than truthiness.
            if (entry.inside !== undefined) {
                patternStr += entry.inside;
            }
            allPatterns[i] = patternStr;
        } else {
            allPatterns[i] += ",,";
        }
    }

    // Map every language id and alias to its grammar index and display title.
    const allLanguages = {};
    const languageNames = {};
    Object.keys(tempLanguages).forEach(name => {
        const grammar = canonical(allGrammars, tempLanguages[name], `grammar for language "${name}"`);
        const grammarIndex = allGrammars.indexOf(grammar);
        const component = components.languages[name];
        allLanguages[name] = grammarIndex;
        languageNames[name] = component.title;
        if (component.alias) {
            const aliases = Array.isArray(component.alias) ? component.alias : [component.alias];
            for (const lng of aliases) {
                allLanguages[lng] = grammarIndex;
                languageNames[lng] = component.aliasTitles ? component.aliasTitles[lng] : component.title;
            }
        }
    });

    const chunks = [];
    // Uint16Array stores the value in the platform's native byte order.
    const writeUint16 = i => chunks.push(new Uint16Array([i]));
    const writeUint8 = i => chunks.push(new Uint8Array([i]));
    // Strings shorter than 253 get a one-byte length prefix; longer ones get the
    // marker byte 254 followed by a 3-byte length, low byte first. Every UTF-16
    // code unit becomes one byte, so units above 0xFF are truncated.
    const writeString = str => {
        if (str.length < 253) {
            writeUint8(str.length);
        } else {
            writeUint8(254);
            writeUint8(str.length & 0xFF);
            writeUint8((str.length >> 8) & 0xFF);
            writeUint8((str.length >> 16) & 0xFF);
        }
        chunks.push(new Uint8Array(str.split('').map(char => char.charCodeAt(0))));
    };

    // Patterns
    writeUint16(allPatterns.length);
    allPatterns.forEach(pattern => {
        writeString(pattern);
    });

    // Grammars
    writeUint16(allGrammars.length);
    for (const grammar of allGrammars) {
        const names = Object.keys(grammar);
        writeUint8(names.length);
        names.forEach(name => {
            writeString(name);
            writeUint8(grammar[name].length);
            grammar[name].forEach(id => {
                writeUint16(id);
            });
        });
    }

    // Languages
    writeUint16(Object.keys(allLanguages).length);
    Object.keys(allLanguages).forEach(name => {
        writeString(name);
        if (languageNames[name]) {
            writeString(languageNames[name]);
        } else {
            writeString("");
        }
        writeUint16(allLanguages[name]);
    });

    const blob = new Blob(chunks, {type: 'application/octet-binary'});
    console.log(blob);
    return blob;
}
/**
 * Writes the full contents of a Blob to a file, synchronously once the
 * blob's bytes have been read.
 *
 * @param {Blob} blob Data to persist.
 * @param {string} filename Destination path.
 * @returns {Promise<void>}
 */
async function saveBlob(blob, filename) {
    console.log(`Saving ${blob} to ${filename}`);
    const bytes = await blob.arrayBuffer();
    fs.writeFileSync(filename, Buffer.from(bytes));
}
// Entry point: build the grammar table and write it to disk.
const filepath = "libprisma/grammars.dat";
generate()
    // Returning the promise keeps saveBlob failures on this chain instead of floating.
    .then(blob => saveBlob(blob, filepath))
    .then(() => {
        console.log("Done! Saved to " + filepath);
    })
    .catch(err => {
        // Report the failure and make the process exit with a non-zero status.
        console.error(err);
        process.exitCode = 1;
    });
|