match.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. var hasOwnProperty = Object.prototype.hasOwnProperty;
  2. var matchGraph = require('./match-graph');
  3. var MATCH = matchGraph.MATCH;
  4. var MISMATCH = matchGraph.MISMATCH;
  5. var DISALLOW_EMPTY = matchGraph.DISALLOW_EMPTY;
  6. var TYPE = require('../tokenizer/const').TYPE;
  7. var STUB = 0;
  8. var TOKEN = 1;
  9. var OPEN_SYNTAX = 2;
  10. var CLOSE_SYNTAX = 3;
  11. var EXIT_REASON_MATCH = 'Match';
  12. var EXIT_REASON_MISMATCH = 'Mismatch';
  13. var EXIT_REASON_ITERATION_LIMIT = 'Maximum iteration number exceeded (please fill an issue on https://github.com/csstree/csstree/issues)';
  14. var ITERATION_LIMIT = 10000;
  15. var totalIterationCount = 0;
  16. function mapList(list, fn) {
  17. var result = [];
  18. while (list) {
  19. result.unshift(fn(list));
  20. list = list.prev;
  21. }
  22. return result;
  23. }
  24. function areStringsEqualCaseInsensitive(testStr, referenceStr) {
  25. if (testStr.length !== referenceStr.length) {
  26. return false;
  27. }
  28. for (var i = 0; i < testStr.length; i++) {
  29. var testCode = testStr.charCodeAt(i);
  30. var referenceCode = referenceStr.charCodeAt(i);
  31. // testCode.toLowerCase() for U+0041 LATIN CAPITAL LETTER A (A) .. U+005A LATIN CAPITAL LETTER Z (Z).
  32. if (testCode >= 0x0041 && testCode <= 0x005A) {
  33. testCode = testCode | 32;
  34. }
  35. if (testCode !== referenceCode) {
  36. return false;
  37. }
  38. }
  39. return true;
  40. }
  41. function isCommaContextStart(token) {
  42. if (token === null) {
  43. return true;
  44. }
  45. var ch = token.value.charAt(token.value.length - 1);
  46. return (
  47. ch === ',' ||
  48. ch === '(' ||
  49. ch === '[' ||
  50. ch === '/'
  51. );
  52. }
  53. function isCommaContextEnd(token) {
  54. if (token === null) {
  55. return true;
  56. }
  57. var ch = token.value.charAt(0);
  58. return (
  59. ch === ')' ||
  60. ch === ']' ||
  61. ch === '/'
  62. );
  63. }
  64. function internalMatch(tokens, state, syntaxes) {
  65. function moveToNextToken() {
  66. do {
  67. tokenIndex++;
  68. token = tokenIndex < tokens.length ? tokens[tokenIndex] : null;
  69. } while (token !== null && !/\S/.test(token.value));
  70. }
  71. function getNextToken(offset) {
  72. var nextIndex = tokenIndex + offset;
  73. return nextIndex < tokens.length ? tokens[nextIndex] : null;
  74. }
  75. function stateSnapshotFromSyntax(nextState, prev) {
  76. return {
  77. nextState: nextState,
  78. matchStack: matchStack,
  79. syntaxStack: syntaxStack,
  80. thenStack: thenStack,
  81. tokenIndex: tokenIndex,
  82. token: token,
  83. prev: prev
  84. };
  85. }
  86. function pushThenStack(nextState) {
  87. thenStack = {
  88. nextState: nextState,
  89. matchStack: matchStack,
  90. syntaxStack: syntaxStack,
  91. prev: thenStack
  92. };
  93. }
  94. function pushElseStack(nextState) {
  95. elseStack = stateSnapshotFromSyntax(nextState, elseStack);
  96. }
  97. function addTokenToMatch() {
  98. matchStack = {
  99. type: TOKEN,
  100. syntax: state.syntax,
  101. token: token,
  102. prev: matchStack
  103. };
  104. moveToNextToken();
  105. syntaxStash = null;
  106. if (tokenIndex > longestMatch) {
  107. longestMatch = tokenIndex;
  108. }
  109. }
  110. function openSyntax() {
  111. syntaxStack = {
  112. syntax: state,
  113. opts: state.syntax.opts || (syntaxStack !== null && syntaxStack.opts) || null,
  114. prev: syntaxStack
  115. };
  116. matchStack = {
  117. type: OPEN_SYNTAX,
  118. syntax: state.syntax,
  119. token: matchStack.token,
  120. prev: matchStack
  121. };
  122. }
  123. function closeSyntax() {
  124. if (matchStack.type === OPEN_SYNTAX) {
  125. matchStack = matchStack.prev;
  126. } else {
  127. matchStack = {
  128. type: CLOSE_SYNTAX,
  129. syntax: syntaxStack.syntax,
  130. token: matchStack.token,
  131. prev: matchStack
  132. };
  133. }
  134. syntaxStack = syntaxStack.prev;
  135. }
  136. var syntaxStack = null;
  137. var thenStack = null;
  138. var elseStack = null;
  139. // null – stashing allowed, nothing stashed
  140. // false – stashing disabled, nothing stashed
  141. // anithing else – fail stashable syntaxes, some syntax stashed
  142. var syntaxStash = null;
  143. var iterationCount = 0; // count iterations and prevent infinite loop
  144. var exitReason = null;
  145. var token = null;
  146. var tokenIndex = -1;
  147. var longestMatch = 0;
  148. var matchStack = {
  149. type: STUB,
  150. syntax: null,
  151. token: null,
  152. prev: null
  153. };
  154. moveToNextToken();
  155. while (exitReason === null && ++iterationCount < ITERATION_LIMIT) {
  156. // console.log('--\n',
  157. // '#' + iterationCount,
  158. // require('util').inspect({
  159. // match: mapList(matchStack, x => x.type === TOKEN ? x.token && x.token.value : x.syntax ? ({ [OPEN_SYNTAX]: '<', [CLOSE_SYNTAX]: '</' }[x.type] || x.type) + '!' + x.syntax.name : null),
  160. // elseStack: mapList(elseStack, x => x.id),
  161. // thenStack: mapList(thenStack, x => x.id),
  162. // token: token && token.value,
  163. // tokenCursor,
  164. // syntax: syntax.type + (syntax.id ? ' #' + syntax.id : '')
  165. // }, { depth: null })
  166. // );
  167. switch (state.type) {
  168. case 'Match':
  169. if (thenStack === null) {
  170. // turn to MISMATCH when some tokens left unmatched
  171. if (token !== null) {
  172. // doesn't mismatch if just one token left and it's an IE hack
  173. if (tokenIndex !== tokens.length - 1 || (token.value !== '\\0' && token.value !== '\\9')) {
  174. state = MISMATCH;
  175. break;
  176. }
  177. }
  178. // break the main loop, return a result - MATCH
  179. exitReason = EXIT_REASON_MATCH;
  180. break;
  181. }
  182. // go to next syntax (`then` branch)
  183. state = thenStack.nextState;
  184. // check match is not empty
  185. if (state === DISALLOW_EMPTY) {
  186. if (thenStack.matchStack.token === matchStack.token) {
  187. state = MISMATCH;
  188. break;
  189. } else {
  190. state = MATCH;
  191. }
  192. }
  193. // close syntax if needed
  194. while (syntaxStack !== null && thenStack.syntaxStack !== syntaxStack) {
  195. closeSyntax();
  196. }
  197. // pop stack
  198. thenStack = thenStack.prev;
  199. break;
  200. case 'Mismatch':
  201. // when some syntax is stashed
  202. if (syntaxStash !== null && syntaxStash !== false) {
  203. // there is no else branches or a branch reduce match stack
  204. if (elseStack === null || tokenIndex > elseStack.tokenIndex) {
  205. // restore state from the stash
  206. elseStack = syntaxStash;
  207. syntaxStash = false; // disable stashing
  208. }
  209. } else if (elseStack === null) {
  210. // no else branches -> break the main loop
  211. // return a result - MISMATCH
  212. exitReason = EXIT_REASON_MISMATCH;
  213. break;
  214. }
  215. // go to next syntax (`else` branch)
  216. state = elseStack.nextState;
  217. // restore all the rest stack states
  218. thenStack = elseStack.thenStack;
  219. syntaxStack = elseStack.syntaxStack;
  220. matchStack = elseStack.matchStack;
  221. tokenIndex = elseStack.tokenIndex;
  222. token = elseStack.token;
  223. // pop stack
  224. elseStack = elseStack.prev;
  225. break;
  226. case 'MatchGraph':
  227. state = state.match;
  228. break;
  229. case 'If':
  230. // IMPORTANT: else stack push must go first,
  231. // since it stores the state of thenStack before changes
  232. if (state.else !== MISMATCH) {
  233. pushElseStack(state.else);
  234. }
  235. if (state.then !== MATCH) {
  236. pushThenStack(state.then);
  237. }
  238. state = state.match;
  239. break;
  240. case 'MatchOnce':
  241. state = {
  242. type: 'MatchOnceBuffer',
  243. syntax: state,
  244. index: 0,
  245. mask: 0
  246. };
  247. break;
  248. case 'MatchOnceBuffer':
  249. var terms = state.syntax.terms;
  250. if (state.index === terms.length) {
  251. // no matches at all or it's required all terms to be matched
  252. if (state.mask === 0 || state.syntax.all) {
  253. state = MISMATCH;
  254. break;
  255. }
  256. // a partial match is ok
  257. state = MATCH;
  258. break;
  259. }
  260. // all terms are matched
  261. if (state.mask === (1 << terms.length) - 1) {
  262. state = MATCH;
  263. break;
  264. }
  265. for (; state.index < terms.length; state.index++) {
  266. var matchFlag = 1 << state.index;
  267. if ((state.mask & matchFlag) === 0) {
  268. // IMPORTANT: else stack push must go first,
  269. // since it stores the state of thenStack before changes
  270. pushElseStack(state);
  271. pushThenStack({
  272. type: 'AddMatchOnce',
  273. syntax: state.syntax,
  274. mask: state.mask | matchFlag
  275. });
  276. // match
  277. state = terms[state.index++];
  278. break;
  279. }
  280. }
  281. break;
  282. case 'AddMatchOnce':
  283. state = {
  284. type: 'MatchOnceBuffer',
  285. syntax: state.syntax,
  286. index: 0,
  287. mask: state.mask
  288. };
  289. break;
  290. case 'Enum':
  291. if (token !== null) {
  292. var name = token.value.toLowerCase();
  293. // drop \0 and \9 hack from keyword name
  294. if (name.indexOf('\\') !== -1) {
  295. name = name.replace(/\\[09].*$/, '');
  296. }
  297. if (hasOwnProperty.call(state.map, name)) {
  298. state = state.map[name];
  299. break;
  300. }
  301. }
  302. state = MISMATCH;
  303. break;
  304. case 'Generic':
  305. var opts = syntaxStack !== null ? syntaxStack.opts : null;
  306. var tokenCount = Math.floor(state.fn(token, getNextToken, opts));
  307. if (!isNaN(tokenCount) && tokenCount > 0) {
  308. for (var lastTokenIndex = tokenIndex + tokenCount; tokenIndex < lastTokenIndex;) {
  309. addTokenToMatch();
  310. }
  311. state = MATCH;
  312. } else {
  313. state = MISMATCH;
  314. }
  315. break;
  316. case 'Type':
  317. case 'Property':
  318. var syntaxDict = state.type === 'Type' ? 'types' : 'properties';
  319. var dictSyntax = hasOwnProperty.call(syntaxes, syntaxDict) ? syntaxes[syntaxDict][state.name] : null;
  320. if (!dictSyntax || !dictSyntax.match) {
  321. throw new Error(
  322. 'Bad syntax reference: ' +
  323. (state.type === 'Type'
  324. ? '<' + state.name + '>'
  325. : '<\'' + state.name + '\'>')
  326. );
  327. }
  328. // stash a syntax for types with low priority
  329. if (syntaxStash !== false && token !== null && state.type === 'Type') {
  330. var lowPriorityMatching =
  331. // https://drafts.csswg.org/css-values-4/#custom-idents
  332. // When parsing positionally-ambiguous keywords in a property value, a <custom-ident> production
  333. // can only claim the keyword if no other unfulfilled production can claim it.
  334. (state.name === 'custom-ident' && token.type === TYPE.Ident) ||
  335. // https://drafts.csswg.org/css-values-4/#lengths
  336. // ... if a `0` could be parsed as either a <number> or a <length> in a property (such as line-height),
  337. // it must parse as a <number>
  338. (state.name === 'length' && token.value === '0');
  339. if (lowPriorityMatching) {
  340. if (syntaxStash === null) {
  341. syntaxStash = stateSnapshotFromSyntax(state, elseStack);
  342. }
  343. state = MISMATCH;
  344. break;
  345. }
  346. }
  347. openSyntax();
  348. state = dictSyntax.match;
  349. break;
  350. case 'Keyword':
  351. var name = state.name;
  352. if (token !== null) {
  353. var keywordName = token.value;
  354. // drop \0 and \9 hack from keyword name
  355. if (keywordName.indexOf('\\') !== -1) {
  356. keywordName = keywordName.replace(/\\[09].*$/, '');
  357. }
  358. if (areStringsEqualCaseInsensitive(keywordName, name)) {
  359. addTokenToMatch();
  360. state = MATCH;
  361. break;
  362. }
  363. }
  364. state = MISMATCH;
  365. break;
  366. case 'AtKeyword':
  367. case 'Function':
  368. if (token !== null && areStringsEqualCaseInsensitive(token.value, state.name)) {
  369. addTokenToMatch();
  370. state = MATCH;
  371. break;
  372. }
  373. state = MISMATCH;
  374. break;
  375. case 'Token':
  376. if (token !== null && token.value === state.value) {
  377. addTokenToMatch();
  378. state = MATCH;
  379. break;
  380. }
  381. state = MISMATCH;
  382. break;
  383. case 'Comma':
  384. if (token !== null && token.value === ',') {
  385. if (isCommaContextStart(matchStack.token)) {
  386. state = MISMATCH;
  387. } else {
  388. addTokenToMatch();
  389. state = isCommaContextEnd(token) ? MISMATCH : MATCH;
  390. }
  391. } else {
  392. state = isCommaContextStart(matchStack.token) || isCommaContextEnd(token) ? MATCH : MISMATCH;
  393. }
  394. break;
  395. case 'String':
  396. var string = '';
  397. for (var lastTokenIndex = tokenIndex; lastTokenIndex < tokens.length && string.length < state.value.length; lastTokenIndex++) {
  398. string += tokens[lastTokenIndex].value;
  399. }
  400. if (areStringsEqualCaseInsensitive(string, state.value)) {
  401. for (; tokenIndex < lastTokenIndex;) {
  402. addTokenToMatch();
  403. }
  404. state = MATCH;
  405. } else {
  406. state = MISMATCH;
  407. }
  408. break;
  409. default:
  410. throw new Error('Unknown node type: ' + state.type);
  411. }
  412. }
  413. totalIterationCount += iterationCount;
  414. if (exitReason === null) {
  415. console.warn('[csstree-match] BREAK after ' + ITERATION_LIMIT + ' iterations');
  416. exitReason = EXIT_REASON_ITERATION_LIMIT;
  417. }
  418. if (exitReason === EXIT_REASON_MATCH) {
  419. while (syntaxStack !== null) {
  420. closeSyntax();
  421. }
  422. } else {
  423. matchStack = null;
  424. }
  425. return {
  426. tokens: tokens,
  427. reason: exitReason,
  428. iterations: iterationCount,
  429. match: matchStack,
  430. longestMatch: longestMatch
  431. };
  432. }
  433. function matchAsList(tokens, matchGraph, syntaxes) {
  434. var matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
  435. if (matchResult.match !== null) {
  436. matchResult.match = mapList(matchResult.match, function(item) {
  437. if (item.type === OPEN_SYNTAX || item.type === CLOSE_SYNTAX) {
  438. return { type: item.type, syntax: item.syntax };
  439. }
  440. return {
  441. syntax: item.syntax,
  442. token: item.token && item.token.value,
  443. node: item.token && item.token.node
  444. };
  445. }).slice(1);
  446. }
  447. return matchResult;
  448. }
  449. function matchAsTree(tokens, matchGraph, syntaxes) {
  450. var matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
  451. if (matchResult.match === null) {
  452. return matchResult;
  453. }
  454. var cursor = matchResult.match;
  455. var host = matchResult.match = {
  456. syntax: matchGraph.syntax || null,
  457. match: []
  458. };
  459. var stack = [host];
  460. // revert a list
  461. var prev = null;
  462. var next = null;
  463. while (cursor !== null) {
  464. next = cursor.prev;
  465. cursor.prev = prev;
  466. prev = cursor;
  467. cursor = next;
  468. }
  469. // init the cursor to start with 2nd item since 1st is a stub item
  470. cursor = prev.prev;
  471. // build a tree
  472. while (cursor !== null && cursor.syntax !== null) {
  473. var entry = cursor;
  474. switch (entry.type) {
  475. case OPEN_SYNTAX:
  476. host.match.push(host = {
  477. syntax: entry.syntax,
  478. match: []
  479. });
  480. stack.push(host);
  481. break;
  482. case CLOSE_SYNTAX:
  483. stack.pop();
  484. host = stack[stack.length - 1];
  485. break;
  486. default:
  487. host.match.push({
  488. syntax: entry.syntax || null,
  489. token: entry.token.value,
  490. node: entry.token.node
  491. });
  492. }
  493. cursor = cursor.prev;
  494. }
  495. return matchResult;
  496. }
  497. module.exports = {
  498. matchAsList: matchAsList,
  499. matchAsTree: matchAsTree,
  500. getTotalIterationCount: function() {
  501. return totalIterationCount;
  502. }
  503. };