parseUtils.js 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. 'use strict';
  2. var fs = require('fs');
  3. var _ = require('lodash');
  4. var acorn = require('acorn');
  5. var walk = require('acorn/dist/walk');
  6. module.exports = {
  7. parseBundle
  8. };
  /**
   * Reads a webpack bundle from disk and extracts the source text of its modules.
   *
   * The file is read synchronously, parsed with `acorn`, and its call expressions
   * are walked until one of the known "modules list" shapes is found. Errors
   * thrown while reading or parsing the file are not caught here.
   *
   * @param {string} bundlePath Path of the bundle file to read.
   * @returns {{src: string, modules: Object}|null} `src` is the whole file content
   *   and `modules` maps each module id to the slice of `src` covering that module;
   *   `null` when no modules list was recognised.
   */
  function parseBundle(bundlePath) {
    const content = fs.readFileSync(bundlePath, 'utf8');
    const ast = acorn.parse(content, {
      sourceType: 'script',
      // I believe in a bright future of ECMAScript!
      // Actually, it's set to `2050` to support the latest ECMAScript version that currently exists.
      // Seems like `acorn` supports such weird option value.
      ecmaVersion: 2050
    });
    const walkState = {
      locations: null
    };

    walk.recursive(ast, walkState, {
      CallExpression(node, state, c) {
        // The modules list has already been found: ignore every other call so that
        // a later, unrelated match cannot overwrite it.
        if (state.locations) return;

        const args = node.arguments;

        // Additional bundle without webpack loader.
        // Modules are stored in second argument, after chunk ids:
        // webpackJsonp([<chunks>], <modules>, ...)
        // As function name may be changed with `output.jsonpFunction` option we can't rely on its default name.
        if (
          node.callee.type === 'Identifier' &&
          args.length >= 2 &&
          isArgumentContainsChunkIds(args[0]) &&
          isArgumentContainsModulesList(args[1])
        ) {
          state.locations = getModulesLocationFromFunctionArgument(args[1]);
          return;
        }

        // Additional bundle without webpack loader, with module IDs optimized.
        // Modules are stored in second arguments Array(n).concat() call
        // webpackJsonp([<chunks>], Array([minimum ID]).concat([<module>, <module>, ...]))
        // As function name may be changed with `output.jsonpFunction` option we can't rely on its default name.
        if (
          node.callee.type === 'Identifier' &&
          (args.length === 2 || args.length === 3) &&
          isArgumentContainsChunkIds(args[0]) &&
          isArgumentArrayConcatContainingChunks(args[1])
        ) {
          state.locations = getModulesLocationFromArrayConcat(args[1]);
          return;
        }

        // Main bundle with webpack loader
        // Modules are stored in first argument:
        // (function (...) {...})(<modules>)
        if (
          node.callee.type === 'FunctionExpression' &&
          !node.callee.id &&
          args.length === 1 &&
          isArgumentContainsModulesList(args[0])
        ) {
          state.locations = getModulesLocationFromFunctionArgument(args[0]);
          return;
        }

        // Additional bundles with webpack 4 are loaded with:
        // (window.webpackJsonp=window.webpackJsonp||[]).push([[chunkId], [<module>, <module>], [[optional_entries]]]);
        if (
          isAsyncChunkPushExpression(node) &&
          args.length === 1 &&
          isArgumentContainingChunkIdsAndModulesList(args[0])
        ) {
          state.locations = getModulesLocationFromFunctionArgument(args[0].elements[1]);
          return;
        }

        // Walking into arguments because some of plugins (e.g. `DedupePlugin`) or some Webpack
        // features (e.g. `umd` library output) can wrap modules list into additional IIFE.
        _.each(args, (arg) => {
          c(arg, state);
        });
      }
    });

    if (!walkState.locations) {
      return null;
    }

    return {
      src: content,
      // Turn every `{start, end}` location into the matching piece of source text.
      modules: _.mapValues(walkState.locations, (loc) => content.slice(loc.start, loc.end))
    };
  }
  /**
   * Tells whether an AST node is an array literal made only of ids.
   *
   * @param {Object} arg AST node of a call argument.
   * @returns {boolean} `true` when `arg` is an `ArrayExpression` whose every element
   *   satisfies `isModuleId`.
   */
  function isArgumentContainsChunkIds(arg) {
    if (arg.type !== 'ArrayExpression') {
      return false;
    }

    // Array of numeric or string ids. Chunk IDs are strings when NamedChunksPlugin is used
    return _.every(arg.elements, isModuleId);
  }
  /**
   * Tells whether an AST node looks like a list of modules.
   *
   * Two shapes are accepted: an object literal whose property values all satisfy
   * `isModuleWrapper`, or an array literal whose non-empty elements all do.
   *
   * @param {Object} arg AST node of a call argument.
   * @returns {boolean} `true` when `arg` matches one of the two shapes.
   */
  function isArgumentContainsModulesList(arg) {
    switch (arg.type) {
      case 'ObjectExpression': {
        const propertyValues = _.map(arg.properties, 'value');
        return _.every(propertyValues, isModuleWrapper);
      }

      case 'ArrayExpression':
        // Modules are contained in array.
        // Array indexes are module ids.
        // Some of array items may be skipped because there is no module with such id.
        return _.every(arg.elements, (elem) => !elem || isModuleWrapper(elem));

      default:
        return false;
    }
  }
  /**
   * Tells whether an AST node is an array literal of the shape
   * `[<chunk ids>, <modules list>, ...]`.
   *
   * @param {Object} arg AST node of a call argument.
   * @returns {boolean} `true` when `arg` is an `ArrayExpression` with at least two
   *   elements, the first accepted by `isArgumentContainsChunkIds` and the second
   *   by `isArgumentContainsModulesList`.
   */
  function isArgumentContainingChunkIdsAndModulesList(arg) {
    if (arg.type !== 'ArrayExpression' || arg.elements.length < 2) {
      return false;
    }

    const [chunkIds, modulesList] = arg.elements;

    // Elided array items (`[, modules]`) are `null` in the AST; such an array
    // cannot be a chunk push payload and must not reach the `.type` checks below.
    if (!chunkIds || !modulesList) {
      return false;
    }

    return isArgumentContainsChunkIds(chunkIds) && isArgumentContainsModulesList(modulesList);
  }
  /**
   * Tells whether an AST node is a call of the form
   * `Array(<non-negative integer>).concat([...])`.
   *
   * Modules are contained in `Array(<minimum ID>).concat(` array:
   * https://github.com/webpack/webpack/blob/v1.14.0/lib/Template.js#L91
   * The `<minimum ID>` + array indexes are module ids.
   *
   * @param {Object} arg AST node of a call argument.
   * @returns {boolean} `true` when `arg` has exactly that shape.
   */
  function isArgumentArrayConcatContainingChunks(arg) {
    if (arg.type !== 'CallExpression' || arg.callee.type !== 'MemberExpression') {
      return false;
    }

    const receiver = arg.callee.object;
    const method = arg.callee.property;

    // Make sure the object called is `Array(<some number>)`
    const isArrayOfMinimumId =
      receiver.type === 'CallExpression' &&
      receiver.callee.type === 'Identifier' &&
      receiver.callee.name === 'Array' &&
      receiver.arguments.length === 1 &&
      isNumericId(receiver.arguments[0]);

    if (!isArrayOfMinimumId) {
      return false;
    }

    // Make sure the property X called for `Array(<some number>).X` is `concat`
    if (method.type !== 'Identifier' || method.name !== 'concat') {
      return false;
    }

    // Make sure exactly one array is passed in to `concat`
    return arg.arguments.length === 1 && arg.arguments[0].type === 'ArrayExpression';
  }
  /**
   * Tells whether a call expression has the shape
   * `(window.<name> = ...).push(...)` or `(this.<name> = ...).push(...)`.
   *
   * @param {Object} node `CallExpression` AST node.
   * @returns {boolean} `true` when the callee is a `push` member of an assignment
   *   whose target is a member of `window` or of `this`.
   */
  function isAsyncChunkPushExpression(node) {
    const callee = node.callee;

    if (
      callee.type !== 'MemberExpression' ||
      callee.property.name !== 'push' ||
      callee.object.type !== 'AssignmentExpression'
    ) {
      return false;
    }

    // The assignment target only has an `object` when it is a member expression.
    // For a plain target such as `(queue = queue || []).push(x)` it is missing,
    // and reading `.name` from it would throw.
    const owner = callee.object.left.object;

    if (!owner) {
      return false;
    }

    // Webpack 4 uses `this` instead of `window`
    return owner.name === 'window' || owner.type === 'ThisExpression';
  }
  /**
   * Tells whether an AST node can stand for a module inside a modules list.
   *
   * @param {Object|null|undefined} node AST node; may be missing, e.g. the `value`
   *   of an object spread property (`{...rest}`) is `undefined`.
   * @returns {boolean} `true` for an anonymous function or arrow function, for a
   *   module id, or for an array with more than one element whose first element
   *   is a module id; `false` otherwise, including when `node` is missing.
   */
  function isModuleWrapper(node) {
    // Nothing to inspect: treat a missing node as "not a module" instead of throwing.
    if (!node) {
      return false;
    }

    // It's an anonymous function expression that wraps module
    if ((node.type === 'FunctionExpression' || node.type === 'ArrowFunctionExpression') && !node.id) {
      return true;
    }

    // If `DedupePlugin` is used it can be an ID of duplicated module...
    if (isModuleId(node)) {
      return true;
    }

    // or an array of shape [<module_id>, ...args]
    if (node.type !== 'ArrayExpression' || node.elements.length <= 1) {
      return false;
    }

    // The first element is `null` when it is elided (`[, a]`).
    const head = node.elements[0];
    return Boolean(head) && isModuleId(head);
  }
  /**
   * Tells whether an AST node is a literal usable as a module or chunk id.
   *
   * @param {Object|null|undefined} node AST node; array holes show up as `null`.
   * @returns {boolean} `true` when `node` is a `Literal` holding either a
   *   non-negative integer or a string; `false` otherwise, including for a
   *   missing node.
   */
  function isModuleId(node) {
    // Guard against `null` entries so that sparse arrays don't crash the caller.
    if (!node || node.type !== 'Literal') {
      return false;
    }

    return isNumericId(node) || typeof node.value === 'string';
  }
  /**
   * Tells whether an AST node is a literal holding a non-negative integer.
   *
   * @param {Object} node AST node.
   * @returns {boolean} `true` when `node` is a `Literal` whose value is an
   *   integer greater than or equal to zero.
   */
  function isNumericId(node) {
    if (node.type !== 'Literal') {
      return false;
    }

    const value = node.value;
    return Number.isInteger(value) && value >= 0;
  }
  /**
   * Collects the source location of every module found in a modules list node.
   *
   * For an object literal the result is keyed by property key (identifier name or
   * literal value); for an array literal it is keyed by element index, skipping
   * empty slots. Any other node type yields an empty object.
   *
   * @param {Object} arg AST node holding the modules list.
   * @returns {Object} Map of module id to the value returned by `getModuleLocation`.
   */
  function getModulesLocationFromFunctionArgument(arg) {
    const locations = {};

    if (arg.type === 'ObjectExpression') {
      const properties = arg.properties;

      for (let index = 0; index < properties.length; index++) {
        const property = properties[index];
        const id = property.key.name || property.key.value;
        locations[id] = getModuleLocation(property.value);
      }
    } else if (arg.type === 'ArrayExpression') {
      const elements = arg.elements;

      for (let index = 0; index < elements.length; index++) {
        const element = elements[index];

        // No module is stored under this index.
        if (!element) continue;

        locations[index] = getModuleLocation(element);
      }
    }

    return locations;
  }
  /**
   * Collects module locations from an `Array(<minId>).concat([...])` call node.
   *
   * arg(CallExpression) =
   *   Array([minId]).concat([<minId module>, <minId+1 module>, ...])
   *
   * @param {Object} arg `CallExpression` AST node of the shape shown above.
   * @returns {Object} Map of module id (`minId` + array index) to the value
   *   returned by `getModuleLocation`; empty slots are skipped.
   */
  function getModulesLocationFromArrayConcat(arg) {
    // The modules reside in the `concat()` function call arguments
    const elements = arg.arguments[0].elements;
    // Get the [minId] value from the Array() call first argument literal value
    const firstId = arg.callee.object.arguments[0].value;
    const locations = {};

    for (let offset = 0; offset < elements.length; offset++) {
      const element = elements[offset];

      if (!element) continue;

      locations[offset + firstId] = getModuleLocation(element);
    }

    return locations;
  }
  /**
   * Extracts the position of an AST node.
   *
   * @param {Object} node AST node.
   * @returns {Object} New object holding only the `start` and `end` properties of `node`.
   */
  function getModuleLocation(node) {
    const locationKeys = ['start', 'end'];
    return _.pick(node, locationKeys);
  }