implib-gen.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598
  1. #!/usr/bin/env python3
  2. # Copyright 2017-2023 Yury Gribov
  3. #
  4. # The MIT License (MIT)
  5. #
  6. # Use of this source code is governed by MIT license that can be
  7. # found in the LICENSE.txt file.
  8. """
  9. Generates static import library for POSIX shared library
  10. """
  11. import sys
  12. import os.path
  13. import re
  14. import subprocess
  15. import argparse
  16. import string
  17. import configparser
# Base name of this script; used as the prefix of warnings/errors and in the
# usage example printed by --help.
me = os.path.basename(__file__)
# Directory holding this script; the 'arch' template tree is looked up
# relative to it.
root = os.path.dirname(__file__)
  20. def warn(msg):
  21. """Emits a nicely-decorated warning."""
  22. sys.stderr.write(f'{me}: warning: {msg}\n')
  23. def error(msg):
  24. """Emits a nicely-decorated error and exits."""
  25. sys.stderr.write(f'{me}: error: {msg}\n')
  26. sys.exit(1)
  27. def run(args, stdin=''):
  28. """Runs external program and aborts on error."""
  29. env = os.environ.copy()
  30. # Force English language
  31. env['LC_ALL'] = 'c'
  32. try:
  33. del env["LANG"]
  34. except KeyError:
  35. pass
  36. with subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
  37. stderr=subprocess.PIPE, env=env) as p:
  38. out, err = p.communicate(input=stdin.encode('utf-8'))
  39. out = out.decode('utf-8')
  40. err = err.decode('utf-8')
  41. if p.returncode != 0 or err:
  42. error(f"{args[0]} failed with retcode {p.returncode}:\n{err}")
  43. return out, err
  44. def make_toc(words, renames=None):
  45. "Make an mapping of words to their indices in list"
  46. renames = renames or {}
  47. toc = {}
  48. for i, n in enumerate(words):
  49. name = renames.get(n, n)
  50. toc[i] = name
  51. return toc
  52. def parse_row(words, toc, hex_keys):
  53. "Make a mapping from column names to values"
  54. vals = {k: (words[i] if i < len(words) else '') for i, k in toc.items()}
  55. for k in hex_keys:
  56. if vals[k]:
  57. vals[k] = int(vals[k], 16)
  58. return vals
  59. def collect_syms(f):
  60. """Collect ELF dynamic symtab."""
  61. # --dyn-syms does not always work for some reason so dump all symtabs
  62. out, _ = run(['readelf', '-sW', f])
  63. toc = None
  64. syms = []
  65. syms_set = set()
  66. for line in out.splitlines():
  67. line = line.strip()
  68. if not line:
  69. # Next symtab
  70. toc = None
  71. continue
  72. words = re.split(r' +', line)
  73. if line.startswith('Num'): # Header?
  74. if toc is not None:
  75. error("multiple headers in output of readelf")
  76. # Colons are different across readelf versions so get rid of them.
  77. toc = make_toc(map(lambda n: n.replace(':', ''), words))
  78. elif toc is not None:
  79. sym = parse_row(words, toc, ['Value'])
  80. name = sym['Name']
  81. if not name:
  82. continue
  83. if name in syms_set:
  84. continue
  85. syms_set.add(name)
  86. sym['Size'] = int(sym['Size'], 0) # Readelf is inconistent on Size format
  87. if '@' in name:
  88. sym['Default'] = '@@' in name
  89. name, ver = re.split(r'@+', name)
  90. sym['Name'] = name
  91. sym['Version'] = ver
  92. else:
  93. sym['Default'] = True
  94. sym['Version'] = None
  95. syms.append(sym)
  96. if toc is None:
  97. error(f"failed to analyze symbols in {f}")
  98. # Also collected demangled names
  99. if syms:
  100. out, _ = run(['c++filt'], '\n'.join((sym['Name'] for sym in syms)))
  101. out = out.rstrip("\n") # Some c++filts append newlines at the end
  102. for i, name in enumerate(out.split("\n")):
  103. syms[i]['Demangled Name'] = name
  104. return syms
  105. def collect_relocs(f):
  106. """Collect ELF dynamic relocs."""
  107. out, _ = run(['readelf', '-rW', f])
  108. toc = None
  109. rels = []
  110. for line in out.splitlines():
  111. line = line.strip()
  112. if not line:
  113. toc = None
  114. continue
  115. if line == 'There are no relocations in this file.':
  116. return []
  117. if re.match(r'^\s*Type[0-9]:', line): # Spurious lines for MIPS
  118. continue
  119. if re.match(r'^\s*Offset', line): # Header?
  120. if toc is not None:
  121. error("multiple headers in output of readelf")
  122. words = re.split(r'\s\s+', line) # "Symbol's Name + Addend"
  123. toc = make_toc(words)
  124. elif toc is not None:
  125. line = re.sub(r' \+ ', '+', line)
  126. words = re.split(r'\s+', line)
  127. rel = parse_row(words, toc, ['Offset', 'Info'])
  128. rels.append(rel)
  129. # Split symbolic representation
  130. sym_name = 'Symbol\'s Name + Addend'
  131. if sym_name not in rel and 'Symbol\'s Name' in rel:
  132. # Adapt to different versions of readelf
  133. rel[sym_name] = rel['Symbol\'s Name'] + '+0'
  134. if rel[sym_name]:
  135. p = rel[sym_name].split('+')
  136. if len(p) == 1:
  137. p = ['', p[0]]
  138. rel[sym_name] = (p[0], int(p[1], 16))
  139. if toc is None:
  140. error(f"failed to analyze relocations in {f}")
  141. return rels
  142. def collect_sections(f):
  143. """Collect section info from ELF."""
  144. out, _ = run(['readelf', '-SW', f])
  145. toc = None
  146. sections = []
  147. for line in out.splitlines():
  148. line = line.strip()
  149. if not line:
  150. continue
  151. line = re.sub(r'\[\s+', '[', line)
  152. words = re.split(r' +', line)
  153. if line.startswith('[Nr]'): # Header?
  154. if toc is not None:
  155. error("multiple headers in output of readelf")
  156. toc = make_toc(words, {'Addr' : 'Address'})
  157. elif line.startswith('[') and toc is not None:
  158. sec = parse_row(words, toc, ['Address', 'Off', 'Size'])
  159. if 'A' in sec['Flg']: # Allocatable section?
  160. sections.append(sec)
  161. if toc is None:
  162. error(f"failed to analyze sections in {f}")
  163. return sections
  164. def read_unrelocated_data(input_name, syms, secs):
  165. """Collect unrelocated data from ELF."""
  166. data = {}
  167. with open(input_name, 'rb') as f:
  168. def is_symbol_in_section(sym, sec):
  169. sec_end = sec['Address'] + sec['Size']
  170. is_start_in_section = sec['Address'] <= sym['Value'] < sec_end
  171. is_end_in_section = sym['Value'] + sym['Size'] <= sec_end
  172. return is_start_in_section and is_end_in_section
  173. for name, s in sorted(syms.items(), key=lambda s: s[1]['Value']):
  174. # TODO: binary search (bisect)
  175. sec = [sec for sec in secs if is_symbol_in_section(s, sec)]
  176. if len(sec) != 1:
  177. error(f"failed to locate section for interval [{s['Value']:x}, {s['Value'] + s['Size']:x})")
  178. sec = sec[0]
  179. f.seek(sec['Off'])
  180. data[name] = f.read(s['Size'])
  181. return data
  182. def collect_relocated_data(syms, bites, rels, ptr_size, reloc_types):
  183. """Identify relocations for each symbol"""
  184. data = {}
  185. for name, s in sorted(syms.items()):
  186. b = bites.get(name)
  187. assert b is not None
  188. if s['Demangled Name'].startswith('typeinfo name'):
  189. data[name] = [('byte', int(x)) for x in b]
  190. continue
  191. data[name] = []
  192. for i in range(0, len(b), ptr_size):
  193. val = int.from_bytes(b[i*ptr_size:(i + 1)*ptr_size], byteorder='little')
  194. data[name].append(('offset', val))
  195. start = s['Value']
  196. finish = start + s['Size']
  197. # TODO: binary search (bisect)
  198. for rel in rels:
  199. if rel['Type'] in reloc_types and start <= rel['Offset'] < finish:
  200. i = (rel['Offset'] - start) // ptr_size
  201. assert i < len(data[name])
  202. data[name][i] = 'reloc', rel
  203. return data
  204. def generate_vtables(cls_tables, cls_syms, cls_data):
  205. """Generate code for vtables"""
  206. c_types = {
  207. 'reloc' : 'const void *',
  208. 'byte' : 'unsigned char',
  209. 'offset' : 'size_t'
  210. }
  211. ss = []
  212. ss.append('''\
  213. #ifdef __cplusplus
  214. extern "C" {
  215. #endif
  216. ''')
  217. # Print externs
  218. printed = set()
  219. for name, data in sorted(cls_data.items()):
  220. for typ, val in data:
  221. if typ != 'reloc':
  222. continue
  223. sym_name, addend = val['Symbol\'s Name + Addend']
  224. sym_name = re.sub(r'@.*', '', sym_name) # Can we pin version in C?
  225. if sym_name not in cls_syms and sym_name not in printed:
  226. ss.append(f'''\
  227. extern const char {sym_name}[];
  228. ''')
  229. # Collect variable infos
  230. code_info = {}
  231. for name, s in sorted(cls_syms.items()):
  232. data = cls_data[name]
  233. if s['Demangled Name'].startswith('typeinfo name'):
  234. declarator = 'const unsigned char %s[]'
  235. else:
  236. field_types = (f'{c_types[typ]} field_{i};' for i, (typ, _) in enumerate(data))
  237. declarator = 'const struct { %s } %%s' % ' '.join(field_types) # pylint: disable=C0209 # consider-using-f-string
  238. vals = []
  239. for typ, val in data:
  240. if typ != 'reloc':
  241. vals.append(str(val) + 'UL')
  242. else:
  243. sym_name, addend = val['Symbol\'s Name + Addend']
  244. sym_name = re.sub(r'@.*', '', sym_name) # Can we pin version in C?
  245. vals.append(f'(const char *)&{sym_name} + {addend}')
  246. code_info[name] = (declarator, '{ %s }' % ', '.join(vals)) # pylint: disable= C0209 # consider-using-f-string
  247. # Print declarations
  248. for name, (decl, _) in sorted(code_info.items()):
  249. type_name = name + '_type'
  250. type_decl = decl % type_name
  251. ss.append(f'''\
  252. typedef {type_decl};
  253. extern __attribute__((weak)) {type_name} {name};
  254. ''')
  255. # Print definitions
  256. for name, (_, init) in sorted(code_info.items()):
  257. type_name = name + '_type'
  258. ss.append(f'''\
  259. const {type_name} {name} = {init};
  260. ''')
  261. ss.append('''\
  262. #ifdef __cplusplus
  263. } // extern "C"
  264. #endif
  265. ''')
  266. return ''.join(ss)
  267. def read_soname(f):
  268. """Read ELF's SONAME."""
  269. out, _ = run(['readelf', '-d', f])
  270. for line in out.splitlines():
  271. line = line.strip()
  272. if not line:
  273. continue
  274. # 0x000000000000000e (SONAME) Library soname: [libndp.so.0]
  275. soname_match = re.search(r'\(SONAME\).*\[(.+)\]', line)
  276. if soname_match is not None:
  277. return soname_match[1]
  278. return None
def main():
    """Driver function.

    Parses the command line, inspects the given library through the
    readelf/c++filt based helpers of this file, and writes two files into the
    output directory: '<library basename>.tramp.S' (built from the selected
    architecture's table and trampoline templates) and
    '<library basename>.init.c' (built from the common init template, plus
    the generated vtable code when --vtables is given).
    """
    parser = argparse.ArgumentParser(description="Generate wrappers for shared library functions.",
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog=f"""\
Examples:
$ python3 {me} /usr/lib/x86_64-linux-gnu/libaccountsservice.so.0
Generating libaccountsservice.so.0.tramp.S...
Generating libaccountsservice.so.0.init.c...
""")

    parser.add_argument('library',
                        metavar='LIB',
                        help="Library to be wrapped.")
    parser.add_argument('--verbose', '-v',
                        help="Print diagnostic info",
                        action='count',
                        default=0)
    # --dlopen/--no-dlopen share one destination; the last one given wins
    parser.add_argument('--dlopen',
                        help="Emit dlopen call (default)",
                        dest='dlopen', action='store_true', default=True)
    parser.add_argument('--no-dlopen',
                        help="Do not emit dlopen call (user must load/unload library himself)",
                        dest='dlopen', action='store_false')
    parser.add_argument('--dlopen-callback',
                        help="Call user-provided custom callback to load library instead of dlopen",
                        default='')
    parser.add_argument('--dlsym-callback',
                        help="Call user-provided custom callback to resolve a symbol, "
                             "instead of dlsym",
                        default='')
    parser.add_argument('--library-load-name',
                        help="Use custom name for dlopened library (default is SONAME)")
    parser.add_argument('--lazy-load',
                        help="Load library on first call to any of it's functions (default)",
                        dest='lazy_load', action='store_true', default=True)
    parser.add_argument('--no-lazy-load',
                        help="Load library at program start",
                        dest='lazy_load', action='store_false')
    parser.add_argument('--vtables',
                        help="Intercept virtual tables (EXPERIMENTAL)",
                        dest='vtables', action='store_true', default=False)
    parser.add_argument('--no-vtables',
                        help="Do not intercept virtual tables (default)",
                        dest='vtables', action='store_false')
    parser.add_argument('--no-weak-symbols',
                        help="Don't bind weak symbols", dest='no_weak_symbols',
                        action='store_true', default=False)
    # The default target is the last field of os.uname() of the running host
    parser.add_argument('--target',
                        help="Target platform triple e.g. x86_64-unknown-linux-gnu or arm-none-eabi "
                             "(atm x86_64, i[0-9]86, arm/armhf/armeabi, aarch64/armv8, "
                             "mips/mipsel, mips64/mip64el and e2k are supported)",
                        default=os.uname()[-1])
    parser.add_argument('--symbol-list',
                        help="Path to file with symbols that should be present in wrapper "
                             "(all by default)")
    parser.add_argument('--symbol-prefix',
                        metavar='PFX',
                        help="Prefix wrapper symbols with PFX",
                        default='')
    parser.add_argument('-q', '--quiet',
                        help="Do not print progress info",
                        action='store_true')
    parser.add_argument('--outdir', '-o',
                        help="Path to create wrapper at",
                        default='./')

    args = parser.parse_args()

    input_name = args.library
    verbose = args.verbose
    dlopen_callback = args.dlopen_callback
    dlsym_callback = args.dlsym_callback
    dlopen = args.dlopen
    lazy_load = args.lazy_load
    # Reduce the target triple to the name of a directory under 'arch'
    if args.target.startswith('arm'):
        target = 'arm'  # Handle armhf-..., armel-...
    elif re.match(r'^i[0-9]86', args.target):
        target = 'i386'
    elif args.target.startswith('mips64'):
        target = 'mips64'  # Handle mips64-..., mips64el-..., mips64le-...
    elif args.target.startswith('mips'):
        target = 'mips'  # Handle mips-..., mipsel-..., mipsle-...
    else:
        target = args.target.split('-')[0]
    quiet = args.quiet
    outdir = args.outdir

    # Optional user-supplied symbol list: one name per line, '#' starts a
    # comment, blank lines are ignored. None means "wrap everything".
    if args.symbol_list is None:
        funs = None
    else:
        with open(args.symbol_list, 'r') as f:
            funs = []
            for line in re.split(r'\r?\n', f.read()):
                line = re.sub(r'#.*', '', line)
                line = line.strip()
                if line:
                    funs.append(line)

    # Name substituted into the init template as load_name: explicit option
    # first, then the SONAME, then the file's base name.
    if args.library_load_name is not None:
        load_name = args.library_load_name
    else:
        load_name = read_soname(input_name)
        if load_name is None:
            load_name = os.path.basename(input_name)

    # Collect target info

    target_dir = os.path.join(root, 'arch', target)

    if not os.path.exists(target_dir):
        error(f"unknown architecture '{target}'")

    cfg = configparser.ConfigParser(inline_comment_prefixes=';')
    cfg.read(target_dir + '/config.ini')

    ptr_size = int(cfg['Arch']['PointerSize'])
    # Comma-separated list of relocation type names
    symbol_reloc_types = set(re.split(r'\s*,\s*', cfg['Arch']['SymbolReloc']))

    # A symbol counts as exported when it is non-local, typed, defined and not
    # _init/_fini; weak symbols are additionally dropped with --no-weak-symbols.
    def is_exported(s):
        conditions = [
            s['Bind'] != 'LOCAL',
            s['Type'] != 'NOTYPE',
            s['Ndx'] != 'UND',
            s['Name'] not in ['', '_init', '_fini']]
        if args.no_weak_symbols:
            conditions.append(s['Bind'] != 'WEAK')
        return all(conditions)

    syms = list(filter(is_exported, collect_syms(input_name)))

    # OBJECT symbols are only reported below, never wrapped. Symbols whose
    # demangled name contains ' for ' are exempt when --vtables is on.
    def is_data_symbol(s):
        return (s['Type'] == 'OBJECT'
                # Allow vtables if --vtables is on
                and not (' for ' in s['Demangled Name'] and args.vtables))

    exported_data = [s['Name'] for s in syms if is_data_symbol(s)]
    if exported_data:
        # TODO: we can generate wrappers for const data without relocations (or only code relocations)
        warn(f"library '{input_name}' contains data symbols which won't be intercepted: "
             + ', '.join(exported_data))

    # Collect functions
    # (user-specified functions missing from the library are reported below)

    orig_funs = filter(lambda s: s['Type'] == 'FUNC', syms)

    all_funs = set()
    warn_versioned = False
    for s in orig_funs:
        if not s['Default']:
            # TODO: support versions
            # Warn only once, however many non-default versions are skipped
            if not warn_versioned:
                warn(f"library {input_name} contains versioned symbols which are NYI")
                warn_versioned = True
            if verbose:
                print(f"Skipping versioned symbol {s['Name']}")
            continue
        all_funs.add(s['Name'])

    if funs is None:
        funs = sorted(list(all_funs))
        if not funs and not quiet:
            warn(f"no public functions were found in {input_name}")
    else:
        # Keep the user's order but drop names the library does not export
        missing_funs = [name for name in funs if name not in all_funs]
        if missing_funs:
            warn("some user-specified functions are not present in library: " + ', '.join(missing_funs))
        funs = [name for name in funs if name in all_funs]

    if verbose:
        print("Exported functions:")
        for i, fun in enumerate(funs):
            print(f" {i}: {fun}")

    # Collect vtables

    if args.vtables:
        cls_tables = {}  # class name -> {'vtable'|'typeinfo'|'typeinfo name': symbol name}
        cls_syms = {}  # symbol name -> symbol dict

        for s in syms:
            m = re.match(r'^(vtable|typeinfo|typeinfo name) for (.*)', s['Demangled Name'])
            if m is not None and is_exported(s):
                typ, cls = m.groups()
                name = s['Name']
                cls_tables.setdefault(cls, {})[typ] = name
                cls_syms[name] = s

        if verbose:
            print("Exported classes:")
            for cls, _ in sorted(cls_tables.items()):
                print(f" {cls}")

        secs = collect_sections(input_name)
        if verbose:
            print("Sections:")
            for sec in secs:
                print(f" {sec['Name']}: [{sec['Address']:x}, {sec['Address'] + sec['Size']:x}), "
                      f"at {sec['Off']:x}")

        # Raw bytes of every class symbol, before relocations are applied
        bites = read_unrelocated_data(input_name, cls_syms, secs)

        rels = collect_relocs(input_name)
        if verbose:
            print("Relocs:")
            for rel in rels:
                sym_add = rel['Symbol\'s Name + Addend']
                print(f" {rel['Offset']}: {sym_add}")

        cls_data = collect_relocated_data(cls_syms, bites, rels, ptr_size, symbol_reloc_types)
        if verbose:
            print("Class data:")
            for name, data in sorted(cls_data.items()):
                demangled_name = cls_syms[name]['Demangled Name']
                print(f" {name} ({demangled_name}):")
                for typ, val in data:
                    print(" " + str(val if typ != 'reloc' else val['Symbol\'s Name + Addend']))

    # Generate assembly code

    suffix = os.path.basename(input_name)
    # Replace every run of characters other than letters, digits and '_'
    lib_suffix = re.sub(r'[^a-zA-Z_0-9]+', '_', suffix)

    tramp_file = f'{suffix}.tramp.S'
    with open(os.path.join(outdir, tramp_file), 'w') as f:
        if not quiet:
            print(f"Generating {tramp_file}...")
        with open(target_dir + '/table.S.tpl', 'r') as t:
            # The table is sized for one pointer per function plus one extra
            table_text = string.Template(t.read()).substitute(
                lib_suffix=lib_suffix,
                table_size=ptr_size*(len(funs) + 1))
        f.write(table_text)

        with open(target_dir + '/trampoline.S.tpl', 'r') as t:
            tramp_tpl = string.Template(t.read())

        # One trampoline per function; 'offset' is its byte offset i*ptr_size
        for i, name in enumerate(funs):
            tramp_text = tramp_tpl.substitute(
                lib_suffix=lib_suffix,
                sym=args.symbol_prefix + name,
                offset=i*ptr_size,
                number=i)
            f.write(tramp_text)

    # Generate C code

    init_file = f'{suffix}.init.c'
    with open(os.path.join(outdir, init_file), 'w') as f:
        if not quiet:
            print(f"Generating {init_file}...")
        with open(os.path.join(root, 'arch/common/init.c.tpl'), 'r') as t:
            # Quoted, comma-terminated list of function names for the template
            if funs:
                sym_names = ',\n '.join(f'"{name}"' for name in funs) + ','
            else:
                sym_names = ''
            init_text = string.Template(t.read()).substitute(
                lib_suffix=lib_suffix,
                load_name=load_name,
                dlopen_callback=dlopen_callback,
                dlsym_callback=dlsym_callback,
                has_dlopen_callback=int(bool(dlopen_callback)),
                has_dlsym_callback=int(bool(dlsym_callback)),
                no_dlopen=int(not dlopen),
                lazy_load=int(lazy_load),
                sym_names=sym_names)
            f.write(init_text)
        # The vtable code is appended to the same .init.c file
        if args.vtables:
            vtable_text = generate_vtables(cls_tables, cls_syms, cls_data)
            f.write(vtable_text)
# Run the driver only when executed as a script, not when imported.
if __name__ == '__main__':
    main()