#!/usr/bin/env python3

# Copyright 2017-2023 Yury Gribov
#
# The MIT License (MIT)
#
# Use of this source code is governed by MIT license that can be
# found in the LICENSE.txt file.

"""
Generates a static import library for a POSIX shared library.
"""
import sys
import os.path
import re
import subprocess
import argparse
import string
import configparser

me = os.path.basename(__file__)
root = os.path.dirname(__file__)

def warn(msg):
    """Emits a nicely-decorated warning."""
    sys.stderr.write(f'{me}: warning: {msg}\n')

def error(msg):
    """Emits a nicely-decorated error and exits."""
    sys.stderr.write(f'{me}: error: {msg}\n')
    sys.exit(1)

def run(args, stdin=''):
    """Runs external program and aborts on error."""
    env = os.environ.copy()
    # Force English language
    env['LC_ALL'] = 'c'
    try:
        del env["LANG"]
    except KeyError:
        pass
    with subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE, env=env) as p:
        out, err = p.communicate(input=stdin.encode('utf-8'))
    out = out.decode('utf-8')
    err = err.decode('utf-8')
    if p.returncode != 0 or err:
        error(f"{args[0]} failed with retcode {p.returncode}:\n{err}")
    return out, err

def make_toc(words, renames=None):
    """Make a mapping from column indices to (optionally renamed) column names."""
    renames = renames or {}
    toc = {}
    for i, n in enumerate(words):
        name = renames.get(n, n)
        toc[i] = name
    return toc

def parse_row(words, toc, hex_keys):
    """Make a mapping from column names to values."""
    vals = {k: (words[i] if i < len(words) else '') for i, k in toc.items()}
    for k in hex_keys:
        if vals[k]:
            vals[k] = int(vals[k], 16)
    return vals
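
# Illustrative example of the two helpers above (hypothetical values):
#   toc = make_toc(['Num', 'Value', 'Size'])         # -> {0: 'Num', 1: 'Value', 2: 'Size'}
#   parse_row(['5', '1139', '23'], toc, ['Value'])   # -> {'Num': '5', 'Value': 0x1139, 'Size': '23'}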

def collect_syms(f):
    """Collect ELF dynamic symtab."""
    # --dyn-syms does not always work for some reason so dump all symtabs
    out, _ = run(['readelf', '-sW', f])
    toc = None
    syms = []
    syms_set = set()
    for line in out.splitlines():
        line = line.strip()
        if not line:
            # Next symtab
            toc = None
            continue
        words = re.split(r' +', line)
        if line.startswith('Num'):  # Header?
            if toc is not None:
                error("multiple headers in output of readelf")
            # Colons are different across readelf versions so get rid of them.
            toc = make_toc(map(lambda n: n.replace(':', ''), words))
        elif toc is not None:
            sym = parse_row(words, toc, ['Value'])
            name = sym['Name']
            if not name:
                continue
            if name in syms_set:
                continue
            syms_set.add(name)
            sym['Size'] = int(sym['Size'], 0)  # Readelf is inconsistent on Size format
            if '@' in name:
                sym['Default'] = '@@' in name
                name, ver = re.split(r'@+', name)
                sym['Name'] = name
                sym['Version'] = ver
            else:
                sym['Default'] = True
                sym['Version'] = None
            syms.append(sym)
    if toc is None:
        error(f"failed to analyze symbols in {f}")
    # Also collect demangled names
    if syms:
        out, _ = run(['c++filt'], '\n'.join((sym['Name'] for sym in syms)))
        out = out.rstrip("\n")  # Some c++filts append newlines at the end
        for i, name in enumerate(out.split("\n")):
            syms[i]['Demangled Name'] = name
    return syms
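
# For reference, a typical 'readelf -sW' symbol row looks like the following
# (columns may vary slightly across binutils versions; values are illustrative):
#    Num:    Value          Size Type    Bind   Vis      Ndx Name
#      5: 0000000000001139    23 FUNC    GLOBAL DEFAULT   12 foo@@LIBFOO_1.0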

def collect_relocs(f):
    """Collect ELF dynamic relocs."""
    out, _ = run(['readelf', '-rW', f])
    toc = None
    rels = []
    for line in out.splitlines():
        line = line.strip()
        if not line:
            toc = None
            continue
        if line == 'There are no relocations in this file.':
            return []
        if re.match(r'^\s*Type[0-9]:', line):  # Spurious lines for MIPS
            continue
        if re.match(r'^\s*Offset', line):  # Header?
            if toc is not None:
                error("multiple headers in output of readelf")
            words = re.split(r'\s\s+', line)  # "Symbol's Name + Addend"
            toc = make_toc(words)
        elif toc is not None:
            line = re.sub(r' \+ ', '+', line)
            words = re.split(r'\s+', line)
            rel = parse_row(words, toc, ['Offset', 'Info'])
            rels.append(rel)
            # Split symbolic representation
            sym_name = 'Symbol\'s Name + Addend'
            if sym_name not in rel and 'Symbol\'s Name' in rel:
                # Adapt to different versions of readelf
                rel[sym_name] = rel['Symbol\'s Name'] + '+0'
            if rel[sym_name]:
                p = rel[sym_name].split('+')
                if len(p) == 1:
                    p = ['', p[0]]
                rel[sym_name] = (p[0], int(p[1], 16))
    if toc is None:
        error(f"failed to analyze relocations in {f}")
    return rels
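
# For reference, a typical 'readelf -rW' relocation row looks like the following
# (exact columns depend on the binutils version and target; values are illustrative):
#     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
# 000000003fd8  000300000006 R_X86_64_GLOB_DAT      0000000000000000 foo + 0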

def collect_sections(f):
    """Collect section info from ELF."""
    out, _ = run(['readelf', '-SW', f])
    toc = None
    sections = []
    for line in out.splitlines():
        line = line.strip()
        if not line:
            continue
        line = re.sub(r'\[\s+', '[', line)
        words = re.split(r' +', line)
        if line.startswith('[Nr]'):  # Header?
            if toc is not None:
                error("multiple headers in output of readelf")
            toc = make_toc(words, {'Addr': 'Address'})
        elif line.startswith('[') and toc is not None:
            sec = parse_row(words, toc, ['Address', 'Off', 'Size'])
            if 'A' in sec['Flg']:  # Allocatable section?
                sections.append(sec)
    if toc is None:
        error(f"failed to analyze sections in {f}")
    return sections
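
# For reference, a typical 'readelf -SW' section row looks like the following
# (values are illustrative):
#   [Nr] Name   Type     Address          Off    Size   ES Flg Lk Inf Al
#   [16] .data  PROGBITS 0000000000004000 003000 000010 00  WA  0   0  8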

def read_unrelocated_data(input_name, syms, secs):
    """Collect unrelocated data from ELF."""
    data = {}
    with open(input_name, 'rb') as f:
        def is_symbol_in_section(sym, sec):
            sec_end = sec['Address'] + sec['Size']
            is_start_in_section = sec['Address'] <= sym['Value'] < sec_end
            is_end_in_section = sym['Value'] + sym['Size'] <= sec_end
            return is_start_in_section and is_end_in_section
        for name, s in sorted(syms.items(), key=lambda s: s[1]['Value']):
            # TODO: binary search (bisect)
            sec = [sec for sec in secs if is_symbol_in_section(s, sec)]
            if len(sec) != 1:
                error(f"failed to locate section for interval [{s['Value']:x}, {s['Value'] + s['Size']:x})")
            sec = sec[0]
            # Seek to the symbol's file offset within its section
            f.seek(sec['Off'] + (s['Value'] - sec['Address']))
            data[name] = f.read(s['Size'])
    return data

def collect_relocated_data(syms, bites, rels, ptr_size, reloc_types):
    """Identify relocations for each symbol"""
    data = {}
    for name, s in sorted(syms.items()):
        b = bites.get(name)
        assert b is not None
        if s['Demangled Name'].startswith('typeinfo name'):
            data[name] = [('byte', int(x)) for x in b]
            continue
        data[name] = []
        # i already advances in steps of ptr_size, so slice byte ranges directly
        for i in range(0, len(b), ptr_size):
            val = int.from_bytes(b[i:i + ptr_size], byteorder='little')
            data[name].append(('offset', val))
        start = s['Value']
        finish = start + s['Size']
        # TODO: binary search (bisect)
        for rel in rels:
            if rel['Type'] in reloc_types and start <= rel['Offset'] < finish:
                i = (rel['Offset'] - start) // ptr_size
                assert i < len(data[name])
                data[name][i] = 'reloc', rel
    return data
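
# The mapping returned above has the form {symbol_name: [(kind, value), ...]},
# where kind is 'byte', 'offset' or 'reloc'; generate_vtables() below turns each
# entry list into a C struct initializer.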

def generate_vtables(cls_tables, cls_syms, cls_data):
    """Generate code for vtables"""
    c_types = {
        'reloc': 'const void *',
        'byte': 'unsigned char',
        'offset': 'size_t'
    }

    ss = []
    ss.append('''\
#ifdef __cplusplus
extern "C" {
#endif
''')

    # Print externs
    printed = set()
    for name, data in sorted(cls_data.items()):
        for typ, val in data:
            if typ != 'reloc':
                continue
            sym_name, addend = val['Symbol\'s Name + Addend']
            sym_name = re.sub(r'@.*', '', sym_name)  # Can we pin version in C?
            if sym_name not in cls_syms and sym_name not in printed:
                printed.add(sym_name)
                ss.append(f'''\
extern const char {sym_name}[];
''')

    # Collect variable infos
    code_info = {}
    for name, s in sorted(cls_syms.items()):
        data = cls_data[name]
        if s['Demangled Name'].startswith('typeinfo name'):
            declarator = 'const unsigned char %s[]'
        else:
            field_types = (f'{c_types[typ]} field_{i};' for i, (typ, _) in enumerate(data))
            declarator = 'const struct { %s } %%s' % ' '.join(field_types)  # pylint: disable=C0209  # consider-using-f-string
        vals = []
        for typ, val in data:
            if typ != 'reloc':
                vals.append(str(val) + 'UL')
            else:
                sym_name, addend = val['Symbol\'s Name + Addend']
                sym_name = re.sub(r'@.*', '', sym_name)  # Can we pin version in C?
                vals.append(f'(const char *)&{sym_name} + {addend}')
        code_info[name] = (declarator, '{ %s }' % ', '.join(vals))  # pylint: disable=C0209  # consider-using-f-string

    # Print declarations
    for name, (decl, _) in sorted(code_info.items()):
        type_name = name + '_type'
        type_decl = decl % type_name
        ss.append(f'''\
typedef {type_decl};
extern __attribute__((weak)) {type_name} {name};
''')

    # Print definitions
    for name, (_, init) in sorted(code_info.items()):
        type_name = name + '_type'
        ss.append(f'''\
const {type_name} {name} = {init};
''')

    ss.append('''\
#ifdef __cplusplus
} // extern "C"
#endif
''')

    return ''.join(ss)
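
# For a hypothetical class Foo exporting a vtable and typeinfo, the emitted C
# looks roughly like this (illustrative only; fields follow the ELF data layout):
#   typedef const struct { size_t field_0; const void * field_1; } _ZTV3Foo_type;
#   extern __attribute__((weak)) _ZTV3Foo_type _ZTV3Foo;
#   const _ZTV3Foo_type _ZTV3Foo = { 16UL, (const char *)&_ZTI3Foo + 0 };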

def read_soname(f):
    """Read ELF's SONAME."""
    out, _ = run(['readelf', '-d', f])
    for line in out.splitlines():
        line = line.strip()
        if not line:
            continue
        # 0x000000000000000e (SONAME) Library soname: [libndp.so.0]
        soname_match = re.search(r'\(SONAME\).*\[(.+)\]', line)
        if soname_match is not None:
            return soname_match[1]
    return None

def main():
    """Driver function"""
    parser = argparse.ArgumentParser(description="Generate wrappers for shared library functions.",
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog=f"""\
Examples:
  $ python3 {me} /usr/lib/x86_64-linux-gnu/libaccountsservice.so.0
  Generating libaccountsservice.so.0.tramp.S...
  Generating libaccountsservice.so.0.init.c...
""")

    parser.add_argument('library',
                        metavar='LIB',
                        help="Library to be wrapped.")
    parser.add_argument('--verbose', '-v',
                        help="Print diagnostic info",
                        action='count',
                        default=0)
    parser.add_argument('--dlopen',
                        help="Emit dlopen call (default)",
                        dest='dlopen', action='store_true', default=True)
    parser.add_argument('--no-dlopen',
                        help="Do not emit dlopen call (user must load/unload library himself)",
                        dest='dlopen', action='store_false')
    parser.add_argument('--dlopen-callback',
                        help="Call user-provided custom callback to load library instead of dlopen",
                        default='')
    parser.add_argument('--dlsym-callback',
                        help="Call user-provided custom callback to resolve a symbol, "
                             "instead of dlsym",
                        default='')
    parser.add_argument('--library-load-name',
                        help="Use custom name for dlopened library (default is SONAME)")
    parser.add_argument('--lazy-load',
                        help="Load library on first call to any of its functions (default)",
                        dest='lazy_load', action='store_true', default=True)
    parser.add_argument('--no-lazy-load',
                        help="Load library at program start",
                        dest='lazy_load', action='store_false')
    parser.add_argument('--vtables',
                        help="Intercept virtual tables (EXPERIMENTAL)",
                        dest='vtables', action='store_true', default=False)
    parser.add_argument('--no-vtables',
                        help="Do not intercept virtual tables (default)",
                        dest='vtables', action='store_false')
    parser.add_argument('--no-weak-symbols',
                        help="Don't bind weak symbols", dest='no_weak_symbols',
                        action='store_true', default=False)
    parser.add_argument('--target',
                        help="Target platform triple e.g. x86_64-unknown-linux-gnu or arm-none-eabi "
                             "(atm x86_64, i[0-9]86, arm/armhf/armeabi, aarch64/armv8, "
                             "mips/mipsel, mips64/mips64el and e2k are supported)",
                        default=os.uname()[-1])
    parser.add_argument('--symbol-list',
                        help="Path to file with symbols that should be present in wrapper "
                             "(all by default)")
    parser.add_argument('--symbol-prefix',
                        metavar='PFX',
                        help="Prefix wrapper symbols with PFX",
                        default='')
    parser.add_argument('-q', '--quiet',
                        help="Do not print progress info",
                        action='store_true')
    parser.add_argument('--outdir', '-o',
                        help="Path to create wrapper at",
                        default='./')

    args = parser.parse_args()

    input_name = args.library
    verbose = args.verbose
    dlopen_callback = args.dlopen_callback
    dlsym_callback = args.dlsym_callback
    dlopen = args.dlopen
    lazy_load = args.lazy_load

    if args.target.startswith('arm'):
        target = 'arm'  # Handle armhf-..., armel-...
    elif re.match(r'^i[0-9]86', args.target):
        target = 'i386'
    elif args.target.startswith('mips64'):
        target = 'mips64'  # Handle mips64-..., mips64el-..., mips64le-...
    elif args.target.startswith('mips'):
        target = 'mips'  # Handle mips-..., mipsel-..., mipsle-...
    else:
        target = args.target.split('-')[0]
    quiet = args.quiet
    outdir = args.outdir

    if args.symbol_list is None:
        funs = None
    else:
        with open(args.symbol_list, 'r') as f:
            funs = []
            for line in re.split(r'\r?\n', f.read()):
                line = re.sub(r'#.*', '', line)
                line = line.strip()
                if line:
                    funs.append(line)

    if args.library_load_name is not None:
        load_name = args.library_load_name
    else:
        load_name = read_soname(input_name)
        if load_name is None:
            load_name = os.path.basename(input_name)

    # Collect target info
    target_dir = os.path.join(root, 'arch', target)
    if not os.path.exists(target_dir):
        error(f"unknown architecture '{target}'")

    cfg = configparser.ConfigParser(inline_comment_prefixes=';')
    cfg.read(target_dir + '/config.ini')

    ptr_size = int(cfg['Arch']['PointerSize'])
    symbol_reloc_types = set(re.split(r'\s*,\s*', cfg['Arch']['SymbolReloc']))
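
    # A target's config.ini is expected to provide at least the two keys read above,
    # e.g. (illustrative values for x86_64; real files live under arch/<target>/):
    #   [Arch]
    #   PointerSize = 8
    #   SymbolReloc = R_X86_64_64, R_X86_64_GLOB_DAT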

    def is_exported(s):
        conditions = [
            s['Bind'] != 'LOCAL',
            s['Type'] != 'NOTYPE',
            s['Ndx'] != 'UND',
            s['Name'] not in ['', '_init', '_fini']]
        if args.no_weak_symbols:
            conditions.append(s['Bind'] != 'WEAK')
        return all(conditions)

    syms = list(filter(is_exported, collect_syms(input_name)))

    def is_data_symbol(s):
        return (s['Type'] == 'OBJECT'
                # Allow vtables if --vtables is on
                and not (' for ' in s['Demangled Name'] and args.vtables))

    exported_data = [s['Name'] for s in syms if is_data_symbol(s)]
    if exported_data:
        # TODO: we can generate wrappers for const data without relocations (or only code relocations)
        warn(f"library '{input_name}' contains data symbols which won't be intercepted: "
             + ', '.join(exported_data))

    # Collect functions
    # TODO: warn if user-specified functions are missing
    orig_funs = filter(lambda s: s['Type'] == 'FUNC', syms)

    all_funs = set()
    warn_versioned = False
    for s in orig_funs:
        if not s['Default']:
            # TODO: support versions
            if not warn_versioned:
                warn(f"library {input_name} contains versioned symbols which are NYI")
                warn_versioned = True
            if verbose:
                print(f"Skipping versioned symbol {s['Name']}")
            continue
        all_funs.add(s['Name'])

    if funs is None:
        funs = sorted(list(all_funs))
        if not funs and not quiet:
            warn(f"no public functions were found in {input_name}")
    else:
        missing_funs = [name for name in funs if name not in all_funs]
        if missing_funs:
            warn("some user-specified functions are not present in library: " + ', '.join(missing_funs))
        funs = [name for name in funs if name in all_funs]

    if verbose:
        print("Exported functions:")
        for i, fun in enumerate(funs):
            print(f" {i}: {fun}")

    # Collect vtables
    if args.vtables:
        cls_tables = {}
        cls_syms = {}

        for s in syms:
            m = re.match(r'^(vtable|typeinfo|typeinfo name) for (.*)', s['Demangled Name'])
            if m is not None and is_exported(s):
                typ, cls = m.groups()
                name = s['Name']
                cls_tables.setdefault(cls, {})[typ] = name
                cls_syms[name] = s

        if verbose:
            print("Exported classes:")
            for cls, _ in sorted(cls_tables.items()):
                print(f" {cls}")

        secs = collect_sections(input_name)
        if verbose:
            print("Sections:")
            for sec in secs:
                print(f" {sec['Name']}: [{sec['Address']:x}, {sec['Address'] + sec['Size']:x}), "
                      f"at {sec['Off']:x}")

        bites = read_unrelocated_data(input_name, cls_syms, secs)

        rels = collect_relocs(input_name)
        if verbose:
            print("Relocs:")
            for rel in rels:
                sym_add = rel['Symbol\'s Name + Addend']
                print(f" {rel['Offset']}: {sym_add}")

        cls_data = collect_relocated_data(cls_syms, bites, rels, ptr_size, symbol_reloc_types)
        if verbose:
            print("Class data:")
            for name, data in sorted(cls_data.items()):
                demangled_name = cls_syms[name]['Demangled Name']
                print(f" {name} ({demangled_name}):")
                for typ, val in data:
                    print("  " + str(val if typ != 'reloc' else val['Symbol\'s Name + Addend']))

    # Generate assembly code
    suffix = os.path.basename(input_name)
    lib_suffix = re.sub(r'[^a-zA-Z_0-9]+', '_', suffix)

    tramp_file = f'{suffix}.tramp.S'
    with open(os.path.join(outdir, tramp_file), 'w') as f:
        if not quiet:
            print(f"Generating {tramp_file}...")

        with open(target_dir + '/table.S.tpl', 'r') as t:
            table_text = string.Template(t.read()).substitute(
                lib_suffix=lib_suffix,
                table_size=ptr_size*(len(funs) + 1))
        f.write(table_text)

        with open(target_dir + '/trampoline.S.tpl', 'r') as t:
            tramp_tpl = string.Template(t.read())

        for i, name in enumerate(funs):
            tramp_text = tramp_tpl.substitute(
                lib_suffix=lib_suffix,
                sym=args.symbol_prefix + name,
                offset=i*ptr_size,
                number=i)
            f.write(tramp_text)

    # Generate C code
    init_file = f'{suffix}.init.c'
    with open(os.path.join(outdir, init_file), 'w') as f:
        if not quiet:
            print(f"Generating {init_file}...")
        with open(os.path.join(root, 'arch/common/init.c.tpl'), 'r') as t:
            if funs:
                sym_names = ',\n '.join(f'"{name}"' for name in funs) + ','
            else:
                sym_names = ''
            init_text = string.Template(t.read()).substitute(
                lib_suffix=lib_suffix,
                load_name=load_name,
                dlopen_callback=dlopen_callback,
                dlsym_callback=dlsym_callback,
                has_dlopen_callback=int(bool(dlopen_callback)),
                has_dlsym_callback=int(bool(dlsym_callback)),
                no_dlopen=int(not dlopen),
                lazy_load=int(lazy_load),
                sym_names=sym_names)
            f.write(init_text)
            if args.vtables:
                vtable_text = generate_vtables(cls_tables, cls_syms, cls_data)
                f.write(vtable_text)

if __name__ == '__main__':
    main()