| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- """Setup utility for gcld3."""
- import os
- import platform
- import shutil
- import subprocess
- import setuptools
- from setuptools.command import build_ext
# Package version and distribution name handed to setuptools.setup().
__version__ = '3.0.13'
_NAME = 'gcld3'

# Build-time requirements; passed to setup() as `setup_requires`.
REQUIREMENTS = [
    'pybind11 >= 2.5.0',
    'wheel >= 0.34.2',
]

# Protocol buffer definitions that CompileProtos hands to `protoc`.
PROTO_FILES = [
    'src/feature_extractor.proto',
    'src/sentence.proto',
    'src/task_spec.proto',
]
# C++ sources located directly under src/.
_BASE_SRCS = [
    'src/base.cc',
    'src/embedding_feature_extractor.cc',
    'src/embedding_network.cc',
    'src/feature_extractor.cc',
    'src/feature_types.cc',
    'src/fml_parser.cc',
    'src/lang_id_nn_params.cc',
    'src/language_identifier_features.cc',
    'src/language_identifier_main.cc',
    'src/nnet_language_identifier.cc',
    'src/registry.cc',
    'src/relevant_script_feature.cc',
    'src/sentence_features.cc',
    'src/task_context.cc',
    'src/task_context_params.cc',
    'src/unicodetext.cc',
    'src/utils.cc',
    'src/workspace.cc',
]

# C++ sources located under src/script_span/.
_SCRIPT_SPAN_SRCS = [
    'src/script_span/fixunicodevalue.cc',
    'src/script_span/generated_entities.cc',
    'src/script_span/generated_ulscript.cc',
    'src/script_span/getonescriptspan.cc',
    'src/script_span/offsetmap.cc',
    'src/script_span/text_processing.cc',
    'src/script_span/utf8statetable.cc',
]

# These files do not exist until the proto buffer compiler 'protoc' has
# generated them.
_GENERATED_PROTO_SRCS = [
    'src/cld_3/protos/feature_extractor.pb.cc',
    'src/cld_3/protos/sentence.pb.cc',
    'src/cld_3/protos/task_spec.pb.cc',
]

# pybind11 bindings.
_PYBIND_SRCS = ['gcld3/pybind_ext.cc']

# Every source file compiled into the extension module, in declaration order.
SRCS = (
    _BASE_SRCS
    + _SCRIPT_SPAN_SRCS
    + _GENERATED_PROTO_SRCS
    + _PYBIND_SRCS
)
class CompileProtos(build_ext.build_ext):
    """`build_ext` command that runs `protoc` before building extensions."""

    def run(self):
        """Generate C++ code from PROTO_FILES, then run the regular build.

        Raises:
          RuntimeError: if no `protoc` executable is found on the PATH.
          subprocess.CalledProcessError: if `protoc` exits with a non-zero
            status.
        """
        protoc_path = shutil.which('protoc')
        if protoc_path is None:
            raise RuntimeError('Please install the proto buffer compiler.')

        # The C++ code expects the compiled protos to live in this directory,
        # so make sure it exists before protoc writes into it.
        out_dir = 'src/cld_3/protos/'
        os.makedirs(out_dir, exist_ok=True)

        protoc_args = [
            'protoc',
            '--cpp_out=' + out_dir,
            '--proto_path=src',
            *PROTO_FILES,
        ]
        subprocess.run(protoc_args, cwd='./', check=True)

        # Hand over to the stock build_ext implementation.
        super().run()
class PyBindIncludes:
    """Lazily resolves the include path for pybind11.

    The call to "pybind11.get_include()" is delayed until an instance is
    converted to a string or to a filesystem path. Creating an instance does
    not import pybind11, so the object can be listed as an include directory
    before pybind11 is available in the environment and resolved later.

    Both `str(obj)` and `os.fspath(obj)` are supported, so the object is
    accepted by consumers that format the value as text as well as by those
    that require an `os.PathLike`.
    """

    def __str__(self):
        """Return the pybind11 include directory, importing pybind11 now."""
        import pybind11  # pylint: disable=g-import-not-at-top

        return pybind11.get_include()

    def __fspath__(self):
        """Return the same directory for callers that use os.fspath()."""
        return str(self)
# True when the build runs on macOS.
MACOS = platform.system() == 'Darwin'

# The C++11 / libc++ flags are only passed on macOS; other platforms get no
# extra compiler or linker flags.
if MACOS:
    _EXTRA_COMPILE_ARGS = ['-std=c++11', '-stdlib=libc++']
    _EXTRA_LINK_ARGS = ['-stdlib=libc++']
else:
    _EXTRA_COMPILE_ARGS = []
    _EXTRA_LINK_ARGS = []

# The single C++ extension module, built from all sources in sorted order and
# linked against the protobuf library.
ext_modules = [
    setuptools.Extension(
        'gcld3.pybind_ext',
        sorted(SRCS),
        # Resolved lazily: pybind11 is imported only when the path is used.
        include_dirs=[PyBindIncludes()],
        libraries=['protobuf'],
        extra_compile_args=_EXTRA_COMPILE_ARGS,
        extra_link_args=_EXTRA_LINK_ARGS,
        language='c++',
    ),
]
# Long description shown for the package; the short `description` passed to
# setup() below repeats its first line.
DESCRIPTION = """CLD3 is a neural network model for language identification.
This package contains the inference code and a trained model. See
https://github.com/google/cld3 for more details.
"""

setuptools.setup(
    # Identity.
    name=_NAME,
    version=__version__,
    url='https://github.com/google/cld3',
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    # Descriptions.
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    # Build configuration: `build_ext` is replaced by CompileProtos so that
    # protoc runs before the extension is compiled.
    cmdclass={'build_ext': CompileProtos},
    ext_modules=ext_modules,
    packages=setuptools.find_packages(),
    setup_requires=REQUIREMENTS,
    zip_safe=False,
)
|