diff --git a/BUILDING.md b/BUILDING.md index 0e0379265f2eb0..aadba46020012d 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -35,21 +35,23 @@ file a new issue. * [Building Node.js](#building-nodejs-1) * [Android/Android-based devices (e.g. Firefox OS)](#androidandroid-based-devices-eg-firefox-os) * [`Intl` (ECMA-402) support](#intl-ecma-402-support) - * [Default: `small-icu` (English only) support](#default-small-icu-english-only-support) * [Build with full ICU support (all locales supported by ICU)](#build-with-full-icu-support-all-locales-supported-by-icu) * [Unix/macOS](#unixmacos) * [Windows](#windows-1) - * [Building without Intl support](#building-without-intl-support) + * [Trimmed: `small-icu` (English only) support](#trimmed-small-icu-english-only-support) * [Unix/macOS](#unixmacos-1) * [Windows](#windows-2) - * [Use existing installed ICU (Unix/macOS only)](#use-existing-installed-icu-unixmacOS-only) - * [Build with a specific ICU](#build-with-a-specific-icu) + * [Building without Intl support](#building-without-intl-support) * [Unix/macOS](#unixmacos-2) * [Windows](#windows-3) + * [Use existing installed ICU (Unix/macOS only)](#use-existing-installed-icu-unixmacos-only) + * [Build with a specific ICU](#build-with-a-specific-icu) + * [Unix/macOS](#unixmacos-3) + * [Windows](#windows-4) * [Building Node.js with FIPS-compliant OpenSSL](#building-nodejs-with-fips-compliant-openssl) * [Building Node.js with external core modules](#building-nodejs-with-external-core-modules) - * [Unix/macOS](#unixmacos-3) - * [Windows](#windows-4) + * [Unix/macOS](#unixmacos-4) + * [Windows](#windows-5) * [Note for downstream distributors of Node.js](#note-for-downstream-distributors-of-nodejs) ## Supported platforms @@ -598,31 +600,40 @@ $ make ## `Intl` (ECMA-402) support [Intl](https://github.com/nodejs/node/blob/master/doc/api/intl.md) support is -enabled by default, with English data only. +enabled by default. 
-### Default: `small-icu` (English only) support +### Build with full ICU support (all locales supported by ICU) -By default, only English data is included, but -the full `Intl` (ECMA-402) APIs. It does not need to download -any dependencies to function. You can add full -data at runtime. +This is the default option. -### Build with full ICU support (all locales supported by ICU) +#### Unix/macOS -With the `--download=all`, this may download ICU if you don't have an -ICU in `deps/icu`. (The embedded `small-icu` included in the default -Node.js source does not include all locales.) +```console +$ ./configure --with-intl=full-icu +``` + +#### Windows + +```console +> .\vcbuild full-icu +``` + +### Trimmed: `small-icu` (English only) support + +In this configuration, only English data is included, but +the full `Intl` (ECMA-402) APIs are available. It does not need to download +any dependencies to function. You can add full data at runtime. #### Unix/macOS ```console -$ ./configure --with-intl=full-icu --download=all +$ ./configure --with-intl=small-icu ``` #### Windows ```console -> .\vcbuild full-icu download-all +> .\vcbuild small-icu ``` ### Building without Intl support diff --git a/configure.py b/configure.py index 473bbfeb294b34..0b6f67f68ccd75 100755 --- a/configure.py +++ b/configure.py @@ -11,6 +11,8 @@ import shlex import subprocess import shutil +import bz2 + from distutils.spawn import find_executable as which # If not run from node/, cd to node/. @@ -409,7 +411,7 @@ intl_optgroup.add_option('--with-intl', action='store', dest='with_intl', - default='small-icu', + default='full-icu', choices=valid_intl_modes, help='Intl mode (valid choices: {0}) [default: %default]'.format( ', '.join(valid_intl_modes))) @@ -1399,7 +1401,8 @@ def write_config(data, name): icu_parent_path = 'deps' # The full path to the ICU source directory. Should not include './'. 
- icu_full_path = 'deps/icu' + icu_deps_path = 'deps/icu' + icu_full_path = icu_deps_path # icu-tmp is used to download and unpack the ICU tarball. icu_tmp_path = os.path.join(icu_parent_path, 'icu-tmp') @@ -1407,30 +1410,26 @@ def write_config(data, name): # canned ICU. see tools/icu/README.md to update. canned_icu_dir = 'deps/icu-small' + # use the README to verify what the canned ICU is + canned_is_full = os.path.isfile(os.path.join(canned_icu_dir, 'README-FULL-ICU.txt')) + canned_is_small = os.path.isfile(os.path.join(canned_icu_dir, 'README-SMALL-ICU.txt')) + if canned_is_small: + warn('Ignoring %s - in-repo small icu is no longer supported.' % canned_icu_dir) + # We can use 'deps/icu-small' - pre-canned ICU *iff* - # - with_intl == small-icu (the default!) - # - with_icu_locales == 'root,en' (the default!) - # - deps/icu-small exists! + # - canned_is_full AND # - with_icu_source is unset (i.e. no other ICU was specified) - # (Note that this is the *DEFAULT CASE*.) # # This is *roughly* equivalent to - # $ configure --with-intl=small-icu --with-icu-source=deps/icu-small + # $ configure --with-intl=full-icu --with-icu-source=deps/icu-small # .. Except that we avoid copying icu-small over to deps/icu. # In this default case, deps/icu is ignored, although make clean will # still harmlessly remove deps/icu. - # are we using default locales? - using_default_locales = ( options.with_icu_locales == icu_default_locales ) - - # make sure the canned ICU really exists - canned_icu_available = os.path.isdir(canned_icu_dir) - - if (o['variables']['icu_small'] == b(True)) and using_default_locales and (not with_icu_source) and canned_icu_available: + if (not with_icu_source) and canned_is_full: # OK- we can use the canned ICU. 
- icu_config['variables']['icu_small_canned'] = 1 icu_full_path = canned_icu_dir - + icu_config['variables']['icu_full_canned'] = 1 # --with-icu-source processing # now, check that they didn't pass --with-icu-source=deps/icu elif with_icu_source and os.path.abspath(icu_full_path) == os.path.abspath(with_icu_source): @@ -1508,29 +1507,40 @@ def write_config(data, name): icu_endianness = sys.byteorder[0] o['variables']['icu_ver_major'] = icu_ver_major o['variables']['icu_endianness'] = icu_endianness - icu_data_file_l = 'icudt%s%s.dat' % (icu_ver_major, 'l') + icu_data_file_l = 'icudt%s%s.dat' % (icu_ver_major, 'l') # LE filename icu_data_file = 'icudt%s%s.dat' % (icu_ver_major, icu_endianness) # relative to configure icu_data_path = os.path.join(icu_full_path, 'source/data/in', - icu_data_file_l) + icu_data_file_l) # LE + compressed_data = '%s.bz2' % (icu_data_path) + if not os.path.isfile(icu_data_path) and os.path.isfile(compressed_data): + # unpack. deps/icu-tmp is a temporary path + if os.path.isdir(icu_tmp_path): + shutil.rmtree(icu_tmp_path) + os.mkdir(icu_tmp_path) + icu_data_path = os.path.join(icu_tmp_path, icu_data_file_l) + with open(icu_data_path, 'wb') as outf: + with bz2.BZ2File(compressed_data, 'rb') as inf: + shutil.copyfileobj(inf, outf) + # Now, proceed.. + # relative to dep.. - icu_data_in = os.path.join('..','..', icu_full_path, 'source/data/in', icu_data_file_l) + icu_data_in = os.path.join('..','..', icu_data_path) if not os.path.isfile(icu_data_path) and icu_endianness != 'l': # use host endianness icu_data_path = os.path.join(icu_full_path, 'source/data/in', - icu_data_file) - # relative to dep.. - icu_data_in = os.path.join('..', icu_full_path, 'source/data/in', - icu_data_file) - # this is the input '.dat' file to use .. icudt*.dat - # may be little-endian if from a icu-project.org tarball - o['variables']['icu_data_in'] = icu_data_in + icu_data_file) # will be generated if not os.path.isfile(icu_data_path): # .. 
and we're not about to build it from .gyp! error('''ICU prebuilt data file %s does not exist. See the README.md.''' % icu_data_path) + + # this is the input '.dat' file to use .. icudt*.dat + # may be little-endian if from a icu-project.org tarball + o['variables']['icu_data_in'] = icu_data_in + # map from variable name to subdirs icu_src = { 'stubdata': 'stubdata', @@ -1547,6 +1557,31 @@ def write_config(data, name): var = 'icu_src_%s' % i path = '../../%s/source/%s' % (icu_full_path, icu_src[i]) icu_config['variables'][var] = glob_to_var('tools/icu', path, 'patches/%s/source/%s' % (icu_ver_major, icu_src[i]) ) + # calculate platform-specific genccode args + # print("platform %s, flavor %s" % (sys.platform, flavor)) + # if sys.platform == 'darwin': + # shlib_suffix = '%s.dylib' + # elif sys.platform.startswith('aix'): + # shlib_suffix = '%s.a' + # else: + # shlib_suffix = 'so.%s' + if flavor == 'win': + icu_config['variables']['icu_asm_ext'] = 'obj' + icu_config['variables']['icu_asm_opts'] = [ '-o ' ] + elif with_intl == 'small-icu' or options.cross_compiling: + icu_config['variables']['icu_asm_ext'] = 'c' + icu_config['variables']['icu_asm_opts'] = [] + elif flavor == 'mac': + icu_config['variables']['icu_asm_ext'] = 'S' + icu_config['variables']['icu_asm_opts'] = [ '-a', 'gcc-darwin' ] + elif sys.platform.startswith('aix'): + icu_config['variables']['icu_asm_ext'] = 'S' + icu_config['variables']['icu_asm_opts'] = [ '-a', 'xlc' ] + else: + # assume GCC-compatible asm is OK + icu_config['variables']['icu_asm_ext'] = 'S' + icu_config['variables']['icu_asm_opts'] = [ '-a', 'gcc' ] + # write updated icu_config.gypi with a bunch of paths write(icu_config_name, do_not_edit + pprint.pformat(icu_config, indent=2) + '\n') diff --git a/deps/icu-small/README-FULL-ICU.txt b/deps/icu-small/README-FULL-ICU.txt new file mode 100644 index 00000000000000..949126227f4400 --- /dev/null +++ b/deps/icu-small/README-FULL-ICU.txt @@ -0,0 +1,9 @@ +ICU sources - auto generated by 
shrink-icu-src.py + +This directory contains the ICU subset used by --with-intl=full-icu +It is a strict subset of ICU 64 source files with the following exception(s): +* deps/icu-small/source/data/in/icudt64l.dat.bz2 : compressed data file + + +To rebuild this directory, see ../../tools/icu/README.md + diff --git a/deps/icu-small/README-SMALL-ICU.txt b/deps/icu-small/README-SMALL-ICU.txt deleted file mode 100644 index efc3ebe925cbba..00000000000000 --- a/deps/icu-small/README-SMALL-ICU.txt +++ /dev/null @@ -1,8 +0,0 @@ -Small ICU sources - auto generated by shrink-icu-src.py - -This directory contains the ICU subset used by --with-intl=small-icu (the default) -It is a strict subset of ICU 64 source files with the following exception(s): -* deps/icu-small/source/data/in/icudt64l.dat : Reduced-size data file - - -To rebuild this directory, see ../../tools/icu/README.md diff --git a/deps/icu-small/source/data/in/icudt64l.dat b/deps/icu-small/source/data/in/icudt64l.dat deleted file mode 100644 index 2ff9d277c58be3..00000000000000 Binary files a/deps/icu-small/source/data/in/icudt64l.dat and /dev/null differ diff --git a/deps/icu-small/source/data/in/icudt64l.dat.bz2 b/deps/icu-small/source/data/in/icudt64l.dat.bz2 new file mode 100644 index 00000000000000..5676ffb70f8b9d Binary files /dev/null and b/deps/icu-small/source/data/in/icudt64l.dat.bz2 differ diff --git a/doc/api/intl.md b/doc/api/intl.md index be30e4e2528607..35124474067806 100644 --- a/doc/api/intl.md +++ b/doc/api/intl.md @@ -23,11 +23,9 @@ programs. Some of them are: * [`RegExp` Unicode Property Escapes][] Node.js (and its underlying V8 engine) uses [ICU][] to implement these features -in native C/C++ code. However, some of them require a very large ICU data file -in order to support all locales of the world. Because it is expected that most -Node.js users will make use of only a small portion of ICU functionality, only -a subset of the full ICU data set is provided by Node.js by default. 
Several -options are provided for customizing and expanding the ICU data set either when +in native C/C++ code. The full ICU data set is provided by Node.js by default. +However, due to the size of the ICU data file, several +options are provided for customizing the ICU data set either when building or running Node.js. ## Options for building Node.js @@ -38,8 +36,8 @@ in [BUILDING.md][]. * `--with-intl=none`/`--without-intl` * `--with-intl=system-icu` -* `--with-intl=small-icu` (default) -* `--with-intl=full-icu` +* `--with-intl=small-icu` +* `--with-intl=full-icu` (default) An overview of available Node.js and JavaScript features for each `configure` option: @@ -66,8 +64,8 @@ operation is identical to that of `Date.prototype.toString()`. ### Disable all internationalization features (`none`) -If this option is chosen, most internationalization features mentioned above -will be **unavailable** in the resulting `node` binary. +If this option is chosen, ICU is disabled and most internationalization +features mentioned above will be **unavailable** in the resulting `node` binary. ### Build with a pre-installed ICU (`system-icu`) @@ -106,9 +104,7 @@ console.log(spanish.format(january)); // Should print "enero" ``` -This mode provides a good balance between features and binary size, and it is -the default behavior if no `--with-intl` flag is passed. The official binaries -are also built in this mode. +This mode provides a balance between features and binary size. #### Providing ICU data at runtime @@ -149,8 +145,9 @@ enable full `Intl` support. This option makes the resulting binary link against ICU statically and include a full set of ICU data. A binary created this way has no further external -dependencies and supports all locales, but might be rather large. See -[BUILDING.md][BUILDING.md#full-icu] on how to compile a binary using this mode. +dependencies and supports all locales, but might be rather large. 
This is +the default behavior if no `--with-intl` flag is passed. The official binaries +are also built in this mode. ## Detecting internationalization support @@ -205,7 +202,6 @@ to be helpful: [`String.prototype.toUpperCase()`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/toUpperCase [`require('buffer').transcode()`]: buffer.html#buffer_buffer_transcode_source_fromenc_toenc [`require('util').TextDecoder`]: util.html#util_class_util_textdecoder -[BUILDING.md#full-icu]: https://github.com/nodejs/node/blob/master/BUILDING.md#build-with-full-icu-support-all-locales-supported-by-icu [BUILDING.md]: https://github.com/nodejs/node/blob/master/BUILDING.md [ECMA-262]: https://tc39.github.io/ecma262/ [ECMA-402]: https://tc39.github.io/ecma402/ diff --git a/doc/api/util.md b/doc/api/util.md index e7892fbc056a29..d421eabf35837c 100644 --- a/doc/api/util.md +++ b/doc/api/util.md @@ -932,26 +932,9 @@ Per the [WHATWG Encoding Standard][], the encodings supported by the one or more aliases may be used. Different Node.js build configurations support different sets of encodings. -While a very basic set of encodings is supported even on Node.js builds without -ICU enabled, support for some encodings is provided only when Node.js is built -with ICU and using the full ICU data (see [Internationalization][]). 
+(see [Internationalization][]) -#### Encodings Supported Without ICU - -| Encoding | Aliases | -| ----------- | --------------------------------- | -| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` | -| `'utf-16le'` | `'utf-16'` | - -#### Encodings Supported by Default (With ICU) - -| Encoding | Aliases | -| ----------- | --------------------------------- | -| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` | -| `'utf-16le'` | `'utf-16'` | -| `'utf-16be'` | | - -#### Encodings Requiring Full ICU Data +#### Encodings Supported by Default (With Full ICU Data) | Encoding | Aliases | | ----------------- | -------------------------------- | @@ -990,6 +973,21 @@ with ICU and using the full ICU data (see [Internationalization][]). | `'shift_jis'` | `'csshiftjis'`, `'ms932'`, `'ms_kanji'`, `'shift-jis'`, `'sjis'`, `'windows-31j'`, `'x-sjis'` | | `'euc-kr'` | `'cseuckr'`, `'csksc56011987'`, `'iso-ir-149'`, `'korean'`, `'ks_c_5601-1987'`, `'ks_c_5601-1989'`, `'ksc5601'`, `'ksc_5601'`, `'windows-949'` | +#### Encodings Supported when Node.js is built with the `small-icu` option + +| Encoding | Aliases | +| ----------- | --------------------------------- | +| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` | +| `'utf-16le'` | `'utf-16'` | +| `'utf-16be'` | | + +#### Encodings Supported when ICU is disabled + +| Encoding | Aliases | +| ----------- | --------------------------------- | +| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` | +| `'utf-16le'` | `'utf-16'` | + The `'iso-8859-16'` encoding listed in the [WHATWG Encoding Standard][] is not supported. @@ -1005,9 +1003,9 @@ changes: * `encoding` {string} Identifies the `encoding` that this `TextDecoder` instance supports. **Default:** `'utf-8'`. * `options` {Object} - * `fatal` {boolean} `true` if decoding failures are fatal. This option is only - supported when ICU is enabled (see [Internationalization][]). **Default:** - `false`. + * `fatal` {boolean} `true` if decoding failures are fatal. 
+ This option is not supported when ICU is disabled + (see [Internationalization][]). **Default:** `false`. * `ignoreBOM` {boolean} When `true`, the `TextDecoder` will include the byte order mark in the decoded result. When `false`, the byte order mark will be removed from the output. This option is only used when `encoding` is diff --git a/tools/icu/README.md b/tools/icu/README.md index 94eaf50a55c82a..51b58455b44a59 100644 --- a/tools/icu/README.md +++ b/tools/icu/README.md @@ -27,7 +27,7 @@ internationalization functionality. ```shell ./configure \ - --with-intl=small-icu \ + --with-intl=full-icu \ --with-icu-source=http://download.icu-project.org/files/icu4c/58.1/icu4c-58_1-src.tgz make ``` @@ -54,7 +54,7 @@ Also running new Intl.DateTimeFormat('es', {month: 'long'}).format(new Date(9E8)); ``` -…Should return `January` not `enero`. +…Should return `enero` not `January`. * Now, copy `deps/icu` over to `deps/icu-small` @@ -94,12 +94,12 @@ tools/license-builder.sh * Update the URL and hash for the full ICU file in `tools/icu/current_ver.dep`. It should match the ICU URL used in the first step. When this is done, the -following should build with full ICU. +following should build with small ICU. ```shell # clean up rm -rf out deps/icu deps/icu4c* -./configure --with-intl=full-icu --download=all +./configure --with-intl=small-icu --download=all make make test-ci ``` diff --git a/tools/icu/icu-generic.gyp b/tools/icu/icu-generic.gyp index b8f0d13836dee1..d2d0e5a3180213 100644 --- a/tools/icu/icu-generic.gyp +++ b/tools/icu/icu-generic.gyp @@ -212,16 +212,17 @@ 'conditions': [ [ 'icu_small == "false"', { # and OS=win # full data - just build the full data file, then we are done. 
- 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ], 'dependencies': [ 'genccode#host' ], 'actions': [ { 'action_name': 'icudata', 'msvs_quote_cmd': 0, 'inputs': [ '<(icu_data_in)' ], - 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ], + # on Windows, we can go directly to .obj file (-o) option. 'action': [ '<(PRODUCT_DIR)/genccode', - '-o', + '<@(icu_asm_opts)', # -o '-d', '<(SHARED_INTERMEDIATE_DIR)', '-n', 'icudata', '-e', 'icudt<(icu_ver_major)', @@ -256,9 +257,9 @@ 'action_name': 'genccode', 'msvs_quote_cmd': 0, 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], - 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ], 'action': [ '<(PRODUCT_DIR)/genccode', - '-o', + '<@(icu_asm_opts)', # -o '-d', '<(SHARED_INTERMEDIATE_DIR)/', '-n', 'icudata', '-e', 'icusmdt<(icu_ver_major)', @@ -266,20 +267,20 @@ }, ], # This file contains the small ICU data. - 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ], } ] ], #end of OS==win and icu_small == true }, { # OS != win 'conditions': [ [ 'icu_small == "false"', { - # full data - just build the full data file, then we are done. 
- 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ], + # full data - no trim needed + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.<(icu_asm_ext)' ], 'dependencies': [ 'genccode#host', 'icupkg#host', 'icu_implementation#host', 'icu_uconfig' ], 'include_dirs': [ '<(icu_path)/source/common', ], 'actions': [ { - # Swap endianness (if needed), or at least copy the file + # Copy the .dat file, swapping endianness if needed. 'action_name': 'icupkg', 'inputs': [ '<(icu_data_in)' ], 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ], @@ -290,7 +291,7 @@ ], }, { - # Rename without the endianness marker + # Rename without the endianness marker (icudt64l.dat -> icudt64.dat) 'action_name': 'copy', 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ], 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ], @@ -300,12 +301,14 @@ ], }, { + # convert full ICU data file to .c, or .S, etc. 'action_name': 'icudata', 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ], - 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ], + 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.<(icu_asm_ext)' ], 'action': [ '<(PRODUCT_DIR)/genccode', '-e', 'icudt<(icu_ver_major)', '-d', '<(SHARED_INTERMEDIATE_DIR)', + '<@(icu_asm_opts)', '-f', 'icudt<(icu_ver_major)_dat', '<@(_inputs)' ], }, @@ -318,7 +321,8 @@ 'export_dependent_settings': [ 'icustubdata' ], 'actions': [ { - # trim down ICU + # Trim down ICU. + # Note that icupkg is invoked automatically, swapping endianness if needed. 
'action_name': 'icutrim', 'inputs': [ '<(icu_data_in)', 'icu_small.json' ], 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], @@ -333,7 +337,7 @@ '-v', '-L', '<(icu_locales)'], }, { - # rename to get the final entrypoint name right + # rename to get the final entrypoint name right (icudt64l.dat -> icusmdt64.dat) 'action_name': 'rename', 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ], @@ -342,17 +346,18 @@ '<@(_outputs)', ], }, { - # build final .dat -> .obj + # For icu-small, always use .c, don't try to use .S, etc. 'action_name': 'genccode', 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ], - 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.<(icu_asm_ext)' ], 'action': [ '<(PRODUCT_DIR)/genccode', + '<@(icu_asm_opts)', '-d', '<(SHARED_INTERMEDIATE_DIR)', '<@(_inputs)' ], }, ], # This file contains the small ICU data - 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ], + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.<(icu_asm_ext)' ], # for umachine.h 'include_dirs': [ '<(icu_path)/source/common', diff --git a/tools/icu/shrink-icu-src.py b/tools/icu/shrink-icu-src.py index 0df16cde2117d6..c91472ed3082dd 100644 --- a/tools/icu/shrink-icu-src.py +++ b/tools/icu/shrink-icu-src.py @@ -5,14 +5,15 @@ import re import sys import shutil +import bz2 parser = optparse.OptionParser() -parser.add_option('--icu-small', +parser.add_option('--icudst', action='store', - dest='icusmall', + dest='icudst', default='deps/icu-small', - help='path to target ICU directory to shrink. Will be deleted.') + help='path to target ICU directory. 
Will be deleted.') parser.add_option('--icu-src', action='store', @@ -26,18 +27,26 @@ default='out/Release/obj/gen/icutmp', help='path to icutmp dir.') - (options, args) = parser.parse_args() -if os.path.isdir(options.icusmall): - print('Deleting existing icusmall %s' % (options.icusmall)) - shutil.rmtree(options.icusmall) +if os.path.isdir(options.icudst): + print('Deleting existing icudst %s' % (options.icudst)) + shutil.rmtree(options.icudst) if not os.path.isdir(options.icusrc): print('Missing source ICU dir --icusrc=%s' % (options.icusrc)) sys.exit(1) +# compression stuff. Keep the suffix and the compression function in sync. +compression_suffix = '.bz2' +def compress_data(infp, outfp): + with open(infp, 'rb') as inf: + with bz2.BZ2File(outfp, 'wb') as outf: + shutil.copyfileobj(inf, outf) +def print_size(fn): + size = (os.stat(fn).st_size) / 1024000 + print('%dM\t%s' % (size, fn)) ignore_regex = re.compile('^.*\.(vcxproj|filters|nrm|icu|dat|xml|txt|ac|guess|m4|in|sub|py|mak)$') @@ -90,36 +99,41 @@ def icu_info(icu_full_path): return (icu_ver_major, icu_endianness) (icu_ver_major, icu_endianness) = icu_info(options.icusrc) -print("icudt%s%s" % (icu_ver_major, icu_endianness)) +print("Data file root: icudt%s%s" % (icu_ver_major, icu_endianness)) +dst_datafile = os.path.join(options.icudst, "source","data","in", "icudt%s%s.dat" % (icu_ver_major, icu_endianness)) -src_datafile = os.path.join(options.icutmp, "icusmdt%s.dat" % (icu_ver_major)) -dst_datafile = os.path.join(options.icusmall, "source","data","in", "icudt%s%s.dat" % (icu_ver_major, icu_endianness)) +src_datafile = os.path.join(options.icusrc, "source/data/in/icudt%sl.dat" % (icu_ver_major)) +dst_cmp_datafile = "%s%s" % (dst_datafile, compression_suffix) if not os.path.isfile(src_datafile): - print("Could not find source datafile %s - did you build small-icu node?" 
% src_datafile) - sys.exit(1) -else: - print("will use small datafile %s" % (src_datafile)) -print('%s --> %s' % (options.icusrc, options.icusmall)) -shutil.copytree(options.icusrc, options.icusmall, ignore=icu_ignore) -print('%s --> %s' % (src_datafile, dst_datafile)) + print("Error: icu data file not found: %s" % src_datafile) + exit(1) + +print("will use datafile %s" % (src_datafile)) + +print('%s --> %s' % (options.icusrc, options.icudst)) +shutil.copytree(options.icusrc, options.icudst, ignore=icu_ignore) # now, make the data dir (since we ignored it) -os.mkdir(os.path.join(os.path.join(options.icusmall, "source", "data"))) -os.mkdir(os.path.join(os.path.join(options.icusmall, "source", "data", "in"))) +icudst_data = os.path.join(options.icudst, "source", "data") +icudst_in = os.path.join(icudst_data, "in") +os.mkdir(icudst_data) +os.mkdir(icudst_in) -# OK, now copy the data file -shutil.copy(src_datafile, dst_datafile) +print_size(src_datafile) -# Now, print a short notice -readme_name = os.path.join(options.icusmall, "README-SMALL-ICU.txt" ) +print('%s --compress-> %s' % (src_datafile, dst_cmp_datafile)) +compress_data(src_datafile, dst_cmp_datafile) +print_size(dst_cmp_datafile) +readme_name = os.path.join(options.icudst, "README-FULL-ICU.txt" ) +# Now, print a short notice fi = open(readme_name, 'wb') -print("Small ICU sources - auto generated by shrink-icu-src.py", file=fi) +print("ICU sources - auto generated by shrink-icu-src.py", file=fi) print("", file=fi) -print("This directory contains the ICU subset used by --with-intl=small-icu (the default)", file=fi) +print("This directory contains the ICU subset used by --with-intl=full-icu", file=fi) print("It is a strict subset of ICU %s source files with the following exception(s):" % (icu_ver_major), file=fi) -print("* %s : Reduced-size data file" % (dst_datafile), file=fi) +print("* %s : compressed data file" % (dst_cmp_datafile), file=fi) print("", file=fi) print("", file=fi) print("To rebuild this 
directory, see ../../tools/icu/README.md", file=fi)