jquast · jquast · Nov 13, 2023 · Nov 7, 2023 · Nov 7, 2023 · Nov 7, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -136,6 +136,8 @@ jobs:
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
+        env:
+          CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
 
       - name: Fail if coverage is <100%.
         run: |
@@ -148,3 +150,5 @@ jobs:
           name: html-report
           path: htmlcov
         if: ${{ failure() }}
+        env:
+          CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
diff --git a/bin/update-tables.py b/bin/update-tables.py
diff --git a/bin/verify-table-integrity.py b/bin/verify-table-integrity.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+"""
+This is a small script to make an inquiry into the version history of unicode data tables, and to
+validate conflicts in the tables as they are published:
+
+- check for individual code point definitions that change in subsequent releases;
+  these should be considered before attempting to reduce the size of our versioned
+  tables without a careful incremental change description.  Each "violation" is
+  logged as INFO.
+- check that a codepoint in the 'zero' table is not present in the 'wide' table
+  and vice versa. This is logged as ERROR and causes the program to exit with status 1.
+
+Some examples of the first kind,
+
+1.
+
+    value 0x1f93b in table WIDE_EASTASIAN version 12.1.0 is not defined in 13.0.0 from range ('0x1f90d', '0x1f971')
+    value 0x1f946 in table WIDE_EASTASIAN version 12.1.0 is not defined in 13.0.0 from range ('0x1f90d', '0x1f971')
+
+two characters were changed from 'W' to 'N':
+
+    -EastAsianWidth-12.0.0.txt:1F90D..1F971;W   # So   [101] WHITE HEART..YAWNING FACE
+    +EastAsianWidth-12.1.0.txt:1F90C..1F93A;W   # So    [47] PINCHED FINGERS..FENCER
+    +EastAsianWidth-12.1.0.txt:1F93B;N          # So         MODERN PENTATHLON
+    +EastAsianWidth-12.1.0.txt:1F93C..1F945;W   # So    [10] WRESTLERS..GOAL NET
+    +EastAsianWidth-12.1.0.txt:1F946;N          # So         RIFLE
+    +EastAsianWidth-12.1.0.txt:1F947..1F978;W   # So    [50] FIRST PLACE MEDAL..DISGUISED FACE
+
+As well as for output,
+
+    value 0x11a3 in table WIDE_EASTASIAN version 6.1.0 is not defined in 6.2.0 from range ('0x11a3', '0x11a7')
+    ...
+    value 0x11fe in table WIDE_EASTASIAN version 6.1.0 is not defined in 6.2.0 from range ('0x11fa', '0x11ff')
+
+Category code was changed from 'W' to 'N':
+
+    -EastAsianWidth-6.1.0.txt:11A3;W # HANGUL JUNGSEONG A-EU
+    +EastAsianWidth-6.2.0.txt:11A3;N # HANGUL JUNGSEONG A-EU
+
+
+2.
+
+    value 0x1cf2 in table ZERO_WIDTH version 11.0.0 is not defined in 12.0.0 from range ('0x1cf2', '0x1cf4')
+    value 0x1cf3 in table ZERO_WIDTH version 11.0.0 is not defined in 12.0.0 from range ('0x1cf2', '0x1cf4')
+
+Category code was changed from 'Mc' to 'Lo':
+
+    -DerivedGeneralCategory-11.0.0.txt:1CF2..1CF3    ; Mc #   [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
+    +DerivedGeneralCategory-12.0.0.txt:1CEE..1CF3    ; Lo #   [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA
+
+As well as for output,
+
+     value 0x19b0 in table ZERO_WIDTH version 7.0.0 is not defined in 8.0.0 from range ('0x19b0', '0x19c0')
+     ...
+     value 0x19c8 in table ZERO_WIDTH version 7.0.0 is not defined in 8.0.0 from range ('0x19c8', '0x19c9')
+
+Category code was changed from 'Mc' to 'Lo':
+
+    -DerivedGeneralCategory-7.0.0.txt:19B0..19C0    ; Mc #  [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
+    +DerivedGeneralCategory-8.0.0.txt:19B0..19C9    ; Lo #  [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2
+"""
+# std imports
+import logging
+
+
+def main(log: logging.Logger):
+    """
+    Audit the versioned ZERO_WIDTH and WIDE_EASTASIAN tables, reporting through ``log``.
+
+    Versions are visited in the reverse of ``list_versions()`` order (presumably newest
+    first -- confirm against ``list_versions``).  For every code point of every range of
+    both tables, in each version:
+
+    - when the code point is missing from the same table of the "next" version, the
+      version visited just before this one, it is logged as INFO.  The first version
+      visited has no "next" version and skips only this check.
+    - when the code point is also found in the other table of the same version, it is
+      logged as ERROR.
+
+    :param log: logger that receives the INFO and ERROR records.
+    :raises SystemExit: with exit status 1 when one or more ERROR records were logged.
+    """
+    # local
+    from wcwidth import ZERO_WIDTH, WIDE_EASTASIAN, _bisearch, list_versions
+    reversed_uni_versions = list(reversed(list_versions()))
+    tables = {'ZERO_WIDTH': ZERO_WIDTH,
+              'WIDE_EASTASIAN': WIDE_EASTASIAN}
+    errors = 0
+    for idx, version in enumerate(reversed_uni_versions):
+        # The first version visited has no "next" version to compare against, but its
+        # tables must still be checked against each other for duplicates.
+        next_version = reversed_uni_versions[idx - 1] if idx else None
+        for table_name, table in tables.items():
+            next_table = None if next_version is None else table[next_version]
+            curr_table = table[version]
+            other_table_name = 'WIDE_EASTASIAN' if table_name == 'ZERO_WIDTH' else 'ZERO_WIDTH'
+            other_table = tables[other_table_name][version]
+            for start_range, stop_range in curr_table:
+                # Ranges are inclusive of their stop value, as the messages below state, so
+                # the stop value itself and single-value ranges must be visited as well.
+                for unichar_n in range(start_range, stop_range + 1):
+                    if next_table is not None and not _bisearch(unichar_n, next_table):
+                        log.info(f'value {hex(unichar_n)} in table_name={table_name}'
+                                 f' version={version} is not defined in next_version={next_version}'
+                                 f' from inclusive range {hex(start_range)}-{hex(stop_range)}')
+                    if _bisearch(unichar_n, other_table):
+                        log.error(f'value {hex(unichar_n)} in table_name={table_name}'
+                                  f' version={version} is duplicated in other_table_name={other_table_name}'
+                                  f' from inclusive range {hex(start_range)}-{hex(stop_range)}')
+                        errors += 1
+    if errors:
+        log.error(f'{errors} errors, exit 1')
+        # The bare ``exit`` helper is injected by the ``site`` module and is meant for
+        # interactive use; SystemExit is always available and is what ``exit`` raises.
+        raise SystemExit(1)
+
+
+if __name__ == '__main__':
+    # Configure the root logger to emit INFO and above, replacing any handlers that are
+    # already installed, then run the audit with that logger.
+    logging.basicConfig(
+        force=True,
+        format='%(levelname)s %(filename)s:%(lineno)d %(message)s',
+        level="INFO",
+    )
+    main(logging.getLogger())
diff --git a/docs/intro.rst b/docs/intro.rst
@@ -216,6 +216,11 @@ Other Languages
 =======
 History
 =======
+0.2.10 *2023-11-08*
+  * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F
+    Variation Selector 16 (`PR #97`_).
+  * **Updated** `Specification <Specification_from_pypi_>`_.
+
 0.2.9 *2023-10-30*
   * **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese,
     Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_).
@@ -319,6 +324,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
 .. _`PR #35`: https://github.com/jquast/wcwidth/pull/35
 .. _`PR #82`: https://github.com/jquast/wcwidth/pull/82
 .. _`PR #91`: https://github.com/jquast/wcwidth/pull/91
+.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97
 .. _`jquast/blessed`: https://github.com/jquast/blessed
 .. _`selectel/pyte`: https://github.com/selectel/pyte
 .. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies

diff --git a/docs/specs.rst b/docs/specs.rst
@@ -52,3 +52,7 @@ Category codes of Nonspacing Mark (``Mn``) and Spacing Mark (``Mc``).
 
 Any characters of Modifier Symbol category, ``'Sk'`` where ``'FULLWIDTH'`` is
 present in comment of unicode data file, aprox. 3 characters.
+
+Any character in sequence with U+FE0F (Variation Selector 16) that is defined
+as ``emoji style`` by the Emoji Variation Sequences data file
+(``emoji-variation-sequences.txt``).
+
diff --git a/docs/unicode_version.rst b/docs/unicode_version.rst
@@ -121,3 +121,9 @@ release files:
 ``EastAsianWidth-15.1.0.txt``
   *Date: 2023-07-28, 23:34:08 GMT*
 
+``emoji-variation-sequences-12.0.0.txt``
+  *Date: 2019-01-15, 12:10:05 GMT*
+
+``emoji-variation-sequences-15.1.0.txt``
+  *Date: 2023-02-01, 02:22:54 GMT*
+
diff --git a/setup.py b/setup.py
@@ -44,7 +44,7 @@ def main():
     setuptools.setup(
         name='wcwidth',
         # NOTE: manually manage __version__ in wcwidth/__init__.py !
-        version='0.2.9',
+        version='0.2.10',
         description=(
             "Measures the displayed width of unicode strings in a terminal"),
         long_description=codecs.open(