Skip to content

Commit

Permalink
WIP add u8ident
Browse files Browse the repository at this point in the history
Extension for secure utf-8 identifiers, for everybody
seeing identifiable names in UTF-8 encodings, like
a filename in a terminal or UI widget.
Identifiers need to be identifiable, i.e. implement mixed
script detection, and such for a Unicode TR39
Moderately Restrictive restriction level.
Also identifiers are validated and normalized by default, to
be able to compare and find them.
  • Loading branch information
rurban committed Feb 21, 2024
1 parent ae733a0 commit 7bb3283
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 3 deletions.
8 changes: 8 additions & 0 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,8 @@ ctl/string.i:
$(call expand,$(subst .i,,$@))
# Preprocessed-listing targets. Each recipe hands its own target name with the
# ".i" suffix removed (e.g. ctl/u8ident.i -> ctl/u8ident) to the `expand` macro.
# NOTE(review): `expand` is not defined in this hunk; presumably it is defined
# earlier in GNUmakefile -- confirm.
ctl/u8string.i:
$(call expand,$(subst .i,,$@))
ctl/u8ident.i:
$(call expand,$(subst .i,,$@))
# map.i additionally passes extra preprocessor flags as the second argument.
ctl/map.i:
$(call expand,$(subst .i,,$@),-DT=strint -DPOD)
ctl/unordered_map.i:
Expand Down Expand Up @@ -353,6 +355,12 @@ tests/func/test_stack: .cflags $(COMMON_H) tests/test.h tests/func/digi.hh ct
# Build the test_string functional test. The source file is the target name
# plus ".cc" ($@.cc), matching the last prerequisite.
tests/func/test_string: .cflags $(COMMON_H) tests/test.h ctl/string.h ctl/vector.h \
                        tests/func/test_string.cc
	$(CXX) $(CXXFLAGS) -o $@ $@.cc
# Build the test_u8string functional test. The source file is the target name
# plus ".cc" ($@.cc), matching the last prerequisite.
tests/func/test_u8string: .cflags $(COMMON_H) tests/test.h ctl/u8string.h ctl/vector.h \
                          tests/func/test_u8string.cc
	$(CXX) $(CXXFLAGS) -o $@ $@.cc
# Build the test_u8ident functional test. The recipe compiles $@.cc, i.e.
# tests/func/test_u8ident.cc, so that file (not test_u8string.cc) must be the
# source prerequisite; otherwise edits to it would not trigger a rebuild.
tests/func/test_u8ident: .cflags $(COMMON_H) tests/test.h ctl/u8ident.h ctl/u8string.h \
                         ctl/vector.h tests/func/test_u8ident.cc
	$(CXX) $(CXXFLAGS) -o $@ $@.cc
# Build the test_str_capacity functional test. The source file is the target
# name plus ".cc" ($@.cc), matching the last prerequisite.
tests/func/test_str_capacity: .cflags $(COMMON_H) tests/test.h ctl/string.h ctl/vector.h \
                              tests/func/test_str_capacity.cc
	$(CXX) $(CXXFLAGS) -o $@ $@.cc
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ all containers in ISO C99/C11:
| [ctl/set.h](docs/set.md) | std::set | set |
| [ctl/stack.h](docs/stack.md) | std::stack | stack |
| [ctl/string.h](docs/string.md) | std::string | str |
| [ctl/u8string.h](docs/u8string.md) | std::string | str |
| [ctl/u8string.h](docs/u8string.md) | std::string ext | u8str |
| [ctl/u8ident.h](docs/u8ident.md) | - | u8id |
| [ctl/vector.h](docs/vector.md) | std::vector | vec |
| [ctl/array.h](docs/array.md) | std::array | arrNNNN |
| [ctl/map.h](docs/map.md) | std::map | map |
Expand Down Expand Up @@ -286,6 +287,7 @@ make ctl/set.i
make ctl/stack.i
make ctl/string.i
make ctl/u8string.i
make ctl/u8ident.i
make ctl/vector.i
make ctl/array.i
make ctl/map.i
Expand Down Expand Up @@ -365,6 +367,7 @@ And in its grandiosity (esp. not header-only):
vector.h: realloc
string.h: vector.h
u8string.h: vector.h ++
u8ident.h: u8string.h
deque.h: realloc (paged)
queue.h: deque.h
stack.h: deque.h
Expand Down
1 change: 1 addition & 0 deletions ctl/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ static inline int JOIN(A, compare)(A *self, A *other)
#undef A
#undef I
#undef T
#undef POD
#else
#undef HOLD
#endif
Expand Down
46 changes: 46 additions & 0 deletions ctl/u8ident.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/* POSIX std extension for people who use UTF-8 identifiers but
need security. See http://unicode.org/reports/tr39/
For example, a kernel filesystem or user database that, in a UTF-8 terminal,
wishes to present identifiers such as names, paths or files in an identifiable way,
i.e. normalized and with identifiable characters only. Most don't display
names as puny-code.
Implement the Moderately Restrictive restriction level as default.
* All characters in the string are in the ASCII range, or
* The string is single-script, according to the definition in Section 5.1, or
* The string is covered by any of the following sets of scripts, according to
the definition in TR39 Section 5.1:
Latin + Han + Hiragana + Katakana; or equivalently: Latn + Jpan
Latin + Han + Bopomofo; or equivalently: Latn + Hanb
Latin + Han + Hangul; or equivalently: Latn + Kore, or
* The string is covered by Latin and any one other Recommended script, except Cyrillic, Greek.
* The string must be validated UTF-8 and normalized, and only consist of valid identifier
characters.
Reject violations, optionally warn about confusables.
SPDX-License-Identifier: MIT */

/* Include guard: the body below is processed only once per translation unit. */
#ifndef __CTL_U8IDENT_H__
#define __CTL_U8IDENT_H__

/* This header takes no template parameter; a T already defined by the
   includer is rejected at compile time. */
#ifdef T
#error "Template type T defined for <ctl/u8ident.h>"
#endif

/* HOLD is tested at the end of <ctl/u8string.h>: when it is set, that header
   undefines HOLD itself and leaves POD, T, A and I defined for use here. */
#define HOLD
/* Name mappings onto u8id, set up before the include.
   NOTE(review): presumably these make the u8string/vector template code emit
   its type and functions under the u8id name -- confirm against u8string.h,
   which may define A itself. */
#define u8id_char8_t u8id
#define vec u8id
#define A u8id
#include <ctl/u8string.h>

// TODO Take my code from cperl, which has stable unicode security for some years.
// I'm also just adding this to my safeclib.
// The only other existing example of proper unicode security is Java.

/* Drop the template macros that u8string.h left defined because of HOLD. */
#undef A
#undef I
#undef T
#undef POD
/* NOTE(review): u8string.h already undefines HOLD when it is set, so this
   looks like a defensive no-op. Also note that vec and u8id_char8_t are not
   undefined here -- confirm that leaving them defined is intended. */
#undef HOLD

#endif
1 change: 1 addition & 0 deletions ctl/u8string.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ JOIN(A, key_compare)(A* self, A* s)
#ifdef HOLD /* for u8ident.h */
# undef HOLD
#else
# undef POD
# undef T
# undef A
# undef I
Expand Down
5 changes: 4 additions & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ all containers in ISO C99/C11:
| [ctl/set.h](set.md) | std::set | set |
| [ctl/stack.h](stack.md) | std::stack | stack |
| [ctl/string.h](string.md) | std::string | str |
| [ctl/u8string.h](u8string.md) | std::string | str |
| [ctl/u8string.h](u8string.md) | std::string ext | u8str |
| [ctl/u8ident.h](u8ident.md) | - | u8id |
| [ctl/vector.h](vector.md) | std::vector | vec |
| [ctl/array.h](array.md) | std::array | arrNNNN |
| [ctl/map.h](map.md) | std::map | map |
Expand Down Expand Up @@ -286,6 +287,7 @@ make ctl/set.i
make ctl/stack.i
make ctl/string.i
make ctl/u8string.i
make ctl/u8ident.i
make ctl/vector.i
make ctl/array.i
make ctl/map.i
Expand Down Expand Up @@ -365,6 +367,7 @@ And in its grandiosity (esp. not header-only):
vector.h: realloc
string.h: vector.h
u8string.h: vector.h ++
u8ident.h: u8string.h
deque.h: realloc (paged)
queue.h: deque.h
stack.h: deque.h
Expand Down
5 changes: 4 additions & 1 deletion makefile
Original file line number Diff line number Diff line change
Expand Up @@ -322,9 +322,12 @@ tests/func/test_string: .cflags ${COMMON_H} tests/test.h ctl/string.h ctl/vect
# Build the test_str_capacity functional test. The source file is the target
# name plus ".cc" ($@.cc), matching the last prerequisite.
tests/func/test_str_capacity: .cflags ${COMMON_H} tests/test.h ctl/string.h ctl/vector.h \
                              tests/func/test_str_capacity.cc
	${CXX} ${CXXFLAGS} -o $@ $@.cc
tests/func/test_u8string: .cflags ${COMMON_H} ctl/u8string.h ctl/string.h \
# Build the test_u8string functional test. The source file is the target name
# plus ".cc" ($@.cc), matching the last prerequisite.
tests/func/test_u8string: .cflags ${COMMON_H} tests/test.h ctl/u8string.h ctl/vector.h \
                          tests/func/test_u8string.cc
	${CXX} ${CXXFLAGS} -o $@ $@.cc
# Build the test_u8ident functional test from $@.cc.
# Prerequisites follow the header chain u8ident.h -> u8string.h -> vector.h,
# the same list the GNUmakefile rule uses. The test is a C++ source built with
# ${CXX}, so it takes ${CXXFLAGS} like every sibling test rule, not ${CFLAGS}.
tests/func/test_u8ident: .cflags ${COMMON_H} tests/test.h ctl/u8ident.h ctl/u8string.h \
                         ctl/vector.h tests/func/test_u8ident.cc
	${CXX} ${CXXFLAGS} -o $@ $@.cc
# Build the test_vec_capacity functional test. The source file is the target
# name plus ".cc" ($@.cc), matching the last prerequisite.
tests/func/test_vec_capacity: .cflags ${COMMON_H} tests/test.h ctl/vector.h \
                              tests/func/test_vec_capacity.cc
	${CXX} ${CXXFLAGS} -o $@ $@.cc
Expand Down

0 comments on commit 7bb3283

Please sign in to comment.