Skip to content

Commit

Permalink
Changes to handle special and non-Unicode characters log2timeline#77
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Jan 28, 2024
1 parent 39a8871 commit 5edde69
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions dfimagetools/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,19 @@
NON_PRINTABLE_CHARACTERS.update(
    (code_point, f'\\U{code_point:08x}')
    # 0xd800 up to and including 0xdfff: the UTF-16 surrogate code points.
    for code_point in range(0xd800, 0xe000))

# Escape Unicode noncharacters (code points that will never be assigned a
# character) as \U########. This covers 0xfdd0 up to and including 0xfddf and
# the last two code points (0x..fffe and 0x..ffff) of each of the 17 planes.
# NOTE(review): 0xfde0 up to and including 0xfdef are noncharacters as well
# but are not escaped here - confirm whether that is intentional.
NON_PRINTABLE_CHARACTERS.update(
    (code_point, f'\\U{code_point:08x}')
    for code_point in (
        *range(0xfdd0, 0xfde0),
        *(plane_base | plane_offset
          for plane_base in range(0x000000, 0x110000, 0x010000)
          for plane_offset in (0xfffe, 0xffff))))

# Escape individual code points that were observed to be non-printable as
# \U########.
# NOTE(review): for the private use ranges only the first and last code point
# are listed, not the code points in between - confirm that this is intended.
NON_PRINTABLE_CHARACTERS.update(
    (code_point, f'\\U{code_point:08x}')
    for code_point in (
        # LINE SEPARATOR and PARAGRAPH SEPARATOR.
        0x2028, 0x2029,
        # First and last code point of the private use area.
        0xe000, 0xf8ff,
        # First and last code point of supplementary private use area A.
        0xf0000, 0xffffd,
        # First and last code point of supplementary private use area B.
        0x100000, 0x10fffd))

# Translation table for str.translate() that replaces every code point in
# NON_PRINTABLE_CHARACTERS with its escaped representation.
NON_PRINTABLE_CHARACTER_TRANSLATION_TABLE = (
    str.maketrans(NON_PRINTABLE_CHARACTERS))

0 comments on commit 5edde69

Please sign in to comment.