diff --git a/LogicalTypes.md b/LogicalTypes.md index c50b96b8e..2c802565f 100644 --- a/LogicalTypes.md +++ b/LogicalTypes.md @@ -48,7 +48,18 @@ was converted from an enumerated type in another data model (e.g. Thrift, Avro, Applications using a data model lacking a native enum type should interpret `ENUM` annotated field as a UTF-8 encoded string. -The sort order used for `ENUM`s is `UNSIGNED` byte-wise comparison. +The sort order used for `ENUM` values is unsigned byte-wise comparison. + +### UUID + +`UUID` annotates a 16-byte fixed-length binary. The value is encoded in +big-endian byte order, so that `00112233-4455-6677-8899-aabbccddeeff` is encoded as the +bytes `00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff` +(this example is from [Wikipedia's UUID page][wiki-uuid]). + +The sort order used for `UUID` values is unsigned byte-wise comparison. + +[wiki-uuid]: https://en.wikipedia.org/wiki/Universally_unique_identifier ## Numeric Types diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift index e7007d37e..58be43b2a 100644 --- a/src/main/thrift/parquet.thrift +++ b/src/main/thrift/parquet.thrift @@ -226,6 +226,7 @@ struct Statistics { /** Empty structs to use as logical type annotations */ struct StringType {} // allowed for BINARY, must be encoded with UTF-8 +struct UUIDType {} // allowed for FIXED[16], must be encoded as raw UUID bytes struct MapType {} // see LogicalTypes.md struct ListType {} // see LogicalTypes.md struct EnumType {} // allowed for BINARY, must be encoded with UTF-8