From 467efd180059b3e50b0f4c26a8d3a405b3a5a358 Mon Sep 17 00:00:00 2001 From: sparuc861 Date: Fri, 18 Mar 2022 11:53:13 -0600 Subject: [PATCH 1/2] 11575: Enable auto purge default catalog property --- .../main/java/io/trino/plugin/hive/HiveConfig.java | 13 +++++++++++++ .../io/trino/plugin/hive/HiveTableProperties.java | 2 +- .../java/io/trino/plugin/hive/TestHiveConfig.java | 7 +++++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java index b9febc8be3d6..3994121fcbc6 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java @@ -165,6 +165,7 @@ public class HiveConfig private boolean sizeBasedSplitWeightsEnabled = true; private double minimumAssignedSplitWeight = 0.05; + private boolean autoPurgeDefault; public boolean isSingleStatementWritesOnly() { @@ -1165,4 +1166,16 @@ public double getMinimumAssignedSplitWeight() { return minimumAssignedSplitWeight; } + + @Config("hive.managed-table-auto-purge-default") + public HiveConfig setAutoPurgeDefault(boolean autoPurgeDefault) + { + this.autoPurgeDefault = autoPurgeDefault; + return this; + } + + public boolean isAutoPurgeDefault() + { + return this.autoPurgeDefault; + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveTableProperties.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveTableProperties.java index b98a188b5a39..85b316fa1ff0 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveTableProperties.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveTableProperties.java @@ -155,7 +155,7 @@ public HiveTableProperties( stringProperty(CSV_QUOTE, "CSV quote character", null, false), stringProperty(CSV_ESCAPE, "CSV escape character", null, false), booleanProperty(TRANSACTIONAL, "Table is transactional", 
null, false), - booleanProperty(AUTO_PURGE, "Skip trash when table or partition is deleted", null, false)); + booleanProperty(AUTO_PURGE, "Skip trash when table or partition is deleted", config.isAutoPurgeDefault(), false)); } public List> getTableProperties() diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java index 8d3cb0cd33da..634e8a4cb10c 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java @@ -109,7 +109,8 @@ public void testDefaults() .setLegacyHiveViewTranslation(false) .setIcebergCatalogName(null) .setSizeBasedSplitWeightsEnabled(true) - .setMinimumAssignedSplitWeight(0.05)); + .setMinimumAssignedSplitWeight(0.05) + .setAutoPurgeDefault(false)); } @Test @@ -190,6 +191,7 @@ public void testExplicitPropertyMappings() .put("hive.iceberg-catalog-name", "iceberg") .put("hive.size-based-split-weights-enabled", "false") .put("hive.minimum-assigned-split-weight", "1.0") + .put("hive.managed-table-auto-purge-default", "true") .buildOrThrow(); HiveConfig expected = new HiveConfig() @@ -266,7 +268,8 @@ public void testExplicitPropertyMappings() .setLegacyHiveViewTranslation(true) .setIcebergCatalogName("iceberg") .setSizeBasedSplitWeightsEnabled(false) - .setMinimumAssignedSplitWeight(1.0); + .setMinimumAssignedSplitWeight(1.0) + .setAutoPurgeDefault(true); assertFullMapping(properties, expected); } From d0fafbe01187e68ae004eaf09730c9cec40337d5 Mon Sep 17 00:00:00 2001 From: sparuc861 Date: Wed, 30 Mar 2022 17:03:35 -0600 Subject: [PATCH 2/2] 11575: Update documentation for hive.managed-table-auto-purge-default --- docs/src/main/sphinx/connector/hive.rst | 58 +++++++++++++++---------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/docs/src/main/sphinx/connector/hive.rst b/docs/src/main/sphinx/connector/hive.rst index 5087472379d6..596a539c7a98
100644 --- a/docs/src/main/sphinx/connector/hive.rst +++ b/docs/src/main/sphinx/connector/hive.rst @@ -398,6 +398,16 @@ Property Name Description Set to ``false`` to disable statistics. Disabling statistics means that :doc:`/optimizer/cost-based-optimizations` can not make smart decisions about the query plan. + +``hive.managed-table-auto-purge-default`` Default value of the ``auto_purge`` managed table property. ``false`` + Set this property to apply auto purge at the catalog level + instead of setting ``auto_purge`` on every managed table + at creation time. + An explicit ``auto_purge`` table property overrides this + catalog default; when ``true``, trash is skipped on managed + table deletion. + See the :ref:`hive_table_properties` for more information + on ``auto_purge``. ================================================== ============================================================ ============ ORC format configuration properties @@ -1029,8 +1039,8 @@ Table properties supply or set metadata for the underlying tables. This is key for :doc:`/sql/create-table-as` statements. Table properties are passed to the connector using a :doc:`WITH ` clause:: - CREATE TABLE tablename - WITH (format='CSV', + CREATE TABLE tablename + WITH (format='CSV', csv_escape = '"') See the :ref:`hive_examples` for more information. @@ -1043,7 +1053,7 @@ See the :ref:`hive_examples` for more information. - Description - Default * - ``auto_purge`` - - Indicates to the configured metastore to perform a purge when a table or + - Indicates to the configured metastore to perform a purge when a table or partition is deleted instead of a soft deletion using the trash. - * - ``avro_schema_url`` @@ -1054,11 +1064,11 @@ See the :ref:`hive_examples` for more information. ``bucketed_by``. - 0 * - ``bucketed_by`` - - The bucketing column for the storage table. Only valid if used with + - The bucketing column for the storage table. Only valid if used with ``bucket_count``.
- ``[]`` * - ``bucketing_version`` - - Specifies which Hive bucketing version to use. Valid values are ``1`` + - Specifies which Hive bucketing version to use. Valid values are ``1`` or ``2``. - * - ``csv_escape`` @@ -1075,51 +1085,51 @@ See the :ref:`hive_examples` for more information. :ref:`hive_examples` for more information. - * - ``format`` - - The table file format. Valid values include ``ORC``, ``PARQUET``, ``AVRO``, - ``RCBINARY``, ``RCTEXT``, ``SEQUENCEFILE``, ``JSON``, ``TEXTFILE``, and - ``CSV``. The catalog property ``hive.storage-format`` sets the default + - The table file format. Valid values include ``ORC``, ``PARQUET``, ``AVRO``, + ``RCBINARY``, ``RCTEXT``, ``SEQUENCEFILE``, ``JSON``, ``TEXTFILE``, and + ``CSV``. The catalog property ``hive.storage-format`` sets the default value and can change it to a different default. - - + - * - ``null_format`` - - The serialization format for ``NULL`` value. Requires TextFile, RCText, + - The serialization format for ``NULL`` value. Requires TextFile, RCText, or SequenceFile format. - * - ``orc_bloom_filter_columns`` - - Comma separated list of columns to use for ORC bloom filter. It improves - the performance of queries using range predicates when reading ORC files. + - Comma separated list of columns to use for ORC bloom filter. It improves + the performance of queries using range predicates when reading ORC files. Requires ORC format. - ``[]`` * - ``orc_bloom_filter_fpp`` - The ORC bloom filters false positive probability. Requires ORC format. - 0.05 * - ``partitioned_by`` - - The partitioning column for the storage table. The columns listed in the - ``partitioned_by`` clause must be the last columns as defined in the DDL. + - The partitioning column for the storage table. The columns listed in the + ``partitioned_by`` clause must be the last columns as defined in the DDL. - ``[]`` * - ``skip_footer_line_count`` - - The number of footer lines to ignore when parsing the file for data. 
+ - The number of footer lines to ignore when parsing the file for data. Requires TextFile or CSV format tables. - - + - * - ``skip_header_line_count`` - - The number of header lines to ignore when parsing the file for data. + - The number of header lines to ignore when parsing the file for data. Requires TextFile or CSV format tables. - * - ``sorted_by`` - - The column to sort by to determine bucketing for row. Only valid if + - The column to sort by to determine bucketing for row. Only valid if ``bucketed_by`` and ``bucket_count`` are specified as well. - ``[]`` * - ``textfile_field_separator`` - - Allows the use of custom field separators, such as '|', for TextFile + - Allows the use of custom field separators, such as '|', for TextFile formatted tables. - - + - * - ``textfile_field_separator_escape`` - Allows the use of a custom escape character for TextFile formatted tables. - - + - * - ``transactional`` - - Set this property to ``true`` to create an ORC ACID transactional table. - Requires ORC format. This property may be shown as true for insert-only + - Set this property to ``true`` to create an ORC ACID transactional table. + Requires ORC format. This property may be shown as true for insert-only tables created using older versions of Hive. - - + - .. _hive_special_columns: