{
* @return Accept's result.
*/
R accept(ConnectionSimilarityIndexListImpl connectionSimilarityIndexWrappers, P parameter);
+
+ /**
+ * Accepts Avro file format configuration settings.
+ * @param avroFileFormatSpec Avro file format settings.
+ * @param parameter Additional visitor's parameter.
+ * @return Accept's result.
+ */
+ R accept(AvroFileFormatSpec avroFileFormatSpec, P parameter);
}
\ No newline at end of file
diff --git a/dqops/src/main/java/com/dqops/metadata/search/AbstractSearchVisitor.java b/dqops/src/main/java/com/dqops/metadata/search/AbstractSearchVisitor.java
index 816c1282eb..10b0cd0f86 100644
--- a/dqops/src/main/java/com/dqops/metadata/search/AbstractSearchVisitor.java
+++ b/dqops/src/main/java/com/dqops/metadata/search/AbstractSearchVisitor.java
@@ -79,6 +79,7 @@
import com.dqops.metadata.sources.fileformat.FileFormatSpec;
import com.dqops.metadata.sources.fileformat.FilePathListSpec;
import com.dqops.metadata.sources.fileformat.ParquetFileFormatSpec;
+import com.dqops.metadata.sources.fileformat.avro.AvroFileFormatSpec;
import com.dqops.metadata.sources.fileformat.csv.CsvFileFormatSpec;
import com.dqops.metadata.sources.fileformat.deltalake.DeltaLakeFileFormatSpec;
import com.dqops.metadata.sources.fileformat.iceberg.IcebergFileFormatSpec;
@@ -1380,4 +1381,16 @@ public TreeNodeTraversalResult accept(ConnectionSimilarityIndexWrapperImpl conne
public TreeNodeTraversalResult accept(ConnectionSimilarityIndexListImpl connectionSimilarityIndexWrappers, T parameter) {
return TreeNodeTraversalResult.TRAVERSE_CHILDREN;
}
+
+ /**
+ * Accepts Avro file format configuration settings.
+ *
+ * @param avroFileFormatSpec Avro file format settings.
+ * @param parameter Additional visitor's parameter.
+ * @return Accept's result.
+ */
+ @Override
+ public TreeNodeTraversalResult accept(AvroFileFormatSpec avroFileFormatSpec, T parameter) {
+ return TreeNodeTraversalResult.TRAVERSE_CHILDREN;
+ }
}
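
Note: AbstractSearchVisitor only supplies the default TRAVERSE_CHILDREN behavior above; a concrete visitor can override the new hook to react to Avro nodes. A minimal hypothetical sketch (the subclass, its list field, and its purpose are illustrative only and not part of this change set; import paths are assumed to follow the repository layout):

    import java.util.ArrayList;
    import java.util.List;

    import com.dqops.metadata.search.AbstractSearchVisitor;
    import com.dqops.metadata.sources.fileformat.avro.AvroFileFormatSpec;
    import com.dqops.metadata.traversal.TreeNodeTraversalResult;

    // Hypothetical visitor for illustration; collects every Avro format spec it visits.
    public class AvroCollectingVisitor<T> extends AbstractSearchVisitor<T> {
        private final List<AvroFileFormatSpec> foundSpecs = new ArrayList<>();

        @Override
        public TreeNodeTraversalResult accept(AvroFileFormatSpec avroFileFormatSpec, T parameter) {
            this.foundSpecs.add(avroFileFormatSpec); // record the match, then keep walking
            return TreeNodeTraversalResult.TRAVERSE_CHILDREN;
        }

        public List<AvroFileFormatSpec> getFoundSpecs() {
            return this.foundSpecs;
        }
    }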
diff --git a/dqops/src/main/java/com/dqops/metadata/sources/fileformat/FileFormatSpec.java b/dqops/src/main/java/com/dqops/metadata/sources/fileformat/FileFormatSpec.java
index 6349e41974..2a8d813747 100644
--- a/dqops/src/main/java/com/dqops/metadata/sources/fileformat/FileFormatSpec.java
+++ b/dqops/src/main/java/com/dqops/metadata/sources/fileformat/FileFormatSpec.java
@@ -11,6 +11,7 @@
import com.dqops.metadata.id.ChildHierarchyNodeFieldMapImpl;
import com.dqops.metadata.id.HierarchyNodeResultVisitor;
import com.dqops.metadata.sources.TableSpec;
+import com.dqops.metadata.sources.fileformat.avro.AvroFileFormatSpec;
import com.dqops.metadata.sources.fileformat.csv.CsvFileFormatSpec;
import com.dqops.metadata.sources.fileformat.deltalake.DeltaLakeFileFormatSpec;
import com.dqops.metadata.sources.fileformat.iceberg.IcebergFileFormatSpec;
@@ -43,6 +44,7 @@ public class FileFormatSpec extends AbstractSpec {
put("csv", o -> o.csv);
put("json", o -> o.json);
put("parquet", o -> o.parquet);
+ put("avro", o -> o.avro);
put("iceberg", o -> o.iceberg);
put("delta_lake", o -> o.deltaLake);
}
@@ -63,6 +65,11 @@ public class FileFormatSpec extends AbstractSpec {
@JsonSerialize(using = IgnoreEmptyYamlSerializer.class)
private ParquetFileFormatSpec parquet;
+ @JsonPropertyDescription("Avro file format specification.")
+ @JsonInclude(JsonInclude.Include.NON_EMPTY)
+ @JsonSerialize(using = IgnoreEmptyYamlSerializer.class)
+ private AvroFileFormatSpec avro;
+
@JsonPropertyDescription("Iceberg file format specification.")
@JsonInclude(JsonInclude.Include.NON_EMPTY)
@JsonSerialize(using = IgnoreEmptyYamlSerializer.class)
@@ -132,6 +139,24 @@ public void setParquet(ParquetFileFormatSpec parquet) {
propagateHierarchyIdToField(parquet, "parquet");
}
+ /**
+ * Returns the avro file format specification.
+ * @return Avro file format specification.
+ */
+ public AvroFileFormatSpec getAvro() {
+ return avro;
+ }
+
+ /**
+ * Sets the avro file format specification.
+ * @param avro Avro file format specification.
+ */
+ public void setAvro(AvroFileFormatSpec avro) {
+ setDirtyIf(!Objects.equals(this.avro, avro));
+ this.avro = avro;
+ propagateHierarchyIdToField(avro, "avro");
+ }
+
/**
* Returns the Iceberg table format specification.
* @return Iceberg table format specification.
@@ -196,6 +221,7 @@ public boolean isSetHivePartitioning(DuckdbFilesFormatType duckdbFilesFormatType
case csv: return getCsv() != null && getCsv().getHivePartitioning() != null && getCsv().getHivePartitioning();
case json: return getJson() != null && getJson().getHivePartitioning() != null && getJson().getHivePartitioning();
case parquet: return getParquet() != null && getParquet().getHivePartitioning() != null && getParquet().getHivePartitioning();
+ case avro: return false; // hive partitioning is not yet supported by the DuckDB Avro extension
}
}
return false;
@@ -222,6 +248,7 @@ public String buildTableOptionsString(DuckdbParametersSpec duckdb, TableSpec tab
case csv: return csv.buildSourceTableOptionsString(filePathList, tableSpec);
case json: return json.buildSourceTableOptionsString(filePathList, tableSpec);
case parquet: return parquet.buildSourceTableOptionsString(filePathList, tableSpec);
+ case avro: return avro.buildSourceTableOptionsString(filePathList, tableSpec);
case iceberg: return iceberg.buildSourceTableOptionsString(filePathList, tableSpec);
case delta_lake: return deltaLake.buildSourceTableOptionsString(filePathList, tableSpec);
default: throw new RuntimeException("Cant create table options string for the given files: " + filePathList);
@@ -238,6 +265,7 @@ public boolean isFormatSetForType(DuckdbFilesFormatType duckdbFilesFormatType){
case csv: return this.getCsv() != null;
case json: return this.getJson() != null;
case parquet: return this.getParquet() != null;
+ case avro: return this.getAvro() != null;
case iceberg: return this.getIceberg() != null;
case delta_lake: return this.getDeltaLake() != null;
default: throw new RuntimeException("The file format is not supported : " + duckdbFilesFormatType);
@@ -273,6 +301,9 @@ public String getFullExtension(DuckdbFilesFormatType duckdbFilesFormatType){
return fileTypeExtension + formatSpec.getCompression().getCompressionExtension();
}
}
+ if (duckdbFilesFormatType.equals(DuckdbFilesFormatType.avro) && getAvro() != null) {
+ return fileTypeExtension; // compressed Avro files are not yet supported by DuckDB, so no compression extension is appended
+ }
return fileTypeExtension;
}
@@ -333,6 +364,9 @@ public FileFormatSpec expandAndTrim(SecretValueProvider secretValueProvider, Sec
if (cloned.parquet != null) {
cloned.parquet = cloned.parquet.deepClone();
}
+ if (cloned.avro != null) {
+ cloned.avro = cloned.avro.deepClone();
+ }
if (cloned.iceberg != null) {
cloned.iceberg = cloned.iceberg.deepClone();
}
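
With these FileFormatSpec changes in place, Avro participates in the same dispatch as the other formats: the "avro" field-map entry drives YAML (de)serialization, and the switch branches route type checks and option building. A short hypothetical usage sketch using only methods visible in this diff (class and variable names are illustrative; the import path for DuckdbFilesFormatType is assumed):

    import com.dqops.connectors.duckdb.config.DuckdbFilesFormatType;
    import com.dqops.metadata.sources.fileformat.FileFormatSpec;
    import com.dqops.metadata.sources.fileformat.avro.AvroFileFormatSpec;

    public class AvroFormatDispatchExample {
        public static void main(String[] args) {
            // Attach an Avro format specification to the file format container.
            FileFormatSpec fileFormat = new FileFormatSpec();
            fileFormat.setAvro(new AvroFileFormatSpec());

            // The generic dispatch now recognizes the Avro branch:
            System.out.println(fileFormat.isFormatSetForType(DuckdbFilesFormatType.avro));     // true
            System.out.println(fileFormat.isSetHivePartitioning(DuckdbFilesFormatType.avro));  // false, per the switch above
        }
    }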
diff --git a/dqops/src/main/java/com/dqops/metadata/sources/fileformat/FileFormatSpecProvider.java b/dqops/src/main/java/com/dqops/metadata/sources/fileformat/FileFormatSpecProvider.java
index 4211689557..45455ded4e 100644
--- a/dqops/src/main/java/com/dqops/metadata/sources/fileformat/FileFormatSpecProvider.java
+++ b/dqops/src/main/java/com/dqops/metadata/sources/fileformat/FileFormatSpecProvider.java
@@ -6,6 +6,7 @@
import com.dqops.connectors.duckdb.fileslisting.aws.AwsConstants;
import com.dqops.connectors.duckdb.fileslisting.azure.AzureConstants;
import com.dqops.metadata.sources.TableSpec;
+import com.dqops.metadata.sources.fileformat.avro.AvroFileFormatSpec;
import com.dqops.metadata.sources.fileformat.csv.CsvFileFormatSpec;
import com.dqops.metadata.sources.fileformat.deltalake.DeltaLakeFileFormatSpec;
import com.dqops.metadata.sources.fileformat.iceberg.IcebergFileFormatSpec;
@@ -145,6 +146,7 @@ private static void fillDefaultFileFormat(FileFormatSpec fileFormatSpec, DuckdbF
case csv: fileFormatSpec.setCsv(new CsvFileFormatSpec()); break;
case json: fileFormatSpec.setJson(new JsonFileFormatSpec()); break;
case parquet: fileFormatSpec.setParquet(new ParquetFileFormatSpec()); break;
+ case avro: fileFormatSpec.setAvro(new AvroFileFormatSpec()); break;
case iceberg: fileFormatSpec.setIceberg(new IcebergFileFormatSpec()); break;
case delta_lake: fileFormatSpec.setDeltaLake(new DeltaLakeFileFormatSpec()); break;
default: throw new RuntimeException("Can't fill default file format for files type: " + duckdbFilesFormatType);
diff --git a/dqops/src/main/java/com/dqops/metadata/sources/fileformat/avro/AvroFileFormatSpec.java b/dqops/src/main/java/com/dqops/metadata/sources/fileformat/avro/AvroFileFormatSpec.java
new file mode 100644
index 0000000000..b9277295a7
--- /dev/null
+++ b/dqops/src/main/java/com/dqops/metadata/sources/fileformat/avro/AvroFileFormatSpec.java
@@ -0,0 +1,99 @@
+package com.dqops.metadata.sources.fileformat.avro;
+
+import com.dqops.core.secrets.SecretValueLookupContext;
+import com.dqops.core.secrets.SecretValueProvider;
+import com.dqops.metadata.basespecs.AbstractSpec;
+import com.dqops.metadata.id.ChildHierarchyNodeFieldMap;
+import com.dqops.metadata.id.ChildHierarchyNodeFieldMapImpl;
+import com.dqops.metadata.id.HierarchyNodeResultVisitor;
+import com.dqops.metadata.sources.TableSpec;
+import com.dqops.metadata.sources.fileformat.TableOptionsFormatter;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonPropertyDescription;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+import lombok.EqualsAndHashCode;
+import lombok.experimental.FieldNameConstants;
+
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Avro file format specification for querying data in Avro format files.
+ */
+@JsonInclude(JsonInclude.Include.NON_NULL)
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+@EqualsAndHashCode(callSuper = true)
+@FieldNameConstants
+public class AvroFileFormatSpec extends AbstractSpec {
+
+ private static final ChildHierarchyNodeFieldMapImpl<AvroFileFormatSpec> FIELDS = new ChildHierarchyNodeFieldMapImpl<>(AbstractSpec.FIELDS) {
+ {
+ }
+ };
+
+ @JsonPropertyDescription("Whether or not an extra filename column should be included in the result.")
+ private Boolean filename;
+
+ /**
+ * Returns whether an extra filename column should be included in the result.
+ * @return The filename option state.
+ */
+ public Boolean getFilename() {
+ return filename;
+ }
+
+ /**
+ * Sets whether an extra filename column should be included in the result.
+ * @param filename The filename option state.
+ */
+ public void setFilename(Boolean filename) {
+ setDirtyIf(!Objects.equals(this.filename, filename));
+ this.filename = filename;
+ }
+
+ /**
+ * Formats the table options string used in a DuckDB SQL query. Only the options that are set (non-null) are added.
+ * @param filePathList The names of files with data.
+ * @param tableSpec The table specification.
+ * @return The formatted source table options string.
+ */
+ public String buildSourceTableOptionsString(List<String> filePathList, TableSpec tableSpec) {
+ TableOptionsFormatter tableOptionsFormatter = new TableOptionsFormatter("read_avro", filePathList);
+ tableOptionsFormatter.formatValueWhenSet(Fields.filename, filename);
+ return tableOptionsFormatter.build();
+ }
+
+ /**
+ * Returns the child map on the spec class with all fields.
+ * @return Return the field map.
+ */
+ @Override
+ protected ChildHierarchyNodeFieldMap getChildMap() {
+ return FIELDS;
+ }
+
+ /**
+ * Calls a visitor (using a visitor design pattern) that returns a result.
+ *
+ * @param visitor Visitor instance.
+ * @param parameter Additional parameter that will be passed back to the visitor.
+ * @return Result value returned by the visitor.
+ */
+ @Override
+ public <P, R> R visit(HierarchyNodeResultVisitor<P, R> visitor, P parameter) {
+ return visitor.accept(this, parameter);
+ }
+
+ /**
+ * Creates and returns a deep clone (copy) of this object.
+ */
+ @Override
+ public AvroFileFormatSpec deepClone() {
+ return (AvroFileFormatSpec)super.deepClone();
+ }
+
+ /**
+ * Creates an expanded and trimmed deep copy of the spec.
+ * Configurable properties will be expanded if they contain environment variables or secrets.
+ *
+ * @param secretValueProvider Secret value provider.
+ * @param lookupContext Secret value lookup context used to access shared credentials.
+ * @return Cloned, trimmed and expanded file format specification.
+ */
+ public AvroFileFormatSpec expandAndTrim(SecretValueProvider secretValueProvider, SecretValueLookupContext lookupContext) {
+ AvroFileFormatSpec cloned = this.deepClone();
+ return cloned;
+ }
+}
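
Assuming TableOptionsFormatter renders the call the same way it does for the other formats (function name, file list, then each option that was set), a spec with the filename flag enabled would produce a DuckDB read_avro invocation roughly like the sketch below. The exact rendered string is not captured in this diff; the shape shown in the final comment is an assumption:

    import java.util.List;

    import com.dqops.metadata.sources.fileformat.avro.AvroFileFormatSpec;

    public class AvroOptionsStringExample {
        public static void main(String[] args) {
            AvroFileFormatSpec avro = new AvroFileFormatSpec();
            avro.setFilename(true); // include the extra filename column in the result

            // tableSpec is not consulted by the Avro options builder in this sketch, so null is passed.
            String options = avro.buildSourceTableOptionsString(
                    List.of("s3://bucket/events/part-0.avro"), null);
            // expected shape (assumed): read_avro(['s3://bucket/events/part-0.avro'], filename = true)
            System.out.println(options);
        }
    }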
diff --git a/dqops/src/main/resources/static/swagger-api/dqops-api-swagger-2.json b/dqops/src/main/resources/static/swagger-api/dqops-api-swagger-2.json
index 0b607a729f..0cac7ab62f 100644
--- a/dqops/src/main/resources/static/swagger-api/dqops-api-swagger-2.json
+++ b/dqops/src/main/resources/static/swagger-api/dqops-api-swagger-2.json
@@ -20370,6 +20370,15 @@
}
}
},
+ "AvroFileFormatSpec" : {
+ "type" : "object",
+ "properties" : {
+ "filename" : {
+ "type" : "boolean",
+ "description" : "Whether or not an extra filename column should be included in the result."
+ }
+ }
+ },
"BetweenFloatsRuleParametersSpec" : {
"type" : "object",
"properties" : {
@@ -39393,7 +39402,7 @@
"files_format_type" : {
"type" : "string",
"description" : "Type of source files format for DuckDB.",
- "enum" : [ "csv", "json", "parquet", "iceberg", "delta_lake" ]
+ "enum" : [ "csv", "json", "parquet", "avro", "iceberg", "delta_lake" ]
},
"database" : {
"type" : "string",
@@ -39418,6 +39427,10 @@
"description" : "Parquet file format specification.",
"$ref" : "#/definitions/ParquetFileFormatSpec"
},
+ "avro" : {
+ "description" : "Avro file format specification.",
+ "$ref" : "#/definitions/AvroFileFormatSpec"
+ },
"iceberg" : {
"description" : "Iceberg file format specification.",
"$ref" : "#/definitions/IcebergFileFormatSpec"
@@ -39942,6 +39955,10 @@
"description" : "Parquet file format specification.",
"$ref" : "#/definitions/ParquetFileFormatSpec"
},
+ "avro" : {
+ "description" : "Avro file format specification.",
+ "$ref" : "#/definitions/AvroFileFormatSpec"
+ },
"iceberg" : {
"description" : "Iceberg file format specification.",
"$ref" : "#/definitions/IcebergFileFormatSpec"
diff --git a/dqops/src/main/resources/static/swagger-api/dqops-api-swagger-2.yaml b/dqops/src/main/resources/static/swagger-api/dqops-api-swagger-2.yaml
index 059c2e3aa0..f35e2a7afe 100644
--- a/dqops/src/main/resources/static/swagger-api/dqops-api-swagger-2.yaml
+++ b/dqops/src/main/resources/static/swagger-api/dqops-api-swagger-2.yaml
@@ -17252,6 +17252,13 @@ definitions:
schedule:
description: "Schedule for importing source tables using a CRON scheduler."
$ref: "#/definitions/CronScheduleSpec"
+ AvroFileFormatSpec:
+ type: "object"
+ properties:
+ filename:
+ type: "boolean"
+ description: "Whether or not an extra filename column should be included in\
+ \ the result."
BetweenFloatsRuleParametersSpec:
type: "object"
properties:
@@ -38132,6 +38139,7 @@ definitions:
- "csv"
- "json"
- "parquet"
+ - "avro"
- "iceberg"
- "delta_lake"
database:
@@ -38153,6 +38161,9 @@ definitions:
parquet:
description: "Parquet file format specification."
$ref: "#/definitions/ParquetFileFormatSpec"
+ avro:
+ description: "Avro file format specification."
+ $ref: "#/definitions/AvroFileFormatSpec"
iceberg:
description: "Iceberg file format specification."
$ref: "#/definitions/IcebergFileFormatSpec"
@@ -38626,6 +38637,9 @@ definitions:
parquet:
description: "Parquet file format specification."
$ref: "#/definitions/ParquetFileFormatSpec"
+ avro:
+ description: "Avro file format specification."
+ $ref: "#/definitions/AvroFileFormatSpec"
iceberg:
description: "Iceberg file format specification."
$ref: "#/definitions/IcebergFileFormatSpec"