From 1bc6a1fbba9d2959fc8785235b63891ce37d829c Mon Sep 17 00:00:00 2001 From: ritik Date: Fri, 17 Jan 2025 16:11:43 +0530 Subject: [PATCH] The SQLServer Glue Get Table API was not retrieving data as expected due to a case-sensitivity issue with the PARTITION_NUMBER. By converting the PARTITION_NUMBER column to lowercase, we achieved successful data retrieval through the Glue API. --- .../sqlserver/SqlServerConstants.java | 1 + .../sqlserver/SqlServerMetadataHandler.java | 3 +- .../SqlServerQueryStringBuilder.java | 4 ++- .../SqlServerMetadataHandlerTest.java | 29 ++++++++++--------- .../SqlServerQueryStringBuilderTest.java | 9 +++--- .../sqlserver/SqlServerRecordHandlerTest.java | 3 +- 6 files changed, 28 insertions(+), 21 deletions(-) diff --git a/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerConstants.java b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerConstants.java index 4c58be1bba..e081e47838 100644 --- a/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerConstants.java +++ b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerConstants.java @@ -25,5 +25,6 @@ public final class SqlServerConstants public static final String DRIVER_CLASS = "com.microsoft.sqlserver.jdbc.SQLServerDriver"; public static final int DEFAULT_PORT = 1433; public static final String SQLSERVER_QUOTE_CHARACTER = "\""; + static final String PARTITION_NUMBER = "partition_number"; private SqlServerConstants() {} } diff --git a/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerMetadataHandler.java b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerMetadataHandler.java index 2c39a545aa..bcf01e6842 100644 --- a/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerMetadataHandler.java +++ b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerMetadataHandler.java @@ -84,13 +84,14 @@ import java.util.Set; import java.util.stream.Collectors; +import static com.amazonaws.athena.connectors.sqlserver.SqlServerConstants.PARTITION_NUMBER; + public class SqlServerMetadataHandler extends JdbcMetadataHandler { private static final Logger LOGGER = LoggerFactory.getLogger(SqlServerMetadataHandler.class); static final Map JDBC_PROPERTIES = ImmutableMap.of("databaseTerm", "SCHEMA"); static final String ALL_PARTITIONS = "0"; - static final String PARTITION_NUMBER = "PARTITION_NUMBER"; static final String PARTITION_FUNCTION = "PARTITION_FUNCTION"; static final String PARTITIONING_COLUMN = "PARTITIONING_COLUMN"; diff --git a/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerQueryStringBuilder.java b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerQueryStringBuilder.java index 6111285ace..ce3694d1b3 100644 --- a/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerQueryStringBuilder.java +++ b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerQueryStringBuilder.java @@ -30,6 +30,8 @@ import java.util.Collections; import java.util.List; +import static com.amazonaws.athena.connectors.sqlserver.SqlServerConstants.PARTITION_NUMBER; + public class SqlServerQueryStringBuilder extends JdbcSplitQueryBuilder { private static final Logger LOGGER = LoggerFactory.getLogger(SqlServerQueryStringBuilder.class); @@ -63,7 +65,7 @@ protected List getPartitionWhereClauses(Split split) { String partitionFunction = split.getProperty(SqlServerMetadataHandler.PARTITION_FUNCTION); String partitioningColumn = split.getProperty(SqlServerMetadataHandler.PARTITIONING_COLUMN); - String partitionNumber = split.getProperty(SqlServerMetadataHandler.PARTITION_NUMBER); + String partitionNumber = split.getProperty(PARTITION_NUMBER); //example query: select * from MyPartitionTable where $PARTITION.myRangePF(col1) =2 LOGGER.debug("PARTITION_FUNCTION: {}", partitionFunction); diff --git a/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerMetadataHandlerTest.java b/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerMetadataHandlerTest.java index e8e46b63fc..d840e21645 100644 --- a/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerMetadataHandlerTest.java +++ b/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerMetadataHandlerTest.java @@ -70,6 +70,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import static com.amazonaws.athena.connectors.sqlserver.SqlServerConstants.PARTITION_NUMBER; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.nullable; @@ -77,7 +78,7 @@ public class SqlServerMetadataHandlerTest extends TestBase { private static final Logger logger = LoggerFactory.getLogger(SqlServerMetadataHandlerTest.class); - private static final Schema PARTITION_SCHEMA = SchemaBuilder.newBuilder().addField("PARTITION_NUMBER", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build(); + private static final Schema PARTITION_SCHEMA = SchemaBuilder.newBuilder().addField(PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build(); private DatabaseConnectionConfig databaseConnectionConfig = new DatabaseConnectionConfig("testCatalog", SqlServerConstants.NAME, "sqlserver://jdbc:sqlserver://hostname;databaseName=fakedatabase"); private SqlServerMetadataHandler sqlServerMetadataHandler; @@ -109,7 +110,7 @@ public void setup() public void getPartitionSchema() { Assert.assertEquals(SchemaBuilder.newBuilder() - .addField(sqlServerMetadataHandler.PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build(), + .addField(PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build(), this.sqlServerMetadataHandler.getPartitionSchema("testCatalogName")); } @@ -132,7 +133,7 @@ public void doGetTableLayout() PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class); Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement); - String[] columns = {sqlServerMetadataHandler.PARTITION_NUMBER}; + String[] columns = {PARTITION_NUMBER}; int[] types = {Types.VARCHAR}; Object[][] values = {{"2"},{"3"}}; ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1)); @@ -153,7 +154,7 @@ public void doGetTableLayout() Assert.assertEquals(Arrays.asList("[PARTITION_NUMBER : 1:::pf:::pc]","[PARTITION_NUMBER : 2:::pf:::pc]","[PARTITION_NUMBER : 3:::pf:::pc]"), actualValues); SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder(); - expectedSchemaBuilder.addField(FieldBuilder.newBuilder(sqlServerMetadataHandler.PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build()); + expectedSchemaBuilder.addField(FieldBuilder.newBuilder(PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build()); Schema expectedSchema = expectedSchemaBuilder.build(); Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema()); Assert.assertEquals(tableName, getTableLayoutResponse.getTableName()); @@ -176,7 +177,7 @@ public void doGetTableLayoutWithNoPartitions() PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class); Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement); - String[] columns = {sqlServerMetadataHandler.PARTITION_NUMBER}; + String[] columns = {PARTITION_NUMBER}; int[] types = {Types.VARCHAR}; Object[][] values = {{}}; ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1)); @@ -199,7 +200,7 @@ public void doGetTableLayoutWithNoPartitions() Assert.assertEquals(Arrays.asList("[PARTITION_NUMBER : 0]"), actualValues); SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder(); - expectedSchemaBuilder.addField(FieldBuilder.newBuilder(sqlServerMetadataHandler.PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build()); + expectedSchemaBuilder.addField(FieldBuilder.newBuilder(PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build()); Schema expectedSchema = expectedSchemaBuilder.build(); Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema()); Assert.assertEquals(tableName, getTableLayoutResponse.getTableName()); @@ -248,7 +249,7 @@ public void doGetSplits() PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class); Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement); - String[] columns = {sqlServerMetadataHandler.PARTITION_NUMBER}; + String[] columns = {PARTITION_NUMBER}; int[] types = {Types.INTEGER}; Object[][] values = {{2}, {3}}; ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1)); @@ -273,15 +274,15 @@ public void doGetSplits() Set> expectedSplits = com.google.common.collect.ImmutableSet.of( com.google.common.collect.ImmutableMap.of( - sqlServerMetadataHandler.PARTITION_NUMBER, "1", + PARTITION_NUMBER, "1", "PARTITIONING_COLUMN", "pc", "PARTITION_FUNCTION", "pf"), com.google.common.collect.ImmutableMap.of( - sqlServerMetadataHandler.PARTITION_NUMBER, "2", + PARTITION_NUMBER, "2", "PARTITIONING_COLUMN", "pc", "PARTITION_FUNCTION", "pf"), com.google.common.collect.ImmutableMap.of( - sqlServerMetadataHandler.PARTITION_NUMBER, "3", + PARTITION_NUMBER, "3", "PARTITIONING_COLUMN", "pc", "PARTITION_FUNCTION", "pf")); Assert.assertEquals(expectedSplits.size(), getSplitsResponse.getSplits().size()); @@ -310,7 +311,7 @@ public void doGetSplitsWithNoPartition() PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class); Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement); - String[] columns = {sqlServerMetadataHandler.PARTITION_NUMBER}; + String[] columns = {PARTITION_NUMBER}; int[] types = {Types.INTEGER}; Object[][] values = {{}}; ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1)); @@ -329,7 +330,7 @@ public void doGetSplitsWithNoPartition() GetSplitsResponse getSplitsResponse = this.sqlServerMetadataHandler.doGetSplits(splitBlockAllocator, getSplitsRequest); Set> expectedSplits = new HashSet<>(); - expectedSplits.add(Collections.singletonMap(sqlServerMetadataHandler.PARTITION_NUMBER, "0")); + expectedSplits.add(Collections.singletonMap(PARTITION_NUMBER, "0")); Assert.assertEquals(expectedSplits.size(), getSplitsResponse.getSplits().size()); Set> actualSplits = getSplitsResponse.getSplits().stream().map(Split::getProperties).collect(Collectors.toSet()); Assert.assertEquals(expectedSplits, actualSplits); @@ -354,7 +355,7 @@ public void doGetSplitsContinuation() PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class); Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement); - String[] columns = {sqlServerMetadataHandler.PARTITION_NUMBER}; + String[] columns = {PARTITION_NUMBER}; int[] types = {Types.INTEGER}; Object[][] values = {{2},{3}}; ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1)); @@ -375,7 +376,7 @@ public void doGetSplitsContinuation() Set> expectedSplits = com.google.common.collect.ImmutableSet.of( com.google.common.collect.ImmutableMap.of( - sqlServerMetadataHandler.PARTITION_NUMBER, "3", + PARTITION_NUMBER, "3", "PARTITIONING_COLUMN", "pc", "PARTITION_FUNCTION", "pf")); Set> actualSplits = getSplitsResponse.getSplits().stream().map(Split::getProperties).collect(Collectors.toSet()); diff --git a/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerQueryStringBuilderTest.java b/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerQueryStringBuilderTest.java index ea2e2ba116..3b18604e04 100644 --- a/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerQueryStringBuilderTest.java +++ b/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerQueryStringBuilderTest.java @@ -29,6 +29,7 @@ import java.util.Collections; import java.util.List; +import static com.amazonaws.athena.connectors.sqlserver.SqlServerConstants.PARTITION_NUMBER; import static com.amazonaws.athena.connectors.sqlserver.SqlServerConstants.SQLSERVER_QUOTE_CHARACTER; public class SqlServerQueryStringBuilderTest @@ -61,7 +62,7 @@ public void testGetPartitionWhereClauses() Split split1 = Mockito.mock(Split.class); Mockito.when(split1.getProperty(SqlServerMetadataHandler.PARTITION_FUNCTION)).thenReturn("pf"); Mockito.when(split1.getProperty(SqlServerMetadataHandler.PARTITIONING_COLUMN)).thenReturn("col"); - Mockito.when(split1.getProperty(SqlServerMetadataHandler.PARTITION_NUMBER)).thenReturn("1"); + Mockito.when(split1.getProperty(PARTITION_NUMBER)).thenReturn("1"); Assert.assertEquals(Collections.singletonList(" $PARTITION.pf(col) = 1"), builder.getPartitionWhereClauses(split1)); } @@ -73,7 +74,7 @@ public void getPartitionWhereClausesWhenPartitionFunctionIsNull() Split split = Mockito.mock(Split.class); Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_FUNCTION)).thenReturn(null); Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITIONING_COLUMN)).thenReturn("col"); - Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_NUMBER)).thenReturn("74"); + Mockito.when(split.getProperty(PARTITION_NUMBER)).thenReturn("74"); List result = builder.getPartitionWhereClauses(split); // Should return empty list if partition function is null. Assert.assertEquals(0, result.size()); @@ -86,7 +87,7 @@ public void getPartitionWhereClausesWhenPartitioningColumnIsNull() Split split = Mockito.mock(Split.class); Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_FUNCTION)).thenReturn("pf"); Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITIONING_COLUMN)).thenReturn(null); - Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_NUMBER)).thenReturn("74"); + Mockito.when(split.getProperty(PARTITION_NUMBER)).thenReturn("74"); List result = builder.getPartitionWhereClauses(split); // Should return empty list if partitioning column is null. Assert.assertEquals(0, result.size()); @@ -99,7 +100,7 @@ public void getPartitionWhereClausesWhenPartitionFunctionAndColumnAreNull() Split split = Mockito.mock(Split.class); Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_FUNCTION)).thenReturn(null); Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITIONING_COLUMN)).thenReturn(null); - Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_NUMBER)).thenReturn("74"); + Mockito.when(split.getProperty(PARTITION_NUMBER)).thenReturn("74"); List result = builder.getPartitionWhereClauses(split); // Should return empty list if both partition function and partitioning column are null. Assert.assertEquals(0, result.size()); diff --git a/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerRecordHandlerTest.java b/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerRecordHandlerTest.java index c6f8f659dd..054c7c3ba1 100644 --- a/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerRecordHandlerTest.java +++ b/athena-sqlserver/src/test/java/com/amazonaws/athena/connectors/sqlserver/SqlServerRecordHandlerTest.java @@ -48,6 +48,7 @@ import java.sql.SQLException; import java.util.Collections; +import static com.amazonaws.athena.connectors.sqlserver.SqlServerConstants.PARTITION_NUMBER; import static com.amazonaws.athena.connectors.sqlserver.SqlServerConstants.SQLSERVER_QUOTE_CHARACTER; import static org.mockito.ArgumentMatchers.nullable; @@ -104,7 +105,7 @@ public void buildSplitSqlNew() Split split = Mockito.mock(Split.class); Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_FUNCTION)).thenReturn("pf"); Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITIONING_COLUMN)).thenReturn("testCol1"); - Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_NUMBER)).thenReturn("1"); + Mockito.when(split.getProperty(PARTITION_NUMBER)).thenReturn("1"); ValueSet valueSet = getSingleValueSet("varcharTest"); Constraints constraints = Mockito.mock(Constraints.class);