Search in sources :

Example 46 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class OracleMetadataHandlerTest method doGetTableLayout.

@Test
public void doGetTableLayout() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "TESTTABLE");
    Schema partitionSchema = this.oracleMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(OracleMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
    String[] columns = { "PARTITION_NAME" };
    int[] types = { Types.VARCHAR };
    Object[][] values = { { "p0" }, { "p1" } };
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
    GetTableLayoutResponse getTableLayoutResponse = this.oracleMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    Assert.assertEquals(values.length, getTableLayoutResponse.getPartitions().getRowCount());
    List<String> expectedValues = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        expectedValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    Assert.assertEquals(expectedValues, Arrays.asList("[PARTITION_NAME : p0]", "[PARTITION_NAME : p1]"));
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder(OracleMetadataHandler.BLOCK_PARTITION_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tableName, getTableLayoutResponse.getTableName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, tableName.getTableName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 47 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class OracleMetadataHandlerTest method doGetTableLayoutWithNoPartitions.

@Test
public void doGetTableLayoutWithNoPartitions() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "TESTTABLE");
    Schema partitionSchema = this.oracleMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(OracleMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
    String[] columns = { "PARTITION_NAME" };
    int[] types = { Types.VARCHAR };
    Object[][] values = { {} };
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
    GetTableLayoutResponse getTableLayoutResponse = this.oracleMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    Assert.assertEquals(values.length, getTableLayoutResponse.getPartitions().getRowCount());
    List<String> expectedValues = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        expectedValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    Assert.assertEquals(expectedValues, Collections.singletonList("[PARTITION_NAME : 0]"));
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder(OracleMetadataHandler.BLOCK_PARTITION_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tableName, getTableLayoutResponse.getTableName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, tableName.getTableName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 48 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class MySqlMetadataHandlerTest method doGetTableLayoutWithNoPartitions.

@Test
public void doGetTableLayoutWithNoPartitions() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.mySqlMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(MySqlMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
    String[] columns = { "partition_name" };
    int[] types = { Types.VARCHAR };
    Object[][] values = { {} };
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
    GetTableLayoutResponse getTableLayoutResponse = this.mySqlMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    Assert.assertEquals(values.length, getTableLayoutResponse.getPartitions().getRowCount());
    List<String> expectedValues = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        expectedValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    Assert.assertEquals(expectedValues, Collections.singletonList("[partition_name : *]"));
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder(MySqlMetadataHandler.BLOCK_PARTITION_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tableName, getTableLayoutResponse.getTableName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, tableName.getTableName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(2, tableName.getSchemaName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 49 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SaphanaMetadataHandler method getSchema.

/**
 * @param jdbcConnection
 * @param tableName
 * @param partitionSchema
 * @return
 * @throws SQLException
 */
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException {
    LOGGER.debug("SaphanaMetadataHandler:getSchema starting");
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData());
        Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        boolean found = false;
        HashMap<String, String> hashMap = new HashMap<String, String>();
        try {
            // fetch data types for columns for appropriate datatype to arrowtype conversions.
            ResultSet dataTypeResultSet = getColumnDatatype(connection, tableName);
            String type = "";
            String name = "";
            while (dataTypeResultSet.next()) {
                type = dataTypeResultSet.getString("DATA_TYPE");
                name = dataTypeResultSet.getString(SaphanaConstants.COLUMN_NAME);
                hashMap.put(name.trim().toLowerCase(), type.trim());
            }
            LOGGER.debug("Data types resolved by column names {}", hashMap);
            if (hashMap.isEmpty() == true) {
                LOGGER.debug("No data type  available for TABLE in hashmap : " + tableName.getTableName());
            }
            while (resultSet.next()) {
                boolean isSpatialDataType = false;
                String columnName = resultSet.getString(SaphanaConstants.COLUMN_NAME);
                LOGGER.debug("SaphanaMetadataHandler:getSchema determining column type of column {}", columnName);
                ArrowType columnType = JdbcArrowTypeConverter.toArrowType(resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS"));
                LOGGER.debug("SaphanaMetadataHandler:getSchema column type of column {} is {}", columnName, columnType);
                String dataType = hashMap.get(columnName.toLowerCase());
                LOGGER.debug("columnName: " + columnName);
                LOGGER.debug("dataType: " + dataType);
                InferredColumnType inferredColumnType = InferredColumnType.fromType(dataType);
                columnType = inferredColumnType.columnType;
                isSpatialDataType = inferredColumnType.isSpatialType;
                /**
                 * converting into VARCHAR not supported by Framework.
                 */
                if (columnType == null) {
                    columnType = Types.MinorType.VARCHAR.getType();
                }
                if (columnType != null && !SupportedTypes.isSupported(columnType)) {
                    columnType = Types.MinorType.VARCHAR.getType();
                }
                if (columnType != null && SupportedTypes.isSupported(columnType)) {
                    LOGGER.debug("Adding column {} to schema of type {}", columnName, columnType);
                    if (isSpatialDataType) {
                        schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).addField(new Field(quoteColumnName(columnName) + TO_WELL_KNOWN_TEXT_FUNCTION, new FieldType(true, columnType, null), List.of())).build());
                    } else {
                        schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
                    }
                    found = true;
                } else {
                    LOGGER.debug("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType);
                    LOGGER.error("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType);
                }
            }
        } catch (SQLException e) {
            LOGGER.debug("Error getting schema: " + e.getMessage());
            LOGGER.error(e.toString());
        }
        if (!found) {
            LOGGER.error(" Table not found ---  Throwing exception ");
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
        }
        partitionSchema.getFields().forEach(schemaBuilder::addField);
        LOGGER.debug(schemaBuilder.toString());
        return schemaBuilder.build();
    }
}
Also used : HashMap(java.util.HashMap) SQLException(java.sql.SQLException) Connection(java.sql.Connection) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) FieldType(org.apache.arrow.vector.types.pojo.FieldType) Field(org.apache.arrow.vector.types.pojo.Field) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet)

Example 50 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SaphanaMetadataHandlerTest method doGetTableLayoutWithNoPartitions.

@Test
public void doGetTableLayoutWithNoPartitions() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.saphanaMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(SaphanaConstants.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
    String[] columns = { "PART_ID" };
    int[] types = { Types.VARCHAR };
    Object[][] values = { {} };
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
    GetTableLayoutResponse getTableLayoutResponse = this.saphanaMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    Assert.assertEquals(values.length, getTableLayoutResponse.getPartitions().getRowCount());
    List<String> expectedValues = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        expectedValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    Assert.assertEquals(expectedValues, Collections.singletonList("[PART_ID : 0]"));
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Assert.assertEquals(tableName, getTableLayoutResponse.getTableName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, tableName.getTableName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Aggregations

SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)68 Schema (org.apache.arrow.vector.types.pojo.Schema)48 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)43 Test (org.junit.Test)43 PreparedStatement (java.sql.PreparedStatement)37 ResultSet (java.sql.ResultSet)35 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)30 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)23 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)23 BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator)20 Split (com.amazonaws.athena.connector.lambda.domain.Split)17 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)17 ArrayList (java.util.ArrayList)15 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)12 GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse)12 GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)12 GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest)11 Connection (java.sql.Connection)10 HashMap (java.util.HashMap)10 ImmutableMap (com.google.common.collect.ImmutableMap)8