Search in sources :

Example 56 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SqlServerRecordHandlerTest method buildSplitSqlNew.

@Test
public void buildSplitSqlNew() throws SQLException {
    TableName tableName = new TableName("testSchema", "testTable");
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARCHAR.getType()).build());
    Schema schema = schemaBuilder.build();
    Split split = Mockito.mock(Split.class);
    Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_FUNCTION)).thenReturn("pf");
    Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITIONING_COLUMN)).thenReturn("testCol1");
    Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_NUMBER)).thenReturn("1");
    ValueSet valueSet = getSingleValueSet("varcharTest");
    Constraints constraints = Mockito.mock(Constraints.class);
    Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol4", valueSet).build());
    String expectedSql = "SELECT `testCol1`, `testCol2`, `testCol3`, `testCol4` FROM `testSchema`.`testTable`  WHERE (`testCol4` = ?) AND  $PARTITION.pf(testCol1) = 1";
    PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(Mockito.eq(expectedSql))).thenReturn(expectedPreparedStatement);
    PreparedStatement preparedStatement = this.sqlServerRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    Assert.assertEquals(expectedPreparedStatement, preparedStatement);
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, "varcharTest");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Schema(org.apache.arrow.vector.types.pojo.Schema) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) PreparedStatement(java.sql.PreparedStatement) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 57 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SqlServerMetadataHandler method getSchema.

/**
 * Appropriate datatype to arrow type conversions will be done by fetching data types of columns
 * @param jdbcConnection
 * @param tableName
 * @param partitionSchema
 * @return
 * @throws SQLException
 */
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException {
    String dataTypeQuery = "SELECT C.NAME AS COLUMN_NAME, TYPE_NAME(C.USER_TYPE_ID) AS DATA_TYPE " + "FROM SYS.COLUMNS C " + "JOIN SYS.TYPES T " + "ON C.USER_TYPE_ID=T.USER_TYPE_ID " + "WHERE C.OBJECT_ID=OBJECT_ID(?)";
    String dataType;
    String columnName;
    HashMap<String, String> hashMap = new HashMap<>();
    boolean found = false;
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData());
        Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
        PreparedStatement stmt = connection.prepareStatement(dataTypeQuery)) {
        // fetch data types of columns and prepare map with column name and datatype.
        stmt.setString(1, tableName.getSchemaName() + "." + tableName.getTableName());
        try (ResultSet dataTypeResultSet = stmt.executeQuery()) {
            while (dataTypeResultSet.next()) {
                dataType = dataTypeResultSet.getString("DATA_TYPE");
                columnName = dataTypeResultSet.getString("COLUMN_NAME");
                hashMap.put(columnName.trim(), dataType.trim());
            }
        }
        while (resultSet.next()) {
            ArrowType columnType = JdbcArrowTypeConverter.toArrowType(resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS"));
            columnName = resultSet.getString("COLUMN_NAME");
            dataType = hashMap.get(columnName);
            LOGGER.debug("columnName: " + columnName);
            LOGGER.debug("dataType: " + dataType);
            /**
             * Converting date data type into DATEDAY since framework is unable to do it by default
             */
            if ("date".equalsIgnoreCase(dataType)) {
                columnType = Types.MinorType.DATEDAY.getType();
            }
            /**
             * Converting bit data type into TINYINT because BIT type is showing 0 as false and 1 as true.
             * we can avoid it by changing to TINYINT.
             */
            if ("bit".equalsIgnoreCase(dataType)) {
                columnType = Types.MinorType.TINYINT.getType();
            }
            /**
             * Converting tinyint data type into SMALLINT.
             * TINYINT range is 0 to 255 in SQL Server, usage of TINYINT(ArrowType) leads to data loss
             * as its using 1 bit as signed flag.
             */
            if ("tinyint".equalsIgnoreCase(dataType)) {
                columnType = Types.MinorType.SMALLINT.getType();
            }
            /**
             * Converting numeric, smallmoney data types into FLOAT8 to avoid data loss
             * (ex: 123.45 is shown as 123 (loosing its scale))
             */
            if ("numeric".equalsIgnoreCase(dataType) || "smallmoney".equalsIgnoreCase(dataType)) {
                columnType = Types.MinorType.FLOAT8.getType();
            }
            /**
             * Converting time data type(s) into DATEMILLI since framework is unable to map it by default
             */
            if ("datetime".equalsIgnoreCase(dataType) || "datetime2".equalsIgnoreCase(dataType) || "smalldatetime".equalsIgnoreCase(dataType) || "datetimeoffset".equalsIgnoreCase(dataType)) {
                columnType = Types.MinorType.DATEMILLI.getType();
            }
            /**
             * converting into VARCHAR for non supported data types.
             */
            if (columnType == null) {
                columnType = Types.MinorType.VARCHAR.getType();
            }
            if (columnType != null && !SupportedTypes.isSupported(columnType)) {
                columnType = Types.MinorType.VARCHAR.getType();
            }
            LOGGER.debug("columnType: " + columnType);
            if (columnType != null && SupportedTypes.isSupported(columnType)) {
                schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
                found = true;
            } else {
                LOGGER.error("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType);
            }
        }
        if (!found) {
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
        }
        // add partition columns
        partitionSchema.getFields().forEach(schemaBuilder::addField);
        return schemaBuilder.build();
    }
}
Also used : HashMap(java.util.HashMap) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet) Connection(java.sql.Connection) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) PreparedStatement(java.sql.PreparedStatement)

Example 58 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SqlServerMetadataHandlerTest method doGetTableLayout.

@Test
public void doGetTableLayout() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.sqlServerMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    PreparedStatement rowCountPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.ROW_COUNT_QUERY)).thenReturn(rowCountPreparedStatement);
    ResultSet rowCountResultSet = mockResultSet(new String[] { "ROW_COUNT" }, new int[] { Types.INTEGER }, new Object[][] { { 2 } }, new AtomicInteger(-1));
    Mockito.when(rowCountPreparedStatement.executeQuery()).thenReturn(rowCountResultSet);
    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
    String[] columns = { sqlServerMetadataHandler.PARTITION_NUMBER };
    int[] types = { Types.VARCHAR };
    Object[][] values = { { "2" }, { "3" } };
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
    GetTableLayoutResponse getTableLayoutResponse = this.sqlServerMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    List<String> actualValues = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        actualValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    Assert.assertEquals(Arrays.asList("[PARTITION_NUMBER : 1]", "[PARTITION_NUMBER : 2]", "[PARTITION_NUMBER : 3]"), actualValues);
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder(sqlServerMetadataHandler.PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tableName, getTableLayoutResponse.getTableName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, getTableLayoutRequest.getTableName().getSchemaName() + "." + getTableLayoutRequest.getTableName().getTableName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 59 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SynapseRecordHandlerTest method buildSplitSql.

@Test
public void buildSplitSql() throws SQLException {
    TableName tableName = new TableName("testSchema", "testTable");
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARCHAR.getType()).build());
    Schema schema = schemaBuilder.build();
    Split split = Mockito.mock(Split.class);
    Mockito.when(split.getProperty(SynapseMetadataHandler.PARTITION_COLUMN)).thenReturn("id");
    Mockito.when(split.getProperty(SynapseMetadataHandler.PARTITION_BOUNDARY_FROM)).thenReturn("100000");
    Mockito.when(split.getProperty(SynapseMetadataHandler.PARTITION_BOUNDARY_TO)).thenReturn("300000");
    ValueSet valueSet = getSingleValueSet("varcharTest");
    Constraints constraints = Mockito.mock(Constraints.class);
    Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol4", valueSet).build());
    String expectedSql = "SELECT `testCol1`, `testCol2`, `testCol3`, `testCol4` FROM `testSchema`.`testTable`  WHERE (`testCol4` = ?) AND id > 100000 and id <= 300000";
    PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(Mockito.eq(expectedSql))).thenReturn(expectedPreparedStatement);
    PreparedStatement preparedStatement = this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    Assert.assertEquals(expectedPreparedStatement, preparedStatement);
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, "varcharTest");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Schema(org.apache.arrow.vector.types.pojo.Schema) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) PreparedStatement(java.sql.PreparedStatement) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 60 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SynapseRecordHandlerTest method buildSplitSqlWithPartition.

@Test
public void buildSplitSqlWithPartition() throws SQLException {
    TableName tableName = new TableName("testSchema", "testTable");
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARBINARY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("partition", Types.MinorType.VARCHAR.getType()).build());
    Schema schema = schemaBuilder.build();
    Split split = Mockito.mock(Split.class);
    Mockito.when(split.getProperties()).thenReturn(Map.of("PARTITION_BOUNDARY_FROM", "0", "PARTITION_NUMBER", "1", "PARTITION_COLUMN", "testCol1", "PARTITION_BOUNDARY_TO", "100000"));
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_FROM"))).thenReturn("0");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_NUMBER"))).thenReturn("1");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_COLUMN"))).thenReturn("testCol1");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_TO"))).thenReturn("100000");
    Constraints constraints = Mockito.mock(Constraints.class);
    PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(Mockito.anyString())).thenReturn(expectedPreparedStatement);
    this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    Mockito.when(split.getProperties()).thenReturn(Map.of("PARTITION_BOUNDARY_FROM", " ", "PARTITION_NUMBER", "1", "PARTITION_COLUMN", "testCol1", "PARTITION_BOUNDARY_TO", "100000"));
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_FROM"))).thenReturn(" ");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_NUMBER"))).thenReturn("1");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_COLUMN"))).thenReturn("testCol1");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_TO"))).thenReturn("100000");
    this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    Mockito.when(split.getProperties()).thenReturn(Map.of("PARTITION_BOUNDARY_FROM", "300000", "PARTITION_NUMBER", "2", "PARTITION_COLUMN", "testCol1", "PARTITION_BOUNDARY_TO", " "));
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_FROM"))).thenReturn("300000");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_NUMBER"))).thenReturn("1");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_COLUMN"))).thenReturn("testCol1");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_TO"))).thenReturn(" ");
    this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    Mockito.when(split.getProperties()).thenReturn(Map.of("PARTITION_BOUNDARY_FROM", " ", "PARTITION_NUMBER", "2", "PARTITION_COLUMN", "testCol1", "PARTITION_BOUNDARY_TO", " "));
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_FROM"))).thenReturn(" ");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_NUMBER"))).thenReturn("1");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_COLUMN"))).thenReturn("testCol1");
    Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_TO"))).thenReturn(" ");
    this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Schema(org.apache.arrow.vector.types.pojo.Schema) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) PreparedStatement(java.sql.PreparedStatement) Split(com.amazonaws.athena.connector.lambda.domain.Split) Test(org.junit.Test)

Aggregations

SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)68 Schema (org.apache.arrow.vector.types.pojo.Schema)48 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)43 Test (org.junit.Test)43 PreparedStatement (java.sql.PreparedStatement)37 ResultSet (java.sql.ResultSet)35 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)30 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)23 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)23 BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator)20 Split (com.amazonaws.athena.connector.lambda.domain.Split)17 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)17 ArrayList (java.util.ArrayList)15 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)12 GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse)12 GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)12 GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest)11 Connection (java.sql.Connection)10 HashMap (java.util.HashMap)10 ImmutableMap (com.google.common.collect.ImmutableMap)8