Search in sources :

Example 11 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class RedshiftRecordHandlerTest method buildSplitSqlTest.

/**
 * Builds a schema of nine data columns plus the two partition columns, a mocked split for
 * partition "s0"."p0" and one constraint per data column, then checks that buildSplitSql
 * returns the statement prepared for the expected SQL and binds parameters 1 through 12.
 */
@Test
public void buildSplitSqlTest() throws SQLException {
    logger.info("buildSplitSqlTest - enter");

    // Schema: testCol1..testCol9 followed by the partition columns.
    SchemaBuilder fields = SchemaBuilder.newBuilder();
    fields.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
    fields.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.VARCHAR.getType()).build());
    fields.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.BIGINT.getType()).build());
    fields.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.FLOAT4.getType()).build());
    fields.addField(FieldBuilder.newBuilder("testCol5", Types.MinorType.SMALLINT.getType()).build());
    fields.addField(FieldBuilder.newBuilder("testCol6", Types.MinorType.TINYINT.getType()).build());
    fields.addField(FieldBuilder.newBuilder("testCol7", Types.MinorType.FLOAT8.getType()).build());
    fields.addField(FieldBuilder.newBuilder("testCol8", Types.MinorType.BIT.getType()).build());
    fields.addField(FieldBuilder.newBuilder("testCol9", new ArrowType.Decimal(8, 2)).build());
    fields.addField(FieldBuilder.newBuilder("partition_schema_name", Types.MinorType.VARCHAR.getType()).build());
    fields.addField(FieldBuilder.newBuilder("partition_name", Types.MinorType.VARCHAR.getType()).build());
    Schema tableSchema = fields.build();
    TableName table = new TableName("testSchema", "testTable");

    // Split that identifies partition s0.p0.
    Split partitionSplit = Mockito.mock(Split.class);
    Mockito.when(partitionSplit.getProperties())
            .thenReturn(ImmutableMap.of("partition_schema_name", "s0", "partition_name", "p0"));
    Mockito.when(partitionSplit.getProperty(Mockito.eq(PostGreSqlMetadataHandler.BLOCK_PARTITION_SCHEMA_COLUMN_NAME)))
            .thenReturn("s0");
    Mockito.when(partitionSplit.getProperty(Mockito.eq(PostGreSqlMetadataHandler.BLOCK_PARTITION_COLUMN_NAME)))
            .thenReturn("p0");

    // testCol1 is constrained by two single-value ranges (1 and 2).
    Range firstSingle = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(firstSingle.isSingleValue()).thenReturn(true);
    Mockito.when(firstSingle.getLow().getValue()).thenReturn(1);
    Range secondSingle = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(secondSingle.isSingleValue()).thenReturn(true);
    Mockito.when(secondSingle.getLow().getValue()).thenReturn(2);
    ValueSet col1Values = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(col1Values.getRanges().getOrderedRanges()).thenReturn(ImmutableList.of(firstSingle, secondSingle));

    // Remaining columns: two bounded ranges, then one single value per column.
    ValueSet col2Values = getRangeSet(Marker.Bound.EXACTLY, "1", Marker.Bound.BELOW, "10");
    ValueSet col3Values = getRangeSet(Marker.Bound.ABOVE, 2L, Marker.Bound.EXACTLY, 20L);
    ValueSet col4Values = getSingleValueSet(1.1F);
    ValueSet col5Values = getSingleValueSet(1);
    ValueSet col6Values = getSingleValueSet(0);
    ValueSet col7Values = getSingleValueSet(1.2d);
    ValueSet col8Values = getSingleValueSet(true);
    ValueSet col9Values = getSingleValueSet(BigDecimal.valueOf(12.34));

    ImmutableMap<String, ValueSet> summary = new ImmutableMap.Builder<String, ValueSet>()
            .put("testCol1", col1Values)
            .put("testCol2", col2Values)
            .put("testCol3", col3Values)
            .put("testCol4", col4Values)
            .put("testCol5", col5Values)
            .put("testCol6", col6Values)
            .put("testCol7", col7Values)
            .put("testCol8", col8Values)
            .put("testCol9", col9Values)
            .build();
    Constraints constraints = Mockito.mock(Constraints.class);
    Mockito.when(constraints.getSummary()).thenReturn(summary);

    // The connection mock only hands back the statement when asked for exactly this SQL.
    String expectedSql = "SELECT \"testCol1\", \"testCol2\", \"testCol3\", \"testCol4\", \"testCol5\", \"testCol6\", \"testCol7\", \"testCol8\", \"testCol9\" FROM \"s0\".\"p0\"  WHERE "
            + "(\"testCol1\" IN (?,?)) AND ((\"testCol2\" >= ? AND \"testCol2\" < ?)) AND ((\"testCol3\" > ? AND \"testCol3\" <= ?)) "
            + "AND (\"testCol4\" = ?) AND (\"testCol5\" = ?) AND (\"testCol6\" = ?) AND (\"testCol7\" = ?) AND (\"testCol8\" = ?) AND (\"testCol9\" = ?)";
    PreparedStatement stubbedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(Mockito.eq(expectedSql))).thenReturn(stubbedStatement);

    PreparedStatement builtStatement = this.redshiftRecordHandler.buildSplitSql(this.connection, "testCatalogName", table, tableSchema, constraints, partitionSplit);

    Assert.assertEquals(stubbedStatement, builtStatement);
    // Mockito.verify(mock) checks for exactly one invocation, the same as times(1).
    Mockito.verify(builtStatement).setInt(1, 1);
    Mockito.verify(builtStatement).setInt(2, 2);
    Mockito.verify(builtStatement).setString(3, "1");
    Mockito.verify(builtStatement).setString(4, "10");
    Mockito.verify(builtStatement).setLong(5, 2L);
    Mockito.verify(builtStatement).setLong(6, 20L);
    Mockito.verify(builtStatement).setFloat(7, 1.1F);
    Mockito.verify(builtStatement).setShort(8, (short) 1);
    Mockito.verify(builtStatement).setByte(9, (byte) 0);
    Mockito.verify(builtStatement).setDouble(10, 1.2d);
    Mockito.verify(builtStatement).setBoolean(11, true);
    Mockito.verify(builtStatement).setBigDecimal(12, BigDecimal.valueOf(12.34));
    logger.info("buildSplitSqlTest - exit");
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) PreparedStatement(java.sql.PreparedStatement) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ImmutableMap(com.google.common.collect.ImmutableMap) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 12 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SnowflakeMetadataHandler method getSchema.

/**
 * Builds the Arrow schema for a table from JDBC column metadata, overriding the converted
 * Arrow type for selected data type names read from INFORMATION_SCHEMA.COLUMNS, and appends
 * the partition fields.
 *
 * @param jdbcConnection connection whose catalog and metadata are used to list the table's columns
 * @param tableName schema and table to describe; both names are upper-cased for the data type query
 * @param partitionSchema schema whose fields are appended after the table's own columns
 * @return the schema made of every mapped column followed by the partition fields
 * @throws SQLException if acquiring the column metadata, the second connection or the prepared
 *         statement fails, or if binding the query parameters fails
 * @throws RuntimeException if reading either result set fails (the SQLException is attached as
 *         the cause) or if no column was added to the schema
 */
public Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException {
    // Query to fetch each column's data type name, used to adjust the Arrow type conversion below.
    String dataTypeQuery = "select COLUMN_NAME, DATA_TYPE from \"INFORMATION_SCHEMA\".\"COLUMNS\" WHERE TABLE_SCHEMA=? AND TABLE_NAME=?";
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData());
        Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
        PreparedStatement stmt = connection.prepareStatement(dataTypeQuery)) {
        // Locale.ROOT keeps the upper-casing independent of the JVM's default locale.
        stmt.setString(1, tableName.getSchemaName().toUpperCase(java.util.Locale.ROOT));
        stmt.setString(2, tableName.getTableName().toUpperCase(java.util.Locale.ROOT));
        boolean found = false;
        HashMap<String, String> dataTypeByColumn = new HashMap<>();
        // try-with-resources closes the data type result set on every path.
        try (ResultSet dataTypeResultSet = stmt.executeQuery()) {
            while (dataTypeResultSet.next()) {
                String type = dataTypeResultSet.getString("DATA_TYPE");
                String name = dataTypeResultSet.getString(COLUMN_NAME);
                dataTypeByColumn.put(name.trim(), type.trim());
            }
            if (dataTypeByColumn.isEmpty()) {
                LOGGER.debug("No data type  available for TABLE in hashmap : " + tableName.getTableName());
            }
            while (resultSet.next()) {
                ArrowType columnType = JdbcArrowTypeConverter.toArrowType(resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS"));
                String columnName = resultSet.getString(COLUMN_NAME);
                String dataType = dataTypeByColumn.get(columnName);
                LOGGER.debug("columnName: " + columnName);
                LOGGER.debug("dataType: " + dataType);
                // Data-type-name overrides; the names are mutually exclusive, so at most one applies.
                if (dataType != null) {
                    if (dataType.equalsIgnoreCase("DECIMAL")) {
                        columnType = Types.MinorType.BIGINT.getType();
                    } else if (dataType.equalsIgnoreCase("INTEGER")) {
                        columnType = Types.MinorType.INT.getType();
                    } else if (dataType.equalsIgnoreCase("DATE") || dataType.equalsIgnoreCase("TIMESTAMP")) {
                        // DATE and TIMESTAMP are both converted to TIMESTAMPMILLI.
                        columnType = Types.MinorType.TIMESTAMPMILLI.getType();
                    } else if (dataType.equalsIgnoreCase("TIMESTAMP_NTZ")) {
                        LOGGER.debug(" inside TIMESTAMP ");
                        columnType = Types.MinorType.TIMESTAMPMILLI.getType();
                    }
                }
                // Fall back to VARCHAR when the type is unknown or not supported.
                if (columnType == null || !SupportedTypes.isSupported(columnType)) {
                    columnType = Types.MinorType.VARCHAR.getType();
                }
                if (SupportedTypes.isSupported(columnType)) {
                    LOGGER.debug(" AddField Schema Building...()  ");
                    schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
                    found = true;
                } else {
                    LOGGER.error("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType);
                }
            }
        } catch (SQLException e) {
            // Keep the original failure attached so the root cause is not lost.
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName(), e);
        }
        if (!found) {
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
        }
        partitionSchema.getFields().forEach(schemaBuilder::addField);
        LOGGER.debug(schemaBuilder.toString());
        return schemaBuilder.build();
    }
}
Also used : HashMap(java.util.HashMap) SQLException(java.sql.SQLException) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet) Connection(java.sql.Connection) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) PreparedStatement(java.sql.PreparedStatement)

Example 13 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SaphanaMetadataHandlerTest method doGetTableLayout.

/**
 * Runs doGetTableLayout against a mocked partition query that yields two rows ("p0" and "p1")
 * and checks the returned partition rows, partition schema, table name and the parameters
 * bound to the partition query.
 */
@Test
public void doGetTableLayout() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.saphanaMetadataHandler.getPartitionSchema("testCatalogName");
    // The partition column set is listed explicitly here.
    Set<String> partitionCols = new HashSet<>(Arrays.asList("PART_ID"));
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(SaphanaConstants.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
    String[] columns = { "PART_ID" };
    int[] types = { Types.VARCHAR };
    Object[][] values = { { "p0" }, { "p1" } };
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
    GetTableLayoutResponse getTableLayoutResponse = this.saphanaMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    // Render each returned partition row as a string so the rows can be compared as a list.
    List<String> actualRows = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        actualRows.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    // JUnit's contract is assertEquals(expected, actual); the literal list is the expectation.
    Assert.assertEquals(Arrays.asList("[PART_ID : p0]", "[PART_ID : p1]"), actualRows);
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder(SaphanaConstants.BLOCK_PARTITION_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tableName, getTableLayoutResponse.getTableName());
    // The partition query is expected to bind the table name first, then the schema name.
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, tableName.getTableName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(2, tableName.getSchemaName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 14 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SaphanaMetadataHandlerTest method doGetTable.

/**
 * Stubs the JDBC column metadata for testSchema.testTable with four columns and checks that
 * doGetTable reports each of them as VARCHAR, followed by the PARTITION_SCHEMA fields, along
 * with the requested table and catalog names.
 */
@Test
public void doGetTable() throws Exception {
    // Column metadata rows handed to the mocked result set.
    String[] metadataColumns = { "DATA_TYPE", "COLUMN_SIZE", "COLUMN_NAME", "DECIMAL_DIGITS", "NUM_PREC_RADIX" };
    Object[][] metadataRows = {
        { Types.INTEGER, 12, "testCol1", 0, 0 },
        { Types.VARCHAR, 25, "testCol2", 0, 0 },
        { Types.TIMESTAMP, 93, "testCol3", 0, 0 },
        { Types.TIMESTAMP_WITH_TIMEZONE, 93, "testCol4", 0, 0 }
    };
    ResultSet columnMetadata = mockResultSet(metadataColumns, metadataRows, new AtomicInteger(-1));

    // Expected schema: every test column as VARCHAR, then the partition fields.
    SchemaBuilder expectedFields = SchemaBuilder.newBuilder();
    for (String columnName : new String[] { "testCol1", "testCol2", "testCol3", "testCol4" }) {
        expectedFields.addField(FieldBuilder.newBuilder(columnName, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    }
    PARTITION_SCHEMA.getFields().forEach(expectedFields::addField);
    Schema expectedSchema = expectedFields.build();

    TableName requestedTable = new TableName("testSchema", "testTable");
    Mockito.when(connection.getCatalog()).thenReturn("testCatalog");
    Mockito.when(connection.getMetaData().getColumns("testCatalog", requestedTable.getSchemaName(), requestedTable.getTableName(), null))
            .thenReturn(columnMetadata);

    GetTableRequest request = new GetTableRequest(this.federatedIdentity, "testQueryId", "testCatalog", requestedTable);
    GetTableResponse response = this.saphanaMetadataHandler.doGetTable(this.blockAllocator, request);

    Assert.assertEquals(expectedSchema, response.getSchema());
    Assert.assertEquals(requestedTable, response.getTableName());
    Assert.assertEquals("testCatalog", response.getCatalogName());
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Schema(org.apache.arrow.vector.types.pojo.Schema) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 15 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class RedshiftMetadataHandlerTest method doGetTableLayout.

/**
 * Runs doGetTableLayout against a mocked partition query that yields two rows
 * ("s0"/"p0" and "s1"/"p1") and checks the returned partition rows, partition schema,
 * table name and the parameters bound to the partition query.
 */
@Test
public void doGetTableLayout() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.redshiftMetadataHandler.getPartitionSchema("testCatalogName");
    // Partition columns are taken from the field names of the partition schema.
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(PostGreSqlMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
    String[] columns = { "child_schema", "child" };
    int[] types = { Types.VARCHAR, Types.VARCHAR };
    Object[][] values = { { "s0", "p0" }, { "s1", "p1" } };
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
    GetTableLayoutResponse getTableLayoutResponse = this.redshiftMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    Assert.assertEquals(values.length, getTableLayoutResponse.getPartitions().getRowCount());
    // Render each returned partition row as a string so the rows can be compared as a list.
    List<String> actualRows = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        actualRows.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    // JUnit's contract is assertEquals(expected, actual); the literal list is the expectation.
    Assert.assertEquals(Arrays.asList("[partition_schema_name : s0], [partition_name : p0]", "[partition_schema_name : s1], [partition_name : p1]"), actualRows);
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder(PostGreSqlMetadataHandler.BLOCK_PARTITION_SCHEMA_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder(PostGreSqlMetadataHandler.BLOCK_PARTITION_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tableName, getTableLayoutResponse.getTableName());
    // The partition query is expected to bind the schema name first, then the table name.
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, tableName.getSchemaName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(2, tableName.getTableName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Aggregations

SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder) 68, Schema (org.apache.arrow.vector.types.pojo.Schema) 48, TableName (com.amazonaws.athena.connector.lambda.domain.TableName) 43, Test (org.junit.Test) 43, PreparedStatement (java.sql.PreparedStatement) 37, ResultSet (java.sql.ResultSet) 35, Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) 30, BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) 23, AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 23, BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator) 20, Split (com.amazonaws.athena.connector.lambda.domain.Split) 17, ArrowType (org.apache.arrow.vector.types.pojo.ArrowType) 17, ArrayList (java.util.ArrayList) 15, ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) 12, GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) 12, GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) 12, GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) 11, Connection (java.sql.Connection) 10, HashMap (java.util.HashMap) 10, ImmutableMap (com.google.common.collect.ImmutableMap) 8