Search in sources:

Example 6 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

From the class MySqlMetadataHandlerTest, method doGetTableLayout.

/**
 * Verifies that {@code doGetTableLayout} turns each row returned by the mocked partitions
 * query into one partition row of the response.
 *
 * <p>The mocked result set yields the partition names {@code p0} and {@code p1}. The test
 * then checks the partition row count, the rendered rows, the partition block schema, the
 * table name echoed in the response, and the two string parameters bound to the prepared
 * statement (table name at index 1, schema name at index 2).
 *
 * @throws Exception if the mock setup or the handler under test fails
 */
@Test
public void doGetTableLayout() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.mySqlMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(MySqlMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
    String[] columns = { "partition_name" };
    int[] types = { Types.VARCHAR };
    Object[][] values = { { "p0" }, { "p1" } };
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
    GetTableLayoutResponse getTableLayoutResponse = this.mySqlMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    Assert.assertEquals(values.length, getTableLayoutResponse.getPartitions().getRowCount());
    // These rows come from the response, so they are the ACTUAL values under test.
    List<String> actualValues = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        actualValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a
    // failure message report the literal expectation as "expected".
    Assert.assertEquals(Arrays.asList("[partition_name : p0]", "[partition_name : p1]"), actualValues);
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder(MySqlMetadataHandler.BLOCK_PARTITION_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tableName, getTableLayoutResponse.getTableName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, tableName.getTableName());
    Mockito.verify(preparedStatement, Mockito.times(1)).setString(2, tableName.getSchemaName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 7 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

From the class JdbcMetadataHandlerTest, method doGetTable.

/**
 * Checks that {@code doGetTable} builds the expected schema from mocked JDBC column metadata.
 *
 * <p>Four columns are served by the mocked {@code getColumns} result set (INTEGER, VARCHAR,
 * TIMESTAMP and TIMESTAMP_WITH_TIMEZONE). The expected schema maps them to INT, VARCHAR,
 * DATEMILLI and VARCHAR respectively, followed by the fields of {@code PARTITION_SCHEMA}.
 *
 * @throws SQLException if the JDBC mocks or the handler under test raise it
 */
@Test
public void doGetTable() throws SQLException {
    TableName requestedTable = new TableName("testSchema", "testTable");

    // Column metadata rows that the mocked getColumns(...) call will hand back.
    String[] columnLabels = { "DATA_TYPE", "COLUMN_SIZE", "COLUMN_NAME", "DECIMAL_DIGITS", "NUM_PREC_RADIX" };
    Object[][] columnRows = {
        { Types.INTEGER, 12, "testCol1", 0, 0 },
        { Types.VARCHAR, 25, "testCol2", 0, 0 },
        { Types.TIMESTAMP, 93, "testCol3", 0, 0 },
        { Types.TIMESTAMP_WITH_TIMEZONE, 93, "testCol4", 0, 0 }
    };
    ResultSet columnsResultSet = mockResultSet(columnLabels, columnRows, new AtomicInteger(-1));

    // Schema the handler is expected to produce: data columns first, then partition fields.
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", org.apache.arrow.vector.types.Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", org.apache.arrow.vector.types.Types.MinorType.DATEMILLI.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    PARTITION_SCHEMA.getFields().forEach(schemaBuilder::addField);
    Schema expectedSchema = schemaBuilder.build();

    Mockito.when(connection.getMetaData().getColumns("testCatalog", requestedTable.getSchemaName(), requestedTable.getTableName(), null))
            .thenReturn(columnsResultSet);
    Mockito.when(connection.getCatalog()).thenReturn("testCatalog");

    GetTableRequest request = new GetTableRequest(this.federatedIdentity, "testQueryId", "testCatalog", requestedTable);
    GetTableResponse response = this.jdbcMetadataHandler.doGetTable(this.blockAllocator, request);

    Assert.assertEquals(expectedSchema, response.getSchema());
    Assert.assertEquals(requestedTable, response.getTableName());
    Assert.assertEquals("testCatalog", response.getCatalogName());
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 8 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

From the class JdbcRecordHandlerTest, method readWithConstraint.

/**
 * Drives {@code readWithConstraint} with a mocked two-row JDBC result set and an S3-backed
 * spiller.
 *
 * <p>The stubbed {@code putObject} call decodes whatever payload it receives as UTF-8 and
 * asserts that it contains one of the row values or the partition value.
 *
 * @throws SQLException if the JDBC mocks or the handler under test raise it
 */
@Test
public void readWithConstraint() throws SQLException {
    // Constraint evaluator stubbed to accept every value.
    ConstraintEvaluator acceptAllEvaluator = Mockito.mock(ConstraintEvaluator.class);
    Mockito.when(acceptAllEvaluator.apply(Mockito.anyString(), Mockito.any())).thenReturn(true);

    // Schema of the request: two data columns plus the partition column.
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", org.apache.arrow.vector.types.Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testPartitionCol", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema requestSchema = schemaBuilder.build();

    TableName requestedTable = new TableName("testSchema", "testTable");
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    S3SpillLocation spillLocation = S3SpillLocation.newBuilder().withIsDirectory(true).build();
    Split split = Split.newBuilder(spillLocation, null)
            .add("testPartitionCol", String.valueOf("testPartitionValue"))
            .build();
    Constraints constraints = Mockito.mock(Constraints.class, Mockito.RETURNS_DEEP_STUBS);

    // Rows returned by the mocked prepared statement.
    String[] columnLabels = { "testCol1", "testCol2" };
    int[] jdbcTypes = { Types.INTEGER, Types.VARCHAR };
    Object[][] rows = {
        { 1, "testVal1" },
        { 2, "testVal2" }
    };
    ResultSet rowsResultSet = mockResultSet(columnLabels, jdbcTypes, rows, new AtomicInteger(-1));
    Mockito.when(this.preparedStatement.executeQuery()).thenReturn(rowsResultSet);

    SpillConfig spillConfig = Mockito.mock(SpillConfig.class);
    Mockito.when(spillConfig.getSpillLocation()).thenReturn(spillLocation);
    BlockSpiller spiller = new S3BlockSpiller(this.amazonS3, spillConfig, blockAllocator, requestSchema, acceptAllEvaluator);
    ReadRecordsRequest request = new ReadRecordsRequest(this.federatedIdentity, "testCatalog", "testQueryId", requestedTable, requestSchema, split, constraints, 1024, 1024);

    Mockito.when(amazonS3.putObject(Mockito.anyString(), Mockito.anyString(), Mockito.any(), Mockito.any())).thenAnswer((Answer<PutObjectResult>) invocation -> {
        // Third putObject argument is the payload stream; drain what is available and decode it.
        ByteArrayInputStream payload = (ByteArrayInputStream) invocation.getArguments()[2];
        int available = payload.available();
        byte[] buffer = new byte[available];
        payload.read(buffer, 0, available);
        String text = new String(buffer, StandardCharsets.UTF_8);
        boolean containsKnownValue = text.contains("testVal1") || text.contains("testVal2") || text.contains("testPartitionValue");
        Assert.assertTrue(containsKnownValue);
        return new PutObjectResult();
    });

    this.jdbcRecordHandler.readWithConstraint(spiller, request, queryStatusChecker);
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) Connection(java.sql.Connection) S3BlockSpiller(com.amazonaws.athena.connector.lambda.data.S3BlockSpiller) com.amazonaws.athena.connector.lambda.data.writers.extractors(com.amazonaws.athena.connector.lambda.data.writers.extractors) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) ConstraintEvaluator(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator) Answer(org.mockito.stubbing.Answer) BlockSpiller(com.amazonaws.athena.connector.lambda.data.BlockSpiller) SQLException(java.sql.SQLException) JdbcCredentialProvider(com.amazonaws.athena.connectors.jdbc.connection.JdbcCredentialProvider) ByteArrayInputStream(java.io.ByteArrayInputStream) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet) Map(java.util.Map) TestBase(com.amazonaws.athena.connectors.jdbc.TestBase) AmazonS3(com.amazonaws.services.s3.AmazonS3) QueryStatusChecker(com.amazonaws.athena.connector.lambda.QueryStatusChecker) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) FederatedIdentity(com.amazonaws.athena.connector.lambda.security.FederatedIdentity) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetSecretValueResult(com.amazonaws.services.secretsmanager.model.GetSecretValueResult) Before(org.junit.Before) AmazonAthena(com.amazonaws.services.athena.AmazonAthena) Split(com.amazonaws.athena.connector.lambda.domain.Split) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) AWSSecretsManager(com.amazonaws.services.secretsmanager.AWSSecretsManager) Test(org.junit.Test) DatabaseConnectionConfig(com.amazonaws.athena.connectors.jdbc.connection.DatabaseConnectionConfig) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) 
StandardCharsets(java.nio.charset.StandardCharsets) FieldBuilder(com.amazonaws.athena.connector.lambda.data.FieldBuilder) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Mockito(org.mockito.Mockito) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) SpillConfig(com.amazonaws.athena.connector.lambda.data.SpillConfig) Assert(org.junit.Assert) JdbcConnectionFactory(com.amazonaws.athena.connectors.jdbc.connection.JdbcConnectionFactory) Collections(java.util.Collections) GetSecretValueRequest(com.amazonaws.services.secretsmanager.model.GetSecretValueRequest) Types(java.sql.Types) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) Schema(org.apache.arrow.vector.types.pojo.Schema) ConstraintEvaluator(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) SpillConfig(com.amazonaws.athena.connector.lambda.data.SpillConfig) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ByteArrayInputStream(java.io.ByteArrayInputStream) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet) S3BlockSpiller(com.amazonaws.athena.connector.lambda.data.S3BlockSpiller) Split(com.amazonaws.athena.connector.lambda.domain.Split) S3BlockSpiller(com.amazonaws.athena.connector.lambda.data.S3BlockSpiller) BlockSpiller(com.amazonaws.athena.connector.lambda.data.BlockSpiller) Test(org.junit.Test)

Example 9 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

From the class PostGreSqlMetadataHandlerTest, method doGetTableWithArrayColumns.

/**
 * Checks that {@code doGetTable} maps array-typed columns to list fields in the schema.
 *
 * <p>Every mocked column has JDBC type {@code ARRAY} and differs only by its {@code TYPE_NAME}
 * (for example {@code _bool}, {@code _int4}, {@code _numeric}). The expected schema holds one
 * list field per column, followed by the fields of the handler's partition schema.
 *
 * @throws Exception if the mock setup or the handler under test fails
 */
@Test
public void doGetTableWithArrayColumns() throws Exception {
    logger.info("doGetTableWithArrayColumns - enter");

    // Column metadata rows served by the mocked getColumns(...) call.
    String[] columnLabels = { "DATA_TYPE", "COLUMN_NAME", "COLUMN_SIZE", "DECIMAL_DIGITS", "TYPE_NAME" };
    Object[][] columnRows = {
        { Types.ARRAY, "bool_array", 0, 0, "_bool" },
        { Types.ARRAY, "smallint_array", 0, 0, "_int2" },
        { Types.ARRAY, "int_array", 0, 0, "_int4" },
        { Types.ARRAY, "bigint_array", 0, 0, "_int8" },
        { Types.ARRAY, "float_array", 0, 0, "_float4" },
        { Types.ARRAY, "double_array", 0, 0, "_float8" },
        { Types.ARRAY, "date_array", 0, 0, "_date" },
        { Types.ARRAY, "timestamp_array", 0, 0, "_timestamp" },
        { Types.ARRAY, "binary_array", 0, 0, "_bytea" },
        { Types.ARRAY, "decimal_array", 38, 2, "_numeric" },
        { Types.ARRAY, "string_array", 0, 0, "_text" },
        { Types.ARRAY, "uuid_array", 0, 0, "_uuid" }
    };
    ResultSet columnsResultSet = mockResultSet(columnLabels, columnRows, new AtomicInteger(-1));

    // One list field per mocked column, in the same order as the rows above.
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder
            .addListField("bool_array", new ArrowType.Bool())
            .addListField("smallint_array", new ArrowType.Int(16, true))
            .addListField("int_array", new ArrowType.Int(32, true))
            .addListField("bigint_array", new ArrowType.Int(64, true))
            .addListField("float_array", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE))
            .addListField("double_array", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))
            .addListField("date_array", new ArrowType.Date(DateUnit.DAY))
            .addListField("timestamp_array", new ArrowType.Date(DateUnit.MILLISECOND))
            .addListField("binary_array", new ArrowType.Utf8())
            .addListField("decimal_array", new ArrowType.Decimal(38, 2))
            .addListField("string_array", new ArrowType.Utf8())
            .addListField("uuid_array", new ArrowType.Utf8());
    postGreSqlMetadataHandler.getPartitionSchema("testCatalog").getFields().forEach(schemaBuilder::addField);
    Schema expectedSchema = schemaBuilder.build();

    TableName requestedTable = new TableName("testSchema", "testTable");
    Mockito.when(connection.getMetaData().getColumns("testCatalog", requestedTable.getSchemaName(), requestedTable.getTableName(), null))
            .thenReturn(columnsResultSet);
    Mockito.when(connection.getCatalog()).thenReturn("testCatalog");

    GetTableResponse response = this.postGreSqlMetadataHandler.doGetTable(
            new BlockAllocatorImpl(),
            new GetTableRequest(this.federatedIdentity, "testQueryId", "testCatalog", requestedTable));
    logger.info("Schema: {}", response.getSchema());

    Assert.assertEquals(expectedSchema, response.getSchema());
    Assert.assertEquals(requestedTable, response.getTableName());
    Assert.assertEquals("testCatalog", response.getCatalogName());
    logger.info("doGetTableWithArrayColumns - exit");
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 10 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

From the class OracleMetadataHandlerTest, method doGetTable.

/**
 * Checks that {@code doGetTable} builds the expected schema from mocked JDBC column metadata
 * for the upper-case table {@code TESTSCHEMA.TESTTABLE}.
 *
 * <p>The mocked columns are INTEGER, VARCHAR, TIMESTAMP and TIMESTAMP_WITH_TIMEZONE; the
 * expected schema maps them to INT, VARCHAR, DATEMILLI and VARCHAR respectively, followed by
 * the fields of {@code PARTITION_SCHEMA}.
 *
 * @throws SQLException if the JDBC mocks or the handler under test raise it
 */
@Test
public void doGetTable() throws SQLException {
    BlockAllocator allocator = new BlockAllocatorImpl();

    // Column metadata rows served by the mocked getColumns(...) call.
    String[] columnLabels = { "DATA_TYPE", "COLUMN_SIZE", "COLUMN_NAME", "DECIMAL_DIGITS", "NUM_PREC_RADIX" };
    Object[][] columnRows = {
        { Types.INTEGER, 12, "testCol1", 0, 0 },
        { Types.VARCHAR, 25, "testCol2", 0, 0 },
        { Types.TIMESTAMP, 93, "testCol3", 0, 0 },
        { Types.TIMESTAMP_WITH_TIMEZONE, 93, "testCol4", 0, 0 }
    };
    ResultSet columnsResultSet = mockResultSet(columnLabels, columnRows, new AtomicInteger(-1));

    // Expected data columns, expressed as parallel name/type arrays and added in order.
    String[] expectedNames = { "testCol1", "testCol2", "testCol3", "testCol4" };
    org.apache.arrow.vector.types.Types.MinorType[] expectedTypes = {
        org.apache.arrow.vector.types.Types.MinorType.INT,
        org.apache.arrow.vector.types.Types.MinorType.VARCHAR,
        org.apache.arrow.vector.types.Types.MinorType.DATEMILLI,
        org.apache.arrow.vector.types.Types.MinorType.VARCHAR
    };
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    for (int idx = 0; idx < expectedNames.length; idx++) {
        schemaBuilder.addField(FieldBuilder.newBuilder(expectedNames[idx], expectedTypes[idx].getType()).build());
    }
    PARTITION_SCHEMA.getFields().forEach(schemaBuilder::addField);
    Schema expectedSchema = schemaBuilder.build();

    TableName requestedTable = new TableName("TESTSCHEMA", "TESTTABLE");
    Mockito.when(connection.getMetaData().getColumns("testCatalog", requestedTable.getSchemaName(), requestedTable.getTableName(), null))
            .thenReturn(columnsResultSet);
    Mockito.when(connection.getCatalog()).thenReturn("testCatalog");

    GetTableRequest request = new GetTableRequest(this.federatedIdentity, "testQueryId", "testCatalog", requestedTable);
    GetTableResponse response = this.oracleMetadataHandler.doGetTable(allocator, request);

    Assert.assertEquals(expectedSchema, response.getSchema());
    Assert.assertEquals(requestedTable, response.getTableName());
    Assert.assertEquals("testCatalog", response.getCatalogName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Aggregations

SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)68 Schema (org.apache.arrow.vector.types.pojo.Schema)48 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)43 Test (org.junit.Test)43 PreparedStatement (java.sql.PreparedStatement)37 ResultSet (java.sql.ResultSet)35 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)30 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)23 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)23 BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator)20 Split (com.amazonaws.athena.connector.lambda.domain.Split)17 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)17 ArrayList (java.util.ArrayList)15 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)12 GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse)12 GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)12 GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest)11 Connection (java.sql.Connection)10 HashMap (java.util.HashMap)10 ImmutableMap (com.google.common.collect.ImmutableMap)8