Search in sources :

Example 96 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class HbaseRecordHandlerTest method doReadRecordsSpill.

@Test
public void doReadRecordsSpill() throws Exception {
    List<Result> results = TestUtils.makeResults(10_000);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(results.iterator());
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject())).thenAnswer((InvocationOnMock invocationOnMock) -> {
        ResultProcessor processor = (ResultProcessor) invocationOnMock.getArguments()[2];
        return processor.scan(mockScanner);
    });
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("family1:col3", SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.BIGINT.getType(), 0L)), true));
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create()).add(HBASE_CONN_STR, "fake_con_str").add(START_KEY_FIELD, "fake_start_key").add(END_KEY_FIELD, "fake_end_key").add(REGION_ID_FIELD, "fake_region_id").add(REGION_NAME_FIELD, "fake_region_name");
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE), schemaForRead, splitBuilder.build(), new Constraints(constraintsMap), // ~1.5MB so we should see some spill
    1_500_000L, 0L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);
        int blockNum = 0;
        for (SpillLocation next : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) next;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Result(org.apache.hadoop.hbase.client.Result) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 97 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class GetTableLayoutRequestSerDeTest method beforeTest.

@Before
public void beforeTest() throws IOException {
    Schema schema = SchemaBuilder.newBuilder().addField("year", new ArrowType.Int(32, true)).addField("month", new ArrowType.Int(32, true)).addField("day", new ArrowType.Int(32, true)).addField("col2", new ArrowType.Utf8()).addField("col3", Types.MinorType.FLOAT8.getType()).addField("col4", Types.MinorType.FLOAT8.getType()).addField("col5", Types.MinorType.FLOAT8.getType()).build();
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)), false));
    constraintsMap.put("col4", EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true).add(1.1D).build());
    constraintsMap.put("col5", new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));
    Constraints constraints = new Constraints(constraintsMap);
    expected = new GetTableLayoutRequest(federatedIdentity, "test-query-id", "test-catalog", new TableName("test-schema", "test-table"), constraints, schema, ImmutableSet.of("year", "month", "day"));
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "GetTableLayoutRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Also used : HashMap(java.util.HashMap) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) Before(org.junit.Before)

Example 98 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class HiveMetadataHandlerTest method doGetSplits.

@Test
public void doGetSplits() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    String[] schema = { "data_type", "col_name" };
    Object[][] values = { { "INTEGER", "case_number" }, { "VARCHAR", "case_location" }, { "TIMESTAMP", "case_instance" }, { "DATE", "case_date" } };
    AtomicInteger rowNumber = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(schema, values, rowNumber);
    String[] columns3 = { "col" };
    int[] types3 = { Types.VARCHAR };
    Object[][] values4 = { { "Partitioned:true" } };
    ResultSet resultSet2 = mockResultSet(columns3, types3, values4, new AtomicInteger(-1));
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tempTableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.hiveMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = new HashSet<>(Arrays.asList("partition"));
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tempTableName, constraints, partitionSchema, partitionCols);
    String value2 = "case_date=01-01-2000/case_number=0/case_instance=89898989/case_location=__HIVE_DEFAULT_PARTITION__";
    String value3 = "case_date=02-01-2000/case_number=1/case_instance=89898990/case_location=Hyderabad";
    String[] columns2 = { "Partition" };
    int[] types2 = { Types.VARCHAR };
    Object[][] values1 = { { value2 }, { value3 } };
    Mockito.when(jdbcConnectionFactory.getConnection(Mockito.any(JdbcCredentialProvider.class))).thenReturn(connection);
    String tableName = getTableLayoutRequest.getTableName().getTableName().toUpperCase();
    final String getPartitionExistsSql = "show table extended like " + tableName;
    final String getPartitionDetailsSql = "show partitions " + getTableLayoutRequest.getTableName().getTableName().toUpperCase();
    Statement statement1 = Mockito.mock(Statement.class);
    PreparedStatement preparestatement1 = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(HiveMetadataHandler.GET_METADATA_QUERY + tableName)).thenReturn(preparestatement1);
    Mockito.when(this.connection.createStatement()).thenReturn(statement1);
    ResultSet resultSet1 = mockResultSet(columns2, types2, values1, new AtomicInteger(-1));
    Mockito.when(preparestatement1.executeQuery()).thenReturn(resultSet);
    Mockito.when(statement1.executeQuery(getPartitionDetailsSql)).thenReturn(resultSet1);
    Mockito.when(statement1.executeQuery(getPartitionExistsSql)).thenReturn(resultSet2);
    Mockito.when(resultSet2.getString(1)).thenReturn("PARTITIONED:true");
    GetTableLayoutResponse getTableLayoutResponse = this.hiveMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    BlockAllocator splitBlockAllocator = new BlockAllocatorImpl();
    GetSplitsRequest getSplitsRequest = new GetSplitsRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tempTableName, getTableLayoutResponse.getPartitions(), new ArrayList<>(partitionCols), constraints, null);
    GetSplitsResponse getSplitsResponse = this.hiveMetadataHandler.doGetSplits(splitBlockAllocator, getSplitsRequest);
    Assert.assertEquals(2, getSplitsResponse.getSplits().size());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) JdbcCredentialProvider(com.amazonaws.athena.connectors.jdbc.connection.JdbcCredentialProvider) Test(org.junit.Test)

Example 99 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class HiveMetadataHandlerTest method doGetTableLayoutWithSQLException.

@Test(expected = RuntimeException.class)
public void doGetTableLayoutWithSQLException() throws Exception {
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.hiveMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
    Connection connection = Mockito.mock(Connection.class, Mockito.RETURNS_DEEP_STUBS);
    JdbcConnectionFactory jdbcConnectionFactory = Mockito.mock(JdbcConnectionFactory.class);
    Mockito.when(jdbcConnectionFactory.getConnection(Mockito.any(JdbcCredentialProvider.class))).thenReturn(connection);
    Mockito.when(connection.getMetaData().getSearchStringEscape()).thenThrow(new SQLException());
    HiveMetadataHandler implalaMetadataHandler = new HiveMetadataHandler(databaseConnectionConfig, this.secretsManager, this.athena, jdbcConnectionFactory);
    implalaMetadataHandler.doGetTableLayout(Mockito.mock(BlockAllocator.class), getTableLayoutRequest);
}
Also used : JdbcConnectionFactory(com.amazonaws.athena.connectors.jdbc.connection.JdbcConnectionFactory) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Schema(org.apache.arrow.vector.types.pojo.Schema) JdbcCredentialProvider(com.amazonaws.athena.connectors.jdbc.connection.JdbcCredentialProvider) Test(org.junit.Test)

Example 100 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class HiveMetadataHandlerTest method doGetTableLayout.

@Test
public void doGetTableLayout() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    String[] schema = { "data_type", "col_name" };
    Object[][] values = { { "INTEGER", "case_number" }, { "VARCHAR", "case_location" }, { "TIMESTAMP", "case_instance" }, { "DATE", "case_date" } };
    AtomicInteger rowNumber = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(schema, values, rowNumber);
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tempTableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.hiveMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = new HashSet<>(Arrays.asList("partition"));
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tempTableName, constraints, partitionSchema, partitionCols);
    String value2 = "case_date=01-01-2000/case_number=0/case_instance=89898989/case_location=__HIVE_DEFAULT_PARTITION__";
    String value3 = "case_date=02-01-2000/case_number=1/case_instance=89898990/case_location=Hyderabad";
    String[] columns2 = { "Partition" };
    int[] types2 = { Types.VARCHAR };
    Object[][] values1 = { { value2 }, { value3 } };
    String[] columns3 = { "col" };
    int[] types3 = { Types.VARCHAR };
    Object[][] values4 = { { "PARTITIONED:true" } };
    ResultSet resultSet2 = mockResultSet(columns3, types3, values4, new AtomicInteger(-1));
    Mockito.when(jdbcConnectionFactory.getConnection(Mockito.any(JdbcCredentialProvider.class))).thenReturn(connection);
    String tableName = getTableLayoutRequest.getTableName().getTableName().toUpperCase();
    final String getPartitionExistsSql = "show table extended like " + tableName;
    final String getPartitionDetailsSql = "show partitions " + getTableLayoutRequest.getTableName().getTableName().toUpperCase();
    Statement statement1 = Mockito.mock(Statement.class);
    PreparedStatement preparestatement1 = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(HiveMetadataHandler.GET_METADATA_QUERY + tableName)).thenReturn(preparestatement1);
    Mockito.when(this.connection.createStatement()).thenReturn(statement1);
    ResultSet resultSet1 = mockResultSet(columns2, types2, values1, new AtomicInteger(-1));
    Mockito.when(preparestatement1.executeQuery()).thenReturn(resultSet);
    Mockito.when(statement1.executeQuery(getPartitionDetailsSql)).thenReturn(resultSet1);
    Mockito.when(statement1.executeQuery(getPartitionExistsSql)).thenReturn(resultSet2);
    Mockito.when(resultSet2.getString(1)).thenReturn("PARTITIONED:true");
    GetTableLayoutResponse getTableLayoutResponse = this.hiveMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    List<String> expectedValues = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        expectedValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    Assert.assertEquals(expectedValues.get(0), "[partition :  case_date=02-01-2000 and case_number=1 and case_instance=89898990 and case_location='Hyderabad']");
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("partition", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tempTableName, getTableLayoutResponse.getTableName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) JdbcCredentialProvider(com.amazonaws.athena.connectors.jdbc.connection.JdbcCredentialProvider) Test(org.junit.Test)

Aggregations

Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)182 Test (org.junit.Test)172 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)148 Schema (org.apache.arrow.vector.types.pojo.Schema)136 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)64 HashMap (java.util.HashMap)63 Split (com.amazonaws.athena.connector.lambda.domain.Split)59 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)55 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)54 BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator)50 GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest)47 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)44 ArrayList (java.util.ArrayList)44 PreparedStatement (java.sql.PreparedStatement)42 GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse)41 Block (com.amazonaws.athena.connector.lambda.data.Block)36 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)35 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)33 ResultSet (java.sql.ResultSet)32 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)30