Search in sources :

Example 66 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class ImpalaMetadataHandlerTest method doGetTableLayout.

@Test
public void doGetTableLayout() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    String[] schema = { "type", "name" };
    Object[][] values = { { "INTEGER", "case_number" }, { "VARCHAR", "case_location" }, { "TIMESTAMP", "case_instance" }, { "DATE", "case_date" } };
    AtomicInteger rowNumber = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(schema, values, rowNumber);
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tempTableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.impalaMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = new HashSet<>(Arrays.asList("partition"));
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tempTableName, constraints, partitionSchema, partitionCols);
    String value2 = "case_date=01-01-2000/case_number=0/case_instance=89898989/case_location=__HIVE_DEFAULT_PARTITION__";
    String value3 = "case_date=02-01-2000/case_number=1/case_instance=89898990/case_location=Hyderabad";
    String[] columns2 = { "Partition" };
    int[] types2 = { Types.VARCHAR };
    Object[][] values1 = { { value3 }, { value2 } };
    PreparedStatement preparestatement1 = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(ImpalaMetadataHandler.GET_METADATA_QUERY + tempTableName.getTableName().toUpperCase())).thenReturn(preparestatement1);
    final String getPartitionDetailsSql = "show files in " + getTableLayoutRequest.getTableName().getTableName().toUpperCase();
    Statement statement1 = Mockito.mock(Statement.class);
    Mockito.when(this.connection.createStatement()).thenReturn(statement1);
    ResultSet resultSet1 = mockResultSet(columns2, types2, values1, new AtomicInteger(-1));
    Mockito.when(preparestatement1.executeQuery()).thenReturn(resultSet);
    Mockito.when(statement1.executeQuery(getPartitionDetailsSql)).thenReturn(resultSet1);
    GetTableLayoutResponse getTableLayoutResponse = this.impalaMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
    List<String> expectedValues = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        expectedValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }
    Assert.assertEquals(expectedValues.get(0), "[partition :  case_date=02-01-2000 and case_number=1 and case_instance=89898990 and case_location='Hyderabad']");
    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("partition", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tempTableName, getTableLayoutResponse.getTableName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Test(org.junit.Test)

Example 67 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class CloudwatchMetadataHandlerTest method doGetTableLayout.

@Test
public void doGetTableLayout() throws Exception {
    logger.info("doGetTableLayout - enter");
    when(mockAwsLogs.describeLogStreams(any(DescribeLogStreamsRequest.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        DescribeLogStreamsRequest request = (DescribeLogStreamsRequest) invocationOnMock.getArguments()[0];
        DescribeLogStreamsResult result = new DescribeLogStreamsResult();
        Integer nextToken;
        if (request.getNextToken() == null) {
            nextToken = 1;
        } else if (Integer.valueOf(request.getNextToken()) < 3) {
            nextToken = Integer.valueOf(request.getNextToken()) + 1;
        } else {
            nextToken = null;
        }
        List<LogStream> logStreams = new ArrayList<>();
        if (request.getNextToken() == null || Integer.valueOf(request.getNextToken()) < 3) {
            int continuation = request.getNextToken() == null ? 0 : Integer.valueOf(request.getNextToken());
            for (int i = 0 + continuation * 100; i < 300; i++) {
                LogStream nextLogStream = new LogStream();
                nextLogStream.setLogStreamName("table-" + String.valueOf(i));
                nextLogStream.setStoredBytes(i * 1000L);
                logStreams.add(nextLogStream);
            }
        }
        result.withLogStreams(logStreams);
        if (nextToken != null) {
            result.setNextToken(String.valueOf(nextToken));
        }
        return result;
    });
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("log_stream", EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add("table-10").build());
    Schema schema = SchemaBuilder.newBuilder().addStringField("log_stream").build();
    GetTableLayoutRequest req = new GetTableLayoutRequest(identity, "queryId", "default", new TableName("schema-1", "all_log_streams"), new Constraints(constraintsMap), schema, Collections.singleton("log_stream"));
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
    logger.info("doGetTableLayout - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayout - {}", res.getPartitions());
    assertTrue(res.getPartitions().getSchema().findField("log_stream") != null);
    assertTrue(res.getPartitions().getRowCount() == 1);
    verify(mockAwsLogs, times(4)).describeLogStreams(any(DescribeLogStreamsRequest.class));
    logger.info("doGetTableLayout - exit");
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) LogStream(com.amazonaws.services.logs.model.LogStream) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) DescribeLogStreamsRequest(com.amazonaws.services.logs.model.DescribeLogStreamsRequest) DescribeLogStreamsResult(com.amazonaws.services.logs.model.DescribeLogStreamsResult) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 68 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class CloudwatchMetadataHandlerTest method doGetSplits.

@Test
public void doGetSplits() {
    logger.info("doGetSplits: enter");
    Schema schema = SchemaBuilder.newBuilder().addField(CloudwatchMetadataHandler.LOG_STREAM_FIELD, new ArrowType.Utf8()).addField(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD, new ArrowType.Int(64, true)).addField(CloudwatchMetadataHandler.LOG_GROUP_FIELD, new ArrowType.Utf8()).build();
    Block partitions = allocator.createBlock(schema);
    int num_partitions = 2_000;
    for (int i = 0; i < num_partitions; i++) {
        BlockUtils.setValue(partitions.getFieldVector(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD), i, 2016L + i);
        BlockUtils.setValue(partitions.getFieldVector(CloudwatchMetadataHandler.LOG_STREAM_FIELD), i, "log_stream_" + i);
        BlockUtils.setValue(partitions.getFieldVector(CloudwatchMetadataHandler.LOG_GROUP_FIELD), i, "log_group_" + i);
    }
    partitions.setRowCount(num_partitions);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", new TableName("schema", "all_log_streams"), partitions, Collections.singletonList(CloudwatchMetadataHandler.LOG_STREAM_FIELD), new Constraints(new HashMap<>()), continuationToken);
    int numContinuations = 0;
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        logger.info("doGetSplits: req[{}]", req);
        MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
        for (Split nextSplit : response.getSplits()) {
            assertNotNull(nextSplit.getProperty(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD));
            assertNotNull(nextSplit.getProperty(CloudwatchMetadataHandler.LOG_STREAM_FIELD));
            assertNotNull(nextSplit.getProperty(CloudwatchMetadataHandler.LOG_GROUP_FIELD));
        }
        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);
    assertTrue(numContinuations > 0);
    logger.info("doGetSplits: exit");
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Split(com.amazonaws.athena.connector.lambda.domain.Split) Test(org.junit.Test)

Example 69 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class DocDBMetadataHandlerTest method doGetSplits.

@Test
public void doGetSplits() {
    List<String> partitionCols = new ArrayList<>();
    Block partitions = BlockUtils.newBlock(allocator, PARTITION_ID, Types.MinorType.INT.getType(), 0);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME, partitions, partitionCols, new Constraints(new HashMap<>()), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
    logger.info("doGetSplits: req[{}]", req);
    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", new Object[] { continuationToken, response.getSplits().size() });
    assertTrue("Continuation criteria violated", response.getSplits().size() == 1);
    assertTrue("Continuation criteria violated", response.getContinuationToken() == null);
}
Also used : Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) ArrayList(java.util.ArrayList) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)

Example 70 with Constraints

use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.

the class DocDBRecordHandlerTest method nestedStructTest.

@Test
public void nestedStructTest() throws Exception {
    List<Document> documents = new ArrayList<>();
    Document result = new Document();
    documents.add(result);
    Document listStruct1 = new Document();
    listStruct1.put("SomeSubStruct", "someSubStruct1");
    List<Document> subList = new ArrayList<>();
    Document listSubStruct1 = new Document();
    listSubStruct1.put("SomeSubSubStruct", "someSubSubStruct");
    subList.add(listSubStruct1);
    listStruct1.put("SomeSubList", subList);
    Document listStruct2 = new Document();
    listStruct2.put("SomeSubStruct1", "someSubStruct2");
    List<Document> list = new ArrayList<>();
    list.add(listStruct1);
    list.add(listStruct1);
    Document structWithList = new Document();
    structWithList.put("SomeList", list);
    Document structWithNullList = new Document();
    structWithNullList.put("SomeNullList", null);
    Document simpleSubStruct = new Document();
    simpleSubStruct.put("SomeSimpleSubStruct", "someSimpleSubStruct");
    structWithList.put("SimpleSubStruct", simpleSubStruct);
    structWithList.put("SimpleSubStructNullList", structWithNullList);
    result.put("ComplexStruct", structWithList);
    Document simpleStruct = new Document();
    simpleStruct.put("SomeSimpleStruct", "someSimpleStruct");
    result.put("SimpleStruct", simpleStruct);
    when(mockCollection.find()).thenReturn(mockIterable);
    when(mockIterable.limit(anyInt())).thenReturn(mockIterable);
    when(mockIterable.maxScan(anyInt())).thenReturn(mockIterable);
    when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));
    GetTableRequest req = new GetTableRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME);
    GetTableResponse res = mdHandler.doGetTable(allocator, req);
    logger.info("doGetTable - {}", res);
    when(mockCollection.find(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsNoSpill: query[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.projection(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsNoSpill: projection[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), TABLE_NAME, res.getSchema(), Split.newBuilder(splitLoc, keyFactory.create()).add(DOCDB_CONN_STR, CONNECTION_STRING).build(), new Constraints(constraintsMap), // 100GB don't expect this to spill
    100_000_000_000L, 100_000_000_000L);
    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
    assertTrue(response.getRecordCount() == 1);
    String expectedString = "[ComplexStruct : {[SomeList : {{[SomeSubStruct : someSubStruct1]," + "[SomeSubList : {{[SomeSubSubStruct : someSubSubStruct]}}]}," + "{[SomeSubStruct : someSubStruct1],[SomeSubList : {{[SomeSubSubStruct : someSubSubStruct]}}]}}]," + "[SimpleSubStruct : {[SomeSimpleSubStruct : someSimpleSubStruct]}]," + "[SimpleSubStructNullList : {[SomeNullList : null]}]}], [SimpleStruct : {[SomeSimpleStruct : someSimpleStruct]}]";
    assertEquals(expectedString, BlockUtils.rowToString(response.getRecords(), 0));
}
Also used : HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Document(org.bson.Document) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Aggregations

Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)182 Test (org.junit.Test)172 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)148 Schema (org.apache.arrow.vector.types.pojo.Schema)136 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)64 HashMap (java.util.HashMap)63 Split (com.amazonaws.athena.connector.lambda.domain.Split)59 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)55 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)54 BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator)50 GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest)47 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)44 ArrayList (java.util.ArrayList)44 PreparedStatement (java.sql.PreparedStatement)42 GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse)41 Block (com.amazonaws.athena.connector.lambda.data.Block)36 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)35 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)33 ResultSet (java.sql.ResultSet)32 GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse)30