Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class ImpalaMetadataHandlerTest, method doGetTableLayout.
/**
 * Exercises {@code ImpalaMetadataHandler.doGetTableLayout} against a mocked JDBC connection.
 *
 * <p>Two queries are stubbed: the prepared statement for
 * {@code GET_METADATA_QUERY + <upper-cased table name>} yields the column type/name rows, and
 * the plain statement for {@code "show files in <upper-cased table name>"} yields two partition
 * path strings. The test then checks the rendering of the first partition row, the schema of
 * the returned partition block, and the echoed table name.
 *
 * @throws Exception if the handler call or any mocked JDBC interaction fails
 */
@Test
public void doGetTableLayout() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();

    // Column metadata rows served by the mocked metadata query.
    String[] schema = { "type", "name" };
    Object[][] values = { { "INTEGER", "case_number" }, { "VARCHAR", "case_location" }, { "TIMESTAMP", "case_instance" }, { "DATE", "case_date" } };
    AtomicInteger rowNumber = new AtomicInteger(-1);
    ResultSet resultSet = mockResultSet(schema, values, rowNumber);

    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tempTableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.impalaMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = new HashSet<>(Arrays.asList("partition"));
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tempTableName, constraints, partitionSchema, partitionCols);

    // Partition path rows served by the mocked "show files in ..." query; value3 is returned first.
    String value2 = "case_date=01-01-2000/case_number=0/case_instance=89898989/case_location=__HIVE_DEFAULT_PARTITION__";
    String value3 = "case_date=02-01-2000/case_number=1/case_instance=89898990/case_location=Hyderabad";
    String[] columns2 = { "Partition" };
    int[] types2 = { Types.VARCHAR };
    Object[][] values1 = { { value3 }, { value2 } };

    PreparedStatement preparestatement1 = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(ImpalaMetadataHandler.GET_METADATA_QUERY + tempTableName.getTableName().toUpperCase())).thenReturn(preparestatement1);
    final String getPartitionDetailsSql = "show files in " + getTableLayoutRequest.getTableName().getTableName().toUpperCase();
    Statement statement1 = Mockito.mock(Statement.class);
    Mockito.when(this.connection.createStatement()).thenReturn(statement1);
    ResultSet resultSet1 = mockResultSet(columns2, types2, values1, new AtomicInteger(-1));
    Mockito.when(preparestatement1.executeQuery()).thenReturn(resultSet);
    Mockito.when(statement1.executeQuery(getPartitionDetailsSql)).thenReturn(resultSet1);

    GetTableLayoutResponse getTableLayoutResponse = this.impalaMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);

    // Render every partition row the handler produced; these are the ACTUAL values under test.
    List<String> actualValues = new ArrayList<>();
    for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
        actualValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
    }

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
    // messages label the two values correctly.
    Assert.assertEquals("[partition : case_date=02-01-2000 and case_number=1 and case_instance=89898990 and case_location='Hyderabad']", actualValues.get(0));

    SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
    expectedSchemaBuilder.addField(FieldBuilder.newBuilder("partition", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
    Schema expectedSchema = expectedSchemaBuilder.build();
    Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tempTableName, getTableLayoutResponse.getTableName());
}
Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class CloudwatchMetadataHandlerTest, method doGetTableLayout.
/**
 * Drives {@code handler.doGetTableLayout} with a paginated {@code describeLogStreams} stub and
 * an equality constraint on {@code log_stream = "table-10"}.
 *
 * <p>The stub treats the request's next-token as a page number (no token means page 0). For
 * pages below 3 it returns streams named {@code table-<i>} for {@code i} from
 * {@code page * 100} up to 299 together with the following page number as the next token;
 * from page 3 onward it returns no streams and no token. The test expects exactly one
 * partition row and exactly four calls to {@code describeLogStreams}.
 *
 * @throws Exception if the handler call fails
 */
@Test
public void doGetTableLayout() throws Exception {
    logger.info("doGetTableLayout - enter");

    when(mockAwsLogs.describeLogStreams(any(DescribeLogStreamsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        DescribeLogStreamsRequest incoming = (DescribeLogStreamsRequest) invocation.getArguments()[0];
        String token = incoming.getNextToken();
        // A missing token denotes the very first page.
        int page = (token == null) ? 0 : Integer.parseInt(token);
        boolean hasContent = page < 3;

        List<LogStream> streams = new ArrayList<>();
        if (hasContent) {
            for (int idx = page * 100; idx < 300; idx++) {
                LogStream stream = new LogStream();
                stream.setLogStreamName("table-" + idx);
                stream.setStoredBytes(idx * 1000L);
                streams.add(stream);
            }
        }

        DescribeLogStreamsResult outcome = new DescribeLogStreamsResult();
        outcome.withLogStreams(streams);
        if (hasContent) {
            // Pages 0..2 each point at their successor; later pages end the pagination.
            outcome.setNextToken(String.valueOf(page + 1));
        }
        return outcome;
    });

    ValueSet onlyTable10 = EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add("table-10").build();
    Map<String, ValueSet> constraintsByColumn = new HashMap<>();
    constraintsByColumn.put("log_stream", onlyTable10);

    Schema partitionSchema = SchemaBuilder.newBuilder().addStringField("log_stream").build();
    TableName table = new TableName("schema-1", "all_log_streams");
    GetTableLayoutRequest layoutRequest = new GetTableLayoutRequest(identity, "queryId", "default", table, new Constraints(constraintsByColumn), partitionSchema, Collections.singleton("log_stream"));

    GetTableLayoutResponse layoutResponse = handler.doGetTableLayout(allocator, layoutRequest);

    logger.info("doGetTableLayout - {}", layoutResponse.getPartitions().getSchema());
    logger.info("doGetTableLayout - {}", layoutResponse.getPartitions());

    assertTrue(layoutResponse.getPartitions().getSchema().findField("log_stream") != null);
    assertTrue(layoutResponse.getPartitions().getRowCount() == 1);
    verify(mockAwsLogs, times(4)).describeLogStreams(any(DescribeLogStreamsRequest.class));

    logger.info("doGetTableLayout - exit");
}
Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class CloudwatchMetadataHandlerTest, method doGetSplits.
/**
 * Feeds {@code handler.doGetSplits} a block of 2,000 partitions and follows continuation
 * tokens until the handler stops returning one.
 *
 * <p>Every split returned on every page must carry the log-stream, log-stream-size and
 * log-group properties, and at least one continuation token must have been issued.
 */
@Test
public void doGetSplits() {
    logger.info("doGetSplits: enter");

    Schema partitionSchema = SchemaBuilder.newBuilder()
            .addField(CloudwatchMetadataHandler.LOG_STREAM_FIELD, new ArrowType.Utf8())
            .addField(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD, new ArrowType.Int(64, true))
            .addField(CloudwatchMetadataHandler.LOG_GROUP_FIELD, new ArrowType.Utf8())
            .build();
    Block partitionBlock = allocator.createBlock(partitionSchema);

    final int partitionCount = 2_000;
    for (int row = 0; row < partitionCount; row++) {
        BlockUtils.setValue(partitionBlock.getFieldVector(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD), row, 2016L + row);
        BlockUtils.setValue(partitionBlock.getFieldVector(CloudwatchMetadataHandler.LOG_STREAM_FIELD), row, "log_stream_" + row);
        BlockUtils.setValue(partitionBlock.getFieldVector(CloudwatchMetadataHandler.LOG_GROUP_FIELD), row, "log_group_" + row);
    }
    partitionBlock.setRowCount(partitionCount);

    String token = null;
    GetSplitsRequest template = new GetSplitsRequest(identity, "queryId", "catalog_name", new TableName("schema", "all_log_streams"), partitionBlock, Collections.singletonList(CloudwatchMetadataHandler.LOG_STREAM_FIELD), new Constraints(new HashMap<>()), token);

    int continuationsSeen = 0;
    while (true) {
        // Each page re-issues the template request with the latest token.
        GetSplitsRequest pageRequest = new GetSplitsRequest(template, token);
        logger.info("doGetSplits: req[{}]", pageRequest);

        MetadataResponse raw = handler.doGetSplits(allocator, pageRequest);
        assertEquals(MetadataRequestType.GET_SPLITS, raw.getRequestType());

        GetSplitsResponse page = (GetSplitsResponse) raw;
        token = page.getContinuationToken();
        logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", token, page.getSplits().size());

        for (Split split : page.getSplits()) {
            assertNotNull(split.getProperty(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD));
            assertNotNull(split.getProperty(CloudwatchMetadataHandler.LOG_STREAM_FIELD));
            assertNotNull(split.getProperty(CloudwatchMetadataHandler.LOG_GROUP_FIELD));
        }

        if (token == null) {
            // No token means this was the final page.
            break;
        }
        continuationsSeen++;
    }

    assertTrue(continuationsSeen > 0);
    logger.info("doGetSplits: exit");
}
Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class DocDBMetadataHandlerTest, method doGetSplits.
/**
 * Calls {@code handler.doGetSplits} once with a single-row partition block and no partition
 * columns, and expects exactly one split with no continuation token in the response.
 */
@Test
public void doGetSplits() {
    List<String> partitionCols = new ArrayList<>();
    Block partitions = BlockUtils.newBlock(allocator, PARTITION_ID, Types.MinorType.INT.getType(), 0);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME, partitions, partitionCols, new Constraints(new HashMap<>()), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
    logger.info("doGetSplits: req[{}]", req);

    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());

    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    continuationToken = response.getContinuationToken();
    logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());

    // assertEquals reports the actual split count on failure, unlike assertTrue(size == 1).
    assertEquals("Continuation criteria violated", 1, response.getSplits().size());
    assertTrue("Continuation criteria violated", response.getContinuationToken() == null);
}
Use of com.amazonaws.athena.connector.lambda.domain.predicate.Constraints in project aws-athena-query-federation by awslabs.
From the class DocDBRecordHandlerTest, method nestedStructTest.
/**
 * Reads a single document containing nested structs and lists through
 * {@code mdHandler.doGetTable} followed by {@code handler.doReadRecords}, and checks the
 * textual rendering of the one resulting row.
 *
 * <p>The document has a {@code ComplexStruct} (a list holding the same sub-struct twice, a
 * simple sub-struct, and a sub-struct whose only field is a null list) and a
 * {@code SimpleStruct}. The schema passed to the read request is the one returned by
 * {@code doGetTable}.
 *
 * @throws Exception if either handler call fails
 */
@Test
public void nestedStructTest() throws Exception {
    List<Document> documents = new ArrayList<>();
    Document result = new Document();
    documents.add(result);

    // Innermost level: a struct holding a string and a one-element list of structs.
    Document listStruct1 = new Document();
    listStruct1.put("SomeSubStruct", "someSubStruct1");
    List<Document> subList = new ArrayList<>();
    Document listSubStruct1 = new Document();
    listSubStruct1.put("SomeSubSubStruct", "someSubSubStruct");
    subList.add(listSubStruct1);
    listStruct1.put("SomeSubList", subList);

    // The same struct is added twice; the expected string below repeats it accordingly.
    List<Document> list = new ArrayList<>();
    list.add(listStruct1);
    list.add(listStruct1);

    Document structWithList = new Document();
    structWithList.put("SomeList", list);
    Document structWithNullList = new Document();
    structWithNullList.put("SomeNullList", null);
    Document simpleSubStruct = new Document();
    simpleSubStruct.put("SomeSimpleSubStruct", "someSimpleSubStruct");
    structWithList.put("SimpleSubStruct", simpleSubStruct);
    structWithList.put("SimpleSubStructNullList", structWithNullList);
    result.put("ComplexStruct", structWithList);

    Document simpleStruct = new Document();
    simpleStruct.put("SomeSimpleStruct", "someSimpleStruct");
    result.put("SimpleStruct", simpleStruct);

    // Stubs used while the schema is obtained via doGetTable.
    when(mockCollection.find()).thenReturn(mockIterable);
    when(mockIterable.limit(anyInt())).thenReturn(mockIterable);
    when(mockIterable.maxScan(anyInt())).thenReturn(mockIterable);
    when(mockIterable.batchSize(anyInt())).thenReturn(mockIterable);
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));

    GetTableRequest req = new GetTableRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG, TABLE_NAME);
    GetTableResponse res = mdHandler.doGetTable(allocator, req);
    logger.info("doGetTable - {}", res);

    // Stubs used by the record read: filtered find, projection, and a fresh cursor.
    when(mockCollection.find(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsNoSpill: query[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    when(mockIterable.projection(any(Document.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
        logger.info("doReadRecordsNoSpill: projection[{}]", invocationOnMock.getArguments()[0]);
        return mockIterable;
    });
    // The cursor stubbed earlier was presumably drained by doGetTable, so supply a new one
    // over the same documents. The batchSize stub from above is still in effect.
    when(mockIterable.iterator()).thenReturn(new StubbingCursor(documents.iterator()));

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
    ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), TABLE_NAME, res.getSchema(), Split.newBuilder(splitLoc, keyFactory.create()).add(DOCDB_CONN_STR, CONNECTION_STRING).build(), new Constraints(constraintsMap), // 100GB don't expect this to spill
    100_000_000_000L, 100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);
    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());

    // Check the row count before touching row 0, so an empty result fails with a clear
    // expected/actual message instead of whatever rowToString does on a missing row.
    assertEquals(1, response.getRecordCount());
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));

    String expectedString = "[ComplexStruct : {[SomeList : {{[SomeSubStruct : someSubStruct1]," + "[SomeSubList : {{[SomeSubSubStruct : someSubSubStruct]}}]}," + "{[SomeSubStruct : someSubStruct1],[SomeSubList : {{[SomeSubSubStruct : someSubSubStruct]}}]}}]," + "[SimpleSubStruct : {[SomeSimpleSubStruct : someSimpleSubStruct]}]," + "[SimpleSubStructNullList : {[SomeNullList : null]}]}], [SimpleStruct : {[SomeSimpleStruct : someSimpleStruct]}]";
    assertEquals(expectedString, BlockUtils.rowToString(response.getRecords(), 0));
}
Aggregations