use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class ExampleRecordHandlerTest method doReadRecordsNoSpill.
@Test
public void doReadRecordsNoSpill() {
logger.info("doReadRecordsNoSpill: enter");
for (int i = 0; i < 2; i++) {
EncryptionKey encryptionKey = (i % 2 == 0) ? keyFactory.create() : null;
logger.info("doReadRecordsNoSpill: Using encryptionKey[" + encryptionKey + "]");
Map<String, ValueSet> constraintsMap = new HashMap<>();
constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(), ImmutableList.of(Range.equal(allocator, Types.MinorType.FLOAT8.getType(), 22.0D)), false));
ReadRecordsRequest request = new ReadRecordsRequest(IdentityUtil.fakeIdentity(), "catalog", "queryId-" + System.currentTimeMillis(), new TableName("schema", "table"), schemaForRead, Split.newBuilder(makeSpillLocation(), encryptionKey).add("year", "10").add("month", "10").add("day", "10").build(), new Constraints(constraintsMap), // 100GB don't expect this to spill
100_000_000_000L, 100_000_000_000L);
ObjectMapperUtil.assertSerialization(request);
RecordResponse rawResponse = recordService.readRecords(request);
ObjectMapperUtil.assertSerialization(rawResponse);
assertTrue(rawResponse instanceof ReadRecordsResponse);
ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
assertTrue(response.getRecords().getRowCount() == 1);
logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
logger.info("doReadRecordsNoSpill: exit");
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class BlockTest method constrainedBlockTest.
@Test
public void constrainedBlockTest() throws Exception {
Schema schema = SchemaBuilder.newBuilder().addIntField("col1").addIntField("col2").build();
Block block = allocator.createBlock(schema);
ValueSet col1Constraint = EquatableValueSet.newBuilder(allocator, Types.MinorType.INT.getType(), true, false).add(10).build();
Constraints constraints = new Constraints(Collections.singletonMap("col1", col1Constraint));
try (ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(allocator, schema, constraints)) {
block.constrain(constraintEvaluator);
assertTrue(block.setValue("col1", 0, 10));
assertTrue(block.offerValue("col1", 0, 10));
assertFalse(block.setValue("col1", 0, 11));
assertFalse(block.offerValue("col1", 0, 11));
assertTrue(block.offerValue("unkown_col", 0, 10));
}
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class ExampleMetadataHandlerTest method doGetSplits.
/**
* The goal of this test is to test happy case for getting splits and also to exercise the continuation token
* logic specifically.
*/
@Test
public void doGetSplits() {
logger.info("doGetSplits: enter");
String yearCol = "year";
String monthCol = "month";
String dayCol = "day";
// This is the schema that ExampleMetadataHandler has layed out for a 'Partition' so we need to populate this
// minimal set of info here.
Schema schema = SchemaBuilder.newBuilder().addField(yearCol, new ArrowType.Int(16, false)).addField(monthCol, new ArrowType.Int(16, false)).addField(dayCol, new ArrowType.Int(16, false)).addField(ExampleMetadataHandler.PARTITION_LOCATION, new ArrowType.Utf8()).addField(ExampleMetadataHandler.SERDE, new ArrowType.Utf8()).build();
List<String> partitionCols = new ArrayList<>();
partitionCols.add(yearCol);
partitionCols.add(monthCol);
partitionCols.add(dayCol);
Map<String, ValueSet> constraintsMap = new HashMap<>();
constraintsMap.put(dayCol, SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 20)), false));
Block partitions = allocator.createBlock(schema);
int num_partitions = 100;
for (int i = 0; i < num_partitions; i++) {
BlockUtils.setValue(partitions.getFieldVector(yearCol), i, 2016 + i);
BlockUtils.setValue(partitions.getFieldVector(monthCol), i, (i % 12) + 1);
BlockUtils.setValue(partitions.getFieldVector(dayCol), i, (i % 28) + 1);
BlockUtils.setValue(partitions.getFieldVector(ExampleMetadataHandler.PARTITION_LOCATION), i, String.valueOf(i));
BlockUtils.setValue(partitions.getFieldVector(ExampleMetadataHandler.SERDE), i, "TextInputType");
}
partitions.setRowCount(num_partitions);
String continuationToken = null;
GetSplitsRequest originalReq = new GetSplitsRequest(IdentityUtil.fakeIdentity(), "queryId", "catalog_name", new TableName("schema", "table_name"), partitions, partitionCols, new Constraints(constraintsMap), continuationToken);
int numContinuations = 0;
do {
GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
ObjectMapperUtil.assertSerialization(req);
logger.info("doGetSplits: req[{}]", req);
metadataHandler.setEncryption(numContinuations % 2 == 0);
logger.info("doGetSplits: Toggle encryption " + (numContinuations % 2 == 0));
MetadataResponse rawResponse = metadataHandler.doGetSplits(allocator, req);
ObjectMapperUtil.assertSerialization(rawResponse);
assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
GetSplitsResponse response = (GetSplitsResponse) rawResponse;
continuationToken = response.getContinuationToken();
logger.info("doGetSplits: continuationToken[{}] - numSplits[{}] - maxSplits[{}]", new Object[] { continuationToken, response.getSplits().size(), MAX_SPLITS_PER_REQUEST });
for (Split nextSplit : response.getSplits()) {
if (numContinuations % 2 == 0) {
assertNotNull(nextSplit.getEncryptionKey());
} else {
assertNull(nextSplit.getEncryptionKey());
}
assertNotNull(nextSplit.getProperty(SplitProperties.LOCATION.getId()));
assertNotNull(nextSplit.getProperty(SplitProperties.SERDE.getId()));
assertNotNull(nextSplit.getProperty(SplitProperties.SPLIT_PART.getId()));
}
assertTrue("Continuation criteria violated", (response.getSplits().size() == MAX_SPLITS_PER_REQUEST && response.getContinuationToken() != null) || response.getSplits().size() < MAX_SPLITS_PER_REQUEST);
if (continuationToken != null) {
numContinuations++;
}
} while (continuationToken != null);
assertTrue(numContinuations > 0);
logger.info("doGetSplits: exit");
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class ExampleMetadataHandlerTest method doGetTableLayout.
/**
* 200,000,000 million partitions pruned down to 38,000 and transmitted in 25 seconds
*
* @throws Exception
*/
@Test
public void doGetTableLayout() throws Exception {
logger.info("doGetTableLayout - enter");
Schema tableSchema = SchemaBuilder.newBuilder().addIntField("day").addIntField("month").addIntField("year").build();
Set<String> partitionCols = new HashSet<>();
partitionCols.add("day");
partitionCols.add("month");
partitionCols.add("year");
Map<String, ValueSet> constraintsMap = new HashMap<>();
constraintsMap.put("day", SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 20)), false));
constraintsMap.put("month", SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 2)), false));
constraintsMap.put("year", SortedRangeSet.copyOf(Types.MinorType.INT.getType(), ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 1900)), false));
GetTableLayoutRequest req = null;
GetTableLayoutResponse res = null;
try {
req = new GetTableLayoutRequest(IdentityUtil.fakeIdentity(), "queryId", "default", new TableName("schema1", "table1"), new Constraints(constraintsMap), tableSchema, partitionCols);
ObjectMapperUtil.assertSerialization(req);
res = metadataHandler.doGetTableLayout(allocator, req);
ObjectMapperUtil.assertSerialization(res);
logger.info("doGetTableLayout - {}", res);
Block partitions = res.getPartitions();
for (int row = 0; row < partitions.getRowCount() && row < 10; row++) {
logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
}
assertTrue(partitions.getRowCount() > 0);
logger.info("doGetTableLayout: partitions[{}]", partitions.getRowCount());
} finally {
try {
req.close();
res.close();
} catch (Exception ex) {
logger.error("doGetTableLayout: ", ex);
}
}
logger.info("doGetTableLayout - exit");
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class HbaseRecordHandlerTest method doReadRecordsNoSpill.
@Test
public void doReadRecordsNoSpill() throws Exception {
List<Result> results = TestUtils.makeResults(100);
ResultScanner mockScanner = mock(ResultScanner.class);
when(mockScanner.iterator()).thenReturn(results.iterator());
when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject())).thenAnswer((InvocationOnMock invocationOnMock) -> {
ResultProcessor processor = (ResultProcessor) invocationOnMock.getArguments()[2];
return processor.scan(mockScanner);
});
Map<String, ValueSet> constraintsMap = new HashMap<>();
constraintsMap.put("family1:col3", SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(), ImmutableList.of(Range.equal(allocator, Types.MinorType.BIGINT.getType(), 1L)), false));
S3SpillLocation splitLoc = S3SpillLocation.newBuilder().withBucket(UUID.randomUUID().toString()).withSplitId(UUID.randomUUID().toString()).withQueryId(UUID.randomUUID().toString()).withIsDirectory(true).build();
Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create()).add(HBASE_CONN_STR, "fake_con_str").add(START_KEY_FIELD, "fake_start_key").add(END_KEY_FIELD, "fake_end_key").add(REGION_ID_FIELD, "fake_region_id").add(REGION_NAME_FIELD, "fake_region_name");
ReadRecordsRequest request = new ReadRecordsRequest(IDENTITY, DEFAULT_CATALOG, "queryId-" + System.currentTimeMillis(), new TableName(DEFAULT_SCHEMA, TEST_TABLE), schemaForRead, splitBuilder.build(), new Constraints(constraintsMap), // 100GB don't expect this to spill
100_000_000_000L, 100_000_000_000L);
RecordResponse rawResponse = handler.doReadRecords(allocator, request);
assertTrue(rawResponse instanceof ReadRecordsResponse);
ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
assertTrue(response.getRecords().getRowCount() == 1);
logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
}
Aggregations