use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class CloudwatchRecordHandlerTest method doReadRecordsNoSpill.
/**
 * Reads records with an equality constraint on "time" and block/inline size limits large enough
 * that the handler is expected to answer inline with a {@link ReadRecordsResponse}.
 */
@Test
public void doReadRecordsNoSpill() throws Exception {
    logger.info("doReadRecordsNoSpill: enter");

    // Constrain the "time" column to exactly 100.
    Range timeEquals100 = Range.equal(allocator, Types.MinorType.BIGINT.getType(), 100L);
    ValueSet timeConstraint = SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(), ImmutableList.of(timeEquals100), false);
    Map<String, ValueSet> constraintsByColumn = new HashMap<>();
    constraintsByColumn.put("time", timeConstraint);

    String queryId = "queryId-" + System.currentTimeMillis();

    // The split carries a randomly named spill location plus the log stream to read.
    Split split = Split.newBuilder(
                    S3SpillLocation.newBuilder()
                            .withBucket(UUID.randomUUID().toString())
                            .withSplitId(UUID.randomUUID().toString())
                            .withQueryId(UUID.randomUUID().toString())
                            .withIsDirectory(true)
                            .build(),
                    keyFactory.create())
            .add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, "table")
            .build();

    // 100GB don't expect this to spill
    long noSpillLimit = 100_000_000_000L;
    ReadRecordsRequest request = new ReadRecordsRequest(
            identity,
            "catalog",
            queryId,
            new TableName("schema", "table"),
            schemaForRead,
            split,
            new Constraints(constraintsByColumn),
            noSpillLimit,
            noSpillLimit);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse inlineResponse = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", inlineResponse.getRecordCount());

    int rowCount = inlineResponse.getRecords().getRowCount();
    assertTrue(rowCount == 3);
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(inlineResponse.getRecords(), 0));

    logger.info("doReadRecordsNoSpill: exit");
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class DataLakeRecordHandlerTest method getSingleValueSet.
/**
 * Builds a mocked {@link ValueSet} whose ordered ranges consist of exactly one range that
 * reports itself as a single value and yields {@code value} as its low bound.
 *
 * @param value the value the mocked range's low marker should return
 * @return the mocked value set
 */
private ValueSet getSingleValueSet(Object value) {
    // Deep stubs allow the chained getters below to be stubbed without mocking each link by hand.
    Range singleValueRange = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(singleValueRange.isSingleValue()).thenReturn(true);
    Mockito.when(singleValueRange.getLow().getValue()).thenReturn(value);

    ValueSet mockedValueSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(mockedValueSet.getRanges().getOrderedRanges())
            .thenReturn(Collections.singletonList(singleValueRange));
    return mockedValueSet;
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class DynamoDBMetadataHandlerTest method doGetTableLayoutScanWithTypeOverride.
/**
 * Resolves a Glue-backed table that declares column-name and datetime-format mappings, then
 * requests a table layout with constraints on non-key columns and verifies that a single scan
 * partition is produced along with the expected filter expression metadata.
 */
@Test
public void doGetTableLayoutScanWithTypeOverride() throws Exception {
    List<Column> columns = new ArrayList<>();
    columns.add(new Column().withName("col1").withType("int"));
    columns.add(new Column().withName("col2").withType("timestamptz"));
    columns.add(new Column().withName("col3").withType("string"));

    // The datetime mapping deliberately ends with a space; the normalized value asserted below has none.
    Map<String, String> param = ImmutableMap.of(SOURCE_TABLE_PROPERTY, TEST_TABLE, COLUMN_NAME_MAPPING_PROPERTY, "col1=Col1", DATETIME_FORMAT_MAPPING_PROPERTY, "col1=datetime1,col3=datetime3 ");
    Table table = new Table().withParameters(param).withPartitionKeys().withStorageDescriptor(new StorageDescriptor().withColumns(columns));
    GetTableResult mockResult = new GetTableResult().withTable(table);
    when(glueClient.getTable(any())).thenReturn(mockResult);

    TableName tableName = new TableName(DEFAULT_SCHEMA, "glueTableForTestTable");
    GetTableRequest getTableRequest = new GetTableRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, tableName);
    GetTableResponse getTableResponse = handler.doGetTable(allocator, getTableRequest);
    // Log under this test's own name so the output is attributed to the right test.
    logger.info("doGetTableLayoutScanWithTypeOverride: GetTableResponse[{}]", getTableResponse);

    Map<String, String> customMetadata = getTableResponse.getSchema().getCustomMetadata();
    assertThat(customMetadata.get(SOURCE_TABLE_PROPERTY), equalTo(TEST_TABLE));
    // The normalized mapping is expected to use the mapped column name (col1 -> Col1).
    assertThat(customMetadata.get(DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED), equalTo("Col1=datetime1,col3=datetime3"));

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col3", EquatableValueSet.newBuilder(allocator, new ArrowType.Bool(), true, true).add(true).build());
    constraintsMap.put("col2", EquatableValueSet.newBuilder(allocator, new ArrowType.Bool(), true, true).add(true).build());

    // Collections.emptySet() is the type-safe form of the raw Collections.EMPTY_SET constant.
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, tableName, new Constraints(constraintsMap), getTableResponse.getSchema(), Collections.emptySet());
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, getTableLayoutRequest);
    logger.info("doGetTableLayoutScanWithTypeOverride schema - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayoutScanWithTypeOverride partitions - {}", res.getPartitions());

    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(PARTITION_TYPE_METADATA), equalTo(SCAN_PARTITION_TYPE));
    // no hash key constraints, so look for segment count column
    assertThat(res.getPartitions().getSchema().findField(SEGMENT_COUNT_METADATA) != null, is(true));
    assertThat(res.getPartitions().getRowCount(), equalTo(1));

    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(NON_KEY_FILTER_METADATA), equalTo("(#col3 = :v0 OR attribute_not_exists(#col3) OR #col3 = :v1)"));
    ImmutableMap<String, String> expressionNames = ImmutableMap.of("#col3", "col3", "#col2", "col2");
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_NAMES_METADATA), equalTo(Jackson.toJsonString(expressionNames)));
    ImmutableMap<String, AttributeValue> expressionValues = ImmutableMap.of(":v0", ItemUtils.toAttributeValue(true), ":v1", ItemUtils.toAttributeValue(null));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_VALUES_METADATA), equalTo(Jackson.toJsonString(expressionValues)));
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class DynamoDBMetadataHandlerTest method doGetTableLayoutScan.
/**
 * Requests a table layout with a constraint on a single column ("col_3") and an empty schema,
 * then verifies that one scan partition with a segment count column is returned together with
 * the expected filter expression, expression names and expression values metadata.
 */
@Test
public void doGetTableLayoutScan() throws Exception {
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col_3", EquatableValueSet.newBuilder(allocator, new ArrowType.Bool(), true, true).add(true).build());

    // Collections.emptySet() is the type-safe form of the raw Collections.EMPTY_SET constant.
    GetTableLayoutRequest req = new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, new TableName(TEST_CATALOG_NAME, TEST_TABLE), new Constraints(constraintsMap), SchemaBuilder.newBuilder().build(), Collections.emptySet());
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
    logger.info("doGetTableLayout schema - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayout partitions - {}", res.getPartitions());

    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(PARTITION_TYPE_METADATA), equalTo(SCAN_PARTITION_TYPE));
    // no hash key constraints, so look for segment count column
    assertThat(res.getPartitions().getSchema().findField(SEGMENT_COUNT_METADATA) != null, is(true));
    assertThat(res.getPartitions().getRowCount(), equalTo(1));

    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(NON_KEY_FILTER_METADATA), equalTo("(#col_3 = :v0 OR attribute_not_exists(#col_3) OR #col_3 = :v1)"));
    ImmutableMap<String, String> expressionNames = ImmutableMap.of("#col_3", "col_3");
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_NAMES_METADATA), equalTo(Jackson.toJsonString(expressionNames)));
    ImmutableMap<String, AttributeValue> expressionValues = ImmutableMap.of(":v0", ItemUtils.toAttributeValue(true), ":v1", ItemUtils.toAttributeValue(null));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_VALUES_METADATA), equalTo(Jackson.toJsonString(expressionValues)));
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class DynamoDBMetadataHandler method getPartitions.
/**
 * Writes the partitions for a table layout request. When the constraints yield values for the
 * hash key of the selected index, one partition row is written per hash key value; otherwise a
 * single partition row is written that carries the scan segment count computed from the table's
 * provisioned read capacity and approximate size.
 *
 * @see GlueMetadataHandler
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws Exception {
    // TODO consider caching this repeated work in #enhancePartitionSchema
    // Prefer the source table name recorded in the schema (the Glue table name may differ from the
    // actual table name); fall back to the name on the request.
    String sourceTableName = getSourceTableName(request.getSchema());
    String resolvedTableName = sourceTableName;
    if (resolvedTableName == null) {
        resolvedTableName = request.getTableName().getTableName();
    }

    DynamoDBTable tableMetadata = tableResolver.getTableMetadata(resolvedTableName);
    Map<String, ValueSet> constraintSummary = request.getConstraints().getSummary();
    List<String> requestedColumnNames = request.getSchema().getFields().stream()
            .map(field -> field.getName())
            .collect(Collectors.toList());

    DynamoDBIndex chosenIndex = DDBPredicateUtils.getBestIndexForPredicates(tableMetadata, requestedColumnNames, constraintSummary);
    logger.info("using index: {}", chosenIndex.getName());

    String hashKeyName = chosenIndex.getHashKey();
    ValueSet hashKeyConstraint = constraintSummary.get(hashKeyName);
    List<Object> hashKeyValues = Collections.emptyList();
    if (hashKeyConstraint != null) {
        hashKeyValues = DDBPredicateUtils.getHashKeyAttributeValues(hashKeyConstraint);
    }

    if (hashKeyValues.isEmpty()) {
        // always fall back to a scan, need to return at least one partition so stick the segment count in it
        int segmentCount = DDBTableUtils.getNumSegments(tableMetadata.getProvisionedReadCapacity(), tableMetadata.getApproxTableSizeInBytes());
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(SEGMENT_COUNT_METADATA, rowNum, segmentCount);
            return 1;
        });
        return;
    }

    for (Object hashKeyValue : hashKeyValues) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(hashKeyName, rowNum, hashKeyValue);
            // exactly one partition row is added per hash key value
            return 1;
        });
    }
}
Aggregations