Search in sources:

Example 96 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class CloudwatchRecordHandlerTest method doReadRecordsNoSpill.

/**
 * Issues a read request constrained to {@code time == 100} with very large spill thresholds and
 * checks that the handler answers with a {@code ReadRecordsResponse} holding three rows.
 */
@Test
public void doReadRecordsNoSpill() throws Exception {
    logger.info("doReadRecordsNoSpill: enter");

    // Constrain the "time" column to the single BIGINT value 100.
    Range timeEquals100 = Range.equal(allocator, Types.MinorType.BIGINT.getType(), 100L);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("time", SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(), ImmutableList.of(timeEquals100), false));

    String queryId = "queryId-" + System.currentTimeMillis();
    TableName tableName = new TableName("schema", "table");

    // The split carries a randomly named spill location plus the log stream to read from.
    Split split = Split.newBuilder(
            S3SpillLocation.newBuilder()
                    .withBucket(UUID.randomUUID().toString())
                    .withSplitId(UUID.randomUUID().toString())
                    .withQueryId(UUID.randomUUID().toString())
                    .withIsDirectory(true)
                    .build(),
            keyFactory.create())
            .add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, "table")
            .build();
    Constraints constraints = new Constraints(constraintsMap);

    // 100GB for both trailing size arguments: don't expect this to spill
    ReadRecordsRequest request = new ReadRecordsRequest(
            identity,
            "catalog",
            queryId,
            tableName,
            schemaForRead,
            split,
            constraints,
            100_000_000_000L,
            100_000_000_000L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof ReadRecordsResponse);

    ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
    logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
    assertTrue(response.getRecords().getRowCount() == 3);
    logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));

    logger.info("doReadRecordsNoSpill: exit");
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) HashMap(java.util.HashMap) ReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 97 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class DataLakeRecordHandlerTest method getSingleValueSet.

/**
 * Builds a mocked {@link ValueSet} whose ordered ranges consist of exactly one mocked
 * {@link Range} that reports itself as a single value.
 *
 * @param value the object stubbed as the value of the range's low bound
 * @return a deep-stub mock of {@code SortedRangeSet}, exposed as a {@link ValueSet}
 */
private ValueSet getSingleValueSet(Object value) {
    // Deep stubs let the chained call getLow().getValue() be stubbed in one statement.
    Range singleValueRange = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(singleValueRange.getLow().getValue()).thenReturn(value);
    Mockito.when(singleValueRange.isSingleValue()).thenReturn(true);

    ValueSet mockedValueSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(mockedValueSet.getRanges().getOrderedRanges())
            .thenReturn(Collections.singletonList(singleValueRange));

    return mockedValueSet;
}
Also used : Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 98 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandlerTest method doGetTableLayoutScanWithTypeOverride.

/**
 * Resolves a Glue-backed table that carries column-name and datetime-format mapping parameters,
 * then requests a table layout with constraints on non-key columns and verifies that a single
 * scan partition is produced along with the expected filter expression metadata.
 */
@Test
public void doGetTableLayoutScanWithTypeOverride() throws Exception {
    List<Column> columns = new ArrayList<>();
    columns.add(new Column().withName("col1").withType("int"));
    columns.add(new Column().withName("col2").withType("timestamptz"));
    columns.add(new Column().withName("col3").withType("string"));
    // Note the trailing space in the datetime mapping value; the normalized form asserted below has none.
    Map<String, String> param = ImmutableMap.of(SOURCE_TABLE_PROPERTY, TEST_TABLE, COLUMN_NAME_MAPPING_PROPERTY, "col1=Col1", DATETIME_FORMAT_MAPPING_PROPERTY, "col1=datetime1,col3=datetime3 ");
    Table table = new Table().withParameters(param).withPartitionKeys().withStorageDescriptor(new StorageDescriptor().withColumns(columns));
    GetTableResult mockResult = new GetTableResult().withTable(table);
    when(glueClient.getTable(any())).thenReturn(mockResult);
    TableName tableName = new TableName(DEFAULT_SCHEMA, "glueTableForTestTable");
    GetTableRequest getTableRequest = new GetTableRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, tableName);
    GetTableResponse getTableResponse = handler.doGetTable(allocator, getTableRequest);
    // Log under this test's own name (the label previously named a different test).
    logger.info("doGetTableLayoutScanWithTypeOverride: GetTableResponse[{}]", getTableResponse);
    Map<String, String> customMetadata = getTableResponse.getSchema().getCustomMetadata();
    assertThat(customMetadata.get(SOURCE_TABLE_PROPERTY), equalTo(TEST_TABLE));
    // The normalized mapping is expected to use the mapped column name (Col1) and be trimmed.
    assertThat(customMetadata.get(DATETIME_FORMAT_MAPPING_PROPERTY_NORMALIZED), equalTo("Col1=datetime1,col3=datetime3"));
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col3", EquatableValueSet.newBuilder(allocator, new ArrowType.Bool(), true, true).add(true).build());
    constraintsMap.put("col2", EquatableValueSet.newBuilder(allocator, new ArrowType.Bool(), true, true).add(true).build());
    // Collections.emptySet() is the type-safe replacement for the raw Collections.EMPTY_SET constant.
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, tableName, new Constraints(constraintsMap), getTableResponse.getSchema(), Collections.emptySet());
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, getTableLayoutRequest);
    logger.info("doGetTableLayoutScanWithTypeOverride schema - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayoutScanWithTypeOverride partitions - {}", res.getPartitions());
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(PARTITION_TYPE_METADATA), equalTo(SCAN_PARTITION_TYPE));
    // no hash key constraints, so look for segment count column
    assertThat(res.getPartitions().getSchema().findField(SEGMENT_COUNT_METADATA) != null, is(true));
    assertThat(res.getPartitions().getRowCount(), equalTo(1));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(NON_KEY_FILTER_METADATA), equalTo("(#col3 = :v0 OR attribute_not_exists(#col3) OR #col3 = :v1)"));
    ImmutableMap<String, String> expressionNames = ImmutableMap.of("#col3", "col3", "#col2", "col2");
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_NAMES_METADATA), equalTo(Jackson.toJsonString(expressionNames)));
    ImmutableMap<String, AttributeValue> expressionValues = ImmutableMap.of(":v0", ItemUtils.toAttributeValue(true), ":v1", ItemUtils.toAttributeValue(null));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_VALUES_METADATA), equalTo(Jackson.toJsonString(expressionValues)));
}
Also used : AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Table(com.amazonaws.services.glue.model.Table) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) Column(com.amazonaws.services.glue.model.Column) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) Test(org.junit.Test)

Example 99 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandlerTest method doGetTableLayoutScan.

/**
 * Requests a table layout with a constraint on a single non-key column and verifies that one
 * scan partition is produced, carrying the segment count column and the expected filter
 * expression, expression names and expression values in the partition schema metadata.
 */
@Test
public void doGetTableLayoutScan() throws Exception {
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col_3", EquatableValueSet.newBuilder(allocator, new ArrowType.Bool(), true, true).add(true).build());
    // Collections.emptySet() is the type-safe replacement for the raw Collections.EMPTY_SET constant.
    // NOTE(review): TEST_CATALOG_NAME is passed as the first TableName argument here, while the
    // sibling type-override test passes DEFAULT_SCHEMA - confirm this is intentional.
    GetTableLayoutRequest req = new GetTableLayoutRequest(TEST_IDENTITY, TEST_QUERY_ID, TEST_CATALOG_NAME, new TableName(TEST_CATALOG_NAME, TEST_TABLE), new Constraints(constraintsMap), SchemaBuilder.newBuilder().build(), Collections.emptySet());
    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
    logger.info("doGetTableLayout schema - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayout partitions - {}", res.getPartitions());
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(PARTITION_TYPE_METADATA), equalTo(SCAN_PARTITION_TYPE));
    // no hash key constraints, so look for segment count column
    assertThat(res.getPartitions().getSchema().findField(SEGMENT_COUNT_METADATA) != null, is(true));
    assertThat(res.getPartitions().getRowCount(), equalTo(1));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(NON_KEY_FILTER_METADATA), equalTo("(#col_3 = :v0 OR attribute_not_exists(#col_3) OR #col_3 = :v1)"));
    ImmutableMap<String, String> expressionNames = ImmutableMap.of("#col_3", "col_3");
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_NAMES_METADATA), equalTo(Jackson.toJsonString(expressionNames)));
    ImmutableMap<String, AttributeValue> expressionValues = ImmutableMap.of(":v0", ItemUtils.toAttributeValue(true), ":v1", ItemUtils.toAttributeValue(null));
    assertThat(res.getPartitions().getSchema().getCustomMetadata().get(EXPRESSION_VALUES_METADATA), equalTo(Jackson.toJsonString(expressionValues)));
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) HashMap(java.util.HashMap) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 100 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class DynamoDBMetadataHandler method getPartitions.

/**
 * Writes the partitions for a table layout request. When the constraints supply values for the
 * hash key of the selected index, one partition row is written per hash key value; otherwise a
 * single partition row is written that carries the scan segment count.
 *
 * @see GlueMetadataHandler
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws Exception {
    // TODO consider caching this repeated work in #enhancePartitionSchema
    // Prefer the source table name recorded in the schema (the Glue table name may differ from
    // the actual table name); otherwise use the name from the request.
    String sourceTableName = getSourceTableName(request.getSchema());
    String tableName = (sourceTableName != null) ? sourceTableName : request.getTableName().getTableName();
    DynamoDBTable table = tableResolver.getTableMetadata(tableName);

    Map<String, ValueSet> summary = request.getConstraints().getSummary();
    List<String> requestedCols = request.getSchema().getFields()
            .stream()
            .map(Field::getName)
            .collect(Collectors.toList());

    DynamoDBIndex index = DDBPredicateUtils.getBestIndexForPredicates(table, requestedCols, summary);
    logger.info("using index: {}", index.getName());

    String hashKeyName = index.getHashKey();
    ValueSet hashKeyValueSet = summary.get(hashKeyName);
    List<Object> hashKeyValues = Collections.emptyList();
    if (hashKeyValueSet != null) {
        hashKeyValues = DDBPredicateUtils.getHashKeyAttributeValues(hashKeyValueSet);
    }

    if (hashKeyValues.isEmpty()) {
        // always fall back to a scan, need to return at least one partition so stick the segment count in it
        int segmentCount = DDBTableUtils.getNumSegments(table.getProvisionedReadCapacity(), table.getApproxTableSizeInBytes());
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(SEGMENT_COUNT_METADATA, rowNum, segmentCount);
            return 1;
        });
        return;
    }

    for (Object hashKeyValue : hashKeyValues) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            block.setValue(hashKeyName, rowNum, hashKeyValue);
            // we added 1 partition per hashkey value
            return 1;
        });
    }
}
Also used : DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) Block(com.amazonaws.athena.connector.lambda.data.Block) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable)

Aggregations

ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet): 104; Test (org.junit.Test): 66; Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 63; HashMap (java.util.HashMap): 48; TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 47; Schema (org.apache.arrow.vector.types.pojo.Schema): 37; Split (com.amazonaws.athena.connector.lambda.domain.Split): 31; Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range): 27; ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest): 27; EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet): 26; ArrayList (java.util.ArrayList): 25; Matchers.anyString (org.mockito.Matchers.anyString): 25; RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse): 24; Block (com.amazonaws.athena.connector.lambda.data.Block): 23; S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation): 21; RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse): 18; SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder): 17; ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse): 17; InvocationOnMock (org.mockito.invocation.InvocationOnMock): 17; BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl): 13