Search in sources :

Example 11 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class HiveRecordHandlerTest method getSingleValueSet.

/**
 * Builds a mocked {@link ValueSet} (a {@link SortedRangeSet} mock) whose ordered ranges consist of exactly
 * one {@link Range} that reports itself as a single value.
 *
 * @param value the value returned by the low marker of that single range
 * @return the stubbed value set
 */
private ValueSet getSingleValueSet(Object value) {
    // Deep stubs allow the chained getLow().getValue() call to be stubbed without mocking each link by hand.
    Range singleValueRange = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(singleValueRange.getLow().getValue()).thenReturn(value);
    Mockito.when(singleValueRange.isSingleValue()).thenReturn(true);

    ValueSet stubbedSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(stubbedSet.getRanges().getOrderedRanges())
            .thenReturn(Collections.singletonList(singleValueRange));
    return stubbedSet;
}
Also used : Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 12 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class ImpalaRecordHandlerTest method getSingleValueSet.

/**
 * Creates a {@link ValueSet} test double, mocked from {@link SortedRangeSet}, that exposes a single
 * {@link Range} flagged as a single value.
 *
 * @param value the value the range's low marker should return
 * @return the mocked value set containing only that range
 */
private ValueSet getSingleValueSet(Object value) {
    // The range mock uses deep stubs so that getLow() yields a marker mock that can be stubbed in turn.
    Range onlyRange = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(onlyRange.getLow().getValue()).thenReturn(value);
    Mockito.when(onlyRange.isSingleValue()).thenReturn(true);

    // The value set mock reports the range above as its only ordered range.
    ValueSet singleValueSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(singleValueSet.getRanges().getOrderedRanges())
            .thenReturn(Collections.singletonList(onlyRange));
    return singleValueSet;
}
Also used : Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 13 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class CloudwatchRecordHandler method pushDownConstraints.

/**
 * Narrows a Cloudwatch Logs request using the log time predicate supplied by Athena, when one is usable.
 *
 * @param constraints The constraints for the read as provided by Athena based on the customer's query.
 * @param request The Cloudwatch Logs request to decorate; it is modified in place.
 * @return The same request instance, with its start and/or end time set where the predicate had a bound.
 * @note Only SortedRangeSet filters (>=, <=, between) on the log time column are currently pushed down.
 */
private GetLogEventsRequest pushDownConstraints(Constraints constraints, GetLogEventsRequest request) {
    ValueSet timeConstraint = constraints.getSummary().get(LOG_TIME_FIELD);
    // Nothing to push down unless the time column has a range predicate that excludes nulls.
    if (!(timeConstraint instanceof SortedRangeSet) || timeConstraint.isNullAllowed()) {
        return request;
    }

    // Range predicates (>, <, between) are the simplest and most common way of searching logs, so they
    // are pushed down as an optimization. SQL can express complex overlapping ranges that Cloudwatch
    // cannot, so only the overall span is used here; this does not replace applying the constraints
    // with the ConstraintEvaluator.
    Range span = ((SortedRangeSet) timeConstraint).getSpan();

    // Only the marker values are used; whether a bound is inclusive or exclusive is not consulted here.
    if (!span.getLow().isNullValue()) {
        request.setStartTime((Long) span.getLow().getValue());
    }
    if (!span.getHigh().isNullValue()) {
        request.setEndTime((Long) span.getHigh().getValue());
    }
    return request;
}
Also used : SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) AtomicLong(java.util.concurrent.atomic.AtomicLong) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 14 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class DDBPredicateUtils method getBestIndexForPredicates.

/**
 * Chooses the index to query for the given predicates. The table itself is modeled as an index and is
 * returned when no candidate has a hash key that a predicate covers.
 *
 * @param table the original table
 * @param requestedCols the requested columns; a secondary index is only considered when
 *                      {@code indexContainsAllRequiredColumns} accepts it for these columns
 * @param predicates the predicates, keyed by column name
 * @return the first candidate whose hash key and range key both have predicates; otherwise the first
 *         candidate with a hash key predicate; otherwise the original table index
 */
public static DynamoDBIndex getBestIndexForPredicates(DynamoDBTable table, List<String> requestedCols, Map<String, ValueSet> predicates) {
    Set<String> predicateColumns = predicates.keySet();

    // Model the table itself as an index that projects everything.
    DynamoDBIndex tableIndex = new DynamoDBIndex(table.getName(), table.getHashKey(), table.getRangeKey(), ProjectionType.ALL, ImmutableList.of());

    // Hash key matches: the table goes first (when its hash key has a predicate), then the indices.
    ImmutableList.Builder<DynamoDBIndex> hashKeyMatchesBuilder = ImmutableList.builder();
    if (predicateColumns.contains(tableIndex.getHashKey())) {
        hashKeyMatchesBuilder.add(tableIndex);
    }

    // Only indices that project every requested column are eligible.
    List<DynamoDBIndex> candidateIndices = new ArrayList<>();
    for (DynamoDBIndex index : table.getIndexes()) {
        if (indexContainsAllRequiredColumns(requestedCols, index, table)) {
            candidateIndices.add(index);
        }
    }

    // An index counts as a hash key match only if its predicate yields at least one hash key value.
    for (DynamoDBIndex index : candidateIndices) {
        if (predicateColumns.contains(index.getHashKey())
                && !getHashKeyAttributeValues(predicates.get(index.getHashKey())).isEmpty()) {
            hashKeyMatchesBuilder.add(index);
        }
    }
    List<DynamoDBIndex> hashKeyMatches = hashKeyMatchesBuilder.build();

    // Range key matches: again the table first, then the eligible indices.
    ImmutableList.Builder<DynamoDBIndex> rangeKeyMatchesBuilder = ImmutableList.builder();
    if (tableIndex.getRangeKey().isPresent() && predicateColumns.contains(tableIndex.getRangeKey().get())) {
        rangeKeyMatchesBuilder.add(tableIndex);
    }
    for (DynamoDBIndex index : candidateIndices) {
        if (index.getRangeKey().isPresent() && predicateColumns.contains(index.getRangeKey().get())) {
            rangeKeyMatchesBuilder.add(index);
        }
    }
    List<DynamoDBIndex> rangeKeyMatches = rangeKeyMatchesBuilder.build();

    // Prefer the first hash key match that is also a range key match; failing that, the first hash key
    // match; failing that, the table itself.
    return hashKeyMatches.stream()
            .filter(rangeKeyMatches::contains)
            .findFirst()
            .orElseGet(() -> hashKeyMatches.isEmpty() ? tableIndex : hashKeyMatches.get(0));
}
Also used : SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Set(java.util.Set) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) Preconditions.checkState(com.google.common.base.Preconditions.checkState) HashSet(java.util.HashSet) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex) List(java.util.List) Stream(java.util.stream.Stream) ImmutableList(com.google.common.collect.ImmutableList) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ItemUtils(com.amazonaws.services.dynamodbv2.document.ItemUtils) DynamoDBTable(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable) ProjectionType(com.amazonaws.services.dynamodbv2.model.ProjectionType) Joiner(com.google.common.base.Joiner) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) DynamoDBIndex(com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex)

Example 15 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class DDBPredicateUtils method generateSingleColumnFilter.

/**
 * Generates a filter expression for a single column given a {@link ValueSet} predicate for that column.
 * <p>
 * The expression is assembled as a list of disjuncts: one parenthesized conjunction per non-single-value
 * range, one equality/inequality or IN clause covering all collected single values, and (when the
 * predicate allows null) one clause accepting a missing attribute. A single disjunct is returned as-is;
 * several are OR-joined and wrapped in parentheses.
 * <p>
 * Value placeholders are drawn from {@code valueNameProducer} in the order the clauses are built, so the
 * order of statements in this method is significant.
 *
 * @param originalColumnName the column name
 * @param predicate the associated predicate
 * @param accumulator the value accumulator to add values to
 * @param valueNameProducer the value name producer to generate value aliases with
 * @param recordMetadata object containing any necessary metadata from the glue table
 * @return the generated filter expression
 * @throws IllegalArgumentException if a range's low marker uses a BELOW bound or its high marker uses an
 *         ABOVE bound
 */
public static String generateSingleColumnFilter(String originalColumnName, ValueSet predicate, List<AttributeValue> accumulator, IncrementingValueNameProducer valueNameProducer, DDBRecordMetadata recordMetadata) {
    String columnName = aliasColumn(originalColumnName);
    // Short-circuit: a "none" predicate yields a clause accepting a missing attribute or one that
    // satisfies the "=" predicate built with a null value.
    if (predicate.isNone()) {
        return "(attribute_not_exists(" + columnName + ") OR " + toPredicate(originalColumnName, "=", null, accumulator, valueNameProducer.getNext(), recordMetadata) + ")";
    }
    // Short-circuit: an "all" predicate yields a clause requiring the attribute to exist and to satisfy
    // the "<>" predicate built with a null value.
    if (predicate.isAll()) {
        return "(attribute_exists(" + columnName + ") AND " + toPredicate(originalColumnName, "<>", null, accumulator, valueNameProducer.getNext(), recordMetadata) + ")";
    }
    // Clauses that will be OR-ed together to form the final expression.
    List<String> disjuncts = new ArrayList<>();
    // Discrete values collected from either predicate type; emitted together after the branches below.
    List<Object> singleValues = new ArrayList<>();
    // Stays true for SortedRangeSet; only the EquatableValueSet branch can switch it off.
    boolean isWhitelist = true;
    if (predicate instanceof SortedRangeSet) {
        for (Range range : predicate.getRanges().getOrderedRanges()) {
            // Already checked
            checkState(!range.isAll());
            if (range.isSingleValue()) {
                singleValues.add(range.getLow().getValue());
            } else {
                // Comparisons for this range's bounds; AND-ed together below.
                List<String> rangeConjuncts = new ArrayList<>();
                if (!range.getLow().isLowerUnbounded()) {
                    switch(range.getLow().getBound()) {
                        case ABOVE:
                            // Low marker ABOVE its value is rendered as a strict ">".
                            rangeConjuncts.add(toPredicate(originalColumnName, ">", range.getLow().getValue(), accumulator, valueNameProducer.getNext(), recordMetadata));
                            break;
                        case EXACTLY:
                            // Low marker EXACTLY at its value is rendered as ">=".
                            rangeConjuncts.add(toPredicate(originalColumnName, ">=", range.getLow().getValue(), accumulator, valueNameProducer.getNext(), recordMetadata));
                            break;
                        case BELOW:
                            throw new IllegalArgumentException("Low marker should never use BELOW bound");
                        default:
                            throw new AssertionError("Unhandled lower bound: " + range.getLow().getBound());
                    }
                }
                if (!range.getHigh().isUpperUnbounded()) {
                    switch(range.getHigh().getBound()) {
                        case ABOVE:
                            throw new IllegalArgumentException("High marker should never use ABOVE bound");
                        case EXACTLY:
                            // High marker EXACTLY at its value is rendered as "<=".
                            rangeConjuncts.add(toPredicate(originalColumnName, "<=", range.getHigh().getValue(), accumulator, valueNameProducer.getNext(), recordMetadata));
                            break;
                        case BELOW:
                            // High marker BELOW its value is rendered as a strict "<".
                            rangeConjuncts.add(toPredicate(originalColumnName, "<", range.getHigh().getValue(), accumulator, valueNameProducer.getNext(), recordMetadata));
                            break;
                        default:
                            throw new AssertionError("Unhandled upper bound: " + range.getHigh().getBound());
                    }
                }
                // If rangeConjuncts is empty, then the range was ALL, which should already have been checked for
                checkState(!rangeConjuncts.isEmpty());
                disjuncts.add("(" + AND_JOINER.join(rangeConjuncts) + ")");
            }
        }
    } else {
        // NOTE(review): any predicate that is not a SortedRangeSet is assumed to be an EquatableValueSet;
        // the cast is unchecked, so another ValueSet implementation would fail here. Confirm this is intended.
        EquatableValueSet equatablePredicate = (EquatableValueSet) predicate;
        isWhitelist = equatablePredicate.isWhiteList();
        long valueCount = equatablePredicate.getValueBlock().getRowCount();
        for (int i = 0; i < valueCount; i++) {
            singleValues.add(equatablePredicate.getValue(i));
        }
    }
    // Add back all of the possible single values either as an equality or an IN predicate
    if (singleValues.size() == 1) {
        // A non-whitelist set with one value becomes an inequality instead of an equality.
        disjuncts.add(toPredicate(originalColumnName, isWhitelist ? "=" : "<>", getOnlyElement(singleValues), accumulator, valueNameProducer.getNext(), recordMetadata));
    } else if (singleValues.size() > 1) {
        // All values are bound first, then one placeholder name per value is drawn from the producer.
        // NOTE(review): presumably bindValue adds to the accumulator in the same order the names are
        // generated below - confirm against bindValue.
        for (Object value : singleValues) {
            bindValue(originalColumnName, value, accumulator, recordMetadata);
        }
        String values = COMMA_JOINER.join(Stream.generate(valueNameProducer::getNext).limit(singleValues.size()).collect(toImmutableList()));
        // A non-whitelist set negates the IN clause.
        disjuncts.add((isWhitelist ? "" : "NOT ") + columnName + " IN (" + values + ")");
    }
    // at this point we should have some disjuncts
    checkState(!disjuncts.isEmpty());
    // add nullability disjuncts
    if (predicate.isNullAllowed()) {
        // Added without its own parentheses; it is only ever OR-joined with the other disjuncts below.
        disjuncts.add("attribute_not_exists(" + columnName + ") OR " + toPredicate(originalColumnName, "=", null, accumulator, valueNameProducer.getNext(), recordMetadata));
    }
    // DDB doesn't like redundant parentheses
    if (disjuncts.size() == 1) {
        return disjuncts.get(0);
    }
    return "(" + OR_JOINER.join(disjuncts) + ")";
}
Also used : EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ArrayList(java.util.ArrayList) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet)

Aggregations

Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range)34 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)26 ArrayList (java.util.ArrayList)11 SortedRangeSet (com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet)10 Split (com.amazonaws.athena.connector.lambda.domain.Split)8 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)8 Test (org.junit.Test)8 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)7 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)7 PreparedStatement (java.sql.PreparedStatement)7 Schema (org.apache.arrow.vector.types.pojo.Schema)7 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 FieldBuilder (com.amazonaws.athena.connector.lambda.data.FieldBuilder)2 JdbcSplitQueryBuilder (com.amazonaws.athena.connectors.jdbc.manager.JdbcSplitQueryBuilder)2 List (java.util.List)2 Set (java.util.Set)2 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)2 Block (com.amazonaws.athena.connector.lambda.data.Block)1 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)1