use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.
the class HiveRecordHandlerTest method getSingleValueSet.
/**
 * Builds a mocked {@link ValueSet} whose ordered ranges consist of exactly one single-value range.
 *
 * @param value the value the mocked range reports through its low marker
 * @return a deep-stubbed {@link SortedRangeSet} mock exposing only that range
 */
private ValueSet getSingleValueSet(Object value) {
    // Deep stubs let the chained getters below be stubbed without mocking each intermediate object.
    ValueSet mockedValueSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Range singleValueRange = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);

    Mockito.when(singleValueRange.getLow().getValue()).thenReturn(value);
    Mockito.when(singleValueRange.isSingleValue()).thenReturn(true);

    Mockito.when(mockedValueSet.getRanges().getOrderedRanges())
            .thenReturn(Collections.singletonList(singleValueRange));
    return mockedValueSet;
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.
the class ImpalaRecordHandlerTest method getSingleValueSet.
/**
 * Creates a mocked {@link ValueSet} that reports a single range holding one exact value.
 *
 * @param value the value returned by the low marker of the only range
 * @return a deep-stubbed {@link SortedRangeSet} mock whose ordered ranges contain just that range
 */
private ValueSet getSingleValueSet(Object value) {
    // The range is stubbed first, then wired into the value set mock as its only ordered range.
    Range onlyRange = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(onlyRange.getLow().getValue()).thenReturn(value);
    Mockito.when(onlyRange.isSingleValue()).thenReturn(true);

    ValueSet stubbedSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(stubbedSet.getRanges().getOrderedRanges())
            .thenReturn(Collections.singletonList(onlyRange));

    return stubbedSet;
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.
the class CloudwatchRecordHandler method pushDownConstraints.
/**
 * Attempts to push down predicates into Cloudwatch Logs by decorating the Cloudwatch Logs request.
 *
 * @param constraints The constraints for the read as provided by Athena based on the customer's query.
 * @param request The Cloudwatch Logs request to inject predicates to.
 * @return The decorated Cloudwatch Logs request (the same instance that was passed in).
 * @note This implementation currently only pushes down SortedRangeSet filters (>=, <=, between) on the
 * log time column. The pushed-down window is never narrower than the predicate, so the constraints
 * must still be applied to the returned rows using the ConstraintEvaluator.
 */
private GetLogEventsRequest pushDownConstraints(Constraints constraints, GetLogEventsRequest request) {
    ValueSet timeConstraint = constraints.getSummary().get(LOG_TIME_FIELD);
    // instanceof is false for a missing (null) constraint, so no explicit null check is required.
    if (!(timeConstraint instanceof SortedRangeSet) || timeConstraint.isNullAllowed()) {
        return request;
    }
    // NOTE(review): a set without any ranges has no span to compute (getSpan() presumably fails on it),
    // so nothing is pushed down in that case - confirm against SortedRangeSet.
    if (timeConstraint.isNone()) {
        return request;
    }

    // SortedRangeSet is how >, <, between is represented which are easiest and most common when
    // searching logs so we attempt to push that down here as an optimization. SQL can represent complex
    // overlapping ranges which Cloudwatch can not support so this is not a replacement for applying
    // constraints using the ConstraintEvaluator.
    Range basicPredicate = ((SortedRangeSet) timeConstraint).getSpan();

    if (!basicPredicate.getLow().isNullValue()) {
        // startTime is inclusive in GetLogEvents, so an exclusive lower bound merely over-fetches one
        // timestamp that is filtered out later.
        Long lowerBound = (Long) basicPredicate.getLow().getValue();
        request.setStartTime(lowerBound);
    }

    if (!basicPredicate.getHigh().isNullValue()) {
        long upperBound = (Long) basicPredicate.getHigh().getValue();
        switch (basicPredicate.getHigh().getBound()) {
            case EXACTLY:
                // endTime is exclusive in GetLogEvents: events stamped exactly at endTime are not
                // returned. For an inclusive upper bound (<=, between, =) the window is widened by one
                // millisecond so those events are kept. When that would overflow, endTime is left unset.
                if (upperBound < Long.MAX_VALUE) {
                    request.setEndTime(upperBound + 1);
                }
                break;
            default:
                request.setEndTime(upperBound);
                break;
        }
    }

    return request;
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.
the class DDBPredicateUtils method getBestIndexForPredicates.
/**
 * Selects the index that best serves the given predicates, treating the table itself as one more
 * candidate. Falls back to the table index when no index has a hash key covered by a predicate.
 *
 * @param table the original table
 * @param requestedCols the columns requested by the query; secondary indices are only considered if
 *                      {@code indexContainsAllRequiredColumns} accepts them for these columns
 * @param predicates the predicates, keyed by column name
 * @return the first index whose hash and range keys both have predicates; otherwise the first index
 *         whose hash key has a predicate; otherwise the original table index
 */
public static DynamoDBIndex getBestIndexForPredicates(DynamoDBTable table, List<String> requestedCols, Map<String, ValueSet> predicates) {
    Set<String> predicateColumns = predicates.keySet();

    // model the table as an index so it can be ranked alongside the secondary indices
    DynamoDBIndex tableIndex = new DynamoDBIndex(table.getName(), table.getHashKey(), table.getRangeKey(), ProjectionType.ALL, ImmutableList.of());

    // only indices that project every requested column are eligible
    List<DynamoDBIndex> eligibleIndices = table.getIndexes().stream()
            .filter(index -> indexContainsAllRequiredColumns(requestedCols, index, table))
            .collect(Collectors.toList());

    // hash key matches: the table goes first (if it matches), then eligible indices in their listed order
    ImmutableList.Builder<DynamoDBIndex> hashKeyMatchesBuilder = ImmutableList.builder();
    if (predicateColumns.contains(tableIndex.getHashKey())) {
        hashKeyMatchesBuilder.add(tableIndex);
    }
    for (DynamoDBIndex candidate : eligibleIndices) {
        boolean hashKeyHasPredicate = predicateColumns.contains(candidate.getHashKey());
        // an index only counts when its hash key predicate yields at least one attribute value
        if (hashKeyHasPredicate && !getHashKeyAttributeValues(predicates.get(candidate.getHashKey())).isEmpty()) {
            hashKeyMatchesBuilder.add(candidate);
        }
    }
    List<DynamoDBIndex> hashKeyMatches = hashKeyMatchesBuilder.build();

    // range key matches: same ordering rule, table first
    ImmutableList.Builder<DynamoDBIndex> rangeKeyMatchesBuilder = ImmutableList.builder();
    if (tableIndex.getRangeKey().isPresent() && predicateColumns.contains(tableIndex.getRangeKey().get())) {
        rangeKeyMatchesBuilder.add(tableIndex);
    }
    for (DynamoDBIndex candidate : eligibleIndices) {
        if (candidate.getRangeKey().isPresent() && predicateColumns.contains(candidate.getRangeKey().get())) {
            rangeKeyMatchesBuilder.add(candidate);
        }
    }
    List<DynamoDBIndex> rangeKeyMatches = rangeKeyMatchesBuilder.build();

    // prefer an index usable for both keys; otherwise the first hash key match, or the table itself
    DynamoDBIndex fallback = hashKeyMatches.isEmpty() ? tableIndex : hashKeyMatches.get(0);
    return hashKeyMatches.stream()
            .filter(rangeKeyMatches::contains)
            .findFirst()
            .orElse(fallback);
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.
the class DDBPredicateUtils method generateSingleColumnFilter.
/**
 * Builds the filter expression for one column from the {@link ValueSet} predicate on that column.
 *
 * <p>Values referenced by the expression are appended to {@code accumulator}, and their aliases are
 * drawn from {@code valueNameProducer} in the order in which they appear in the expression.
 *
 * @param originalColumnName the column name
 * @param predicate the associated predicate
 * @param accumulator the value accumulator to add values to
 * @param valueNameProducer the value name producer to generate value aliases with
 * @param recordMetadata object containing any necessary metadata from the glue table
 * @return the generated filter expression
 */
public static String generateSingleColumnFilter(String originalColumnName, ValueSet predicate, List<AttributeValue> accumulator, IncrementingValueNameProducer valueNameProducer, DDBRecordMetadata recordMetadata) {
    String columnName = aliasColumn(originalColumnName);

    // A "none" predicate is rendered as: attribute is missing or equal to null.
    if (predicate.isNone()) {
        String equalsNull = toPredicate(originalColumnName, "=", null, accumulator, valueNameProducer.getNext(), recordMetadata);
        return "(attribute_not_exists(" + columnName + ") OR " + equalsNull + ")";
    }
    // An "all" predicate is rendered as: attribute is present and not equal to null.
    if (predicate.isAll()) {
        String notEqualsNull = toPredicate(originalColumnName, "<>", null, accumulator, valueNameProducer.getNext(), recordMetadata);
        return "(attribute_exists(" + columnName + ") AND " + notEqualsNull + ")";
    }

    List<String> orTerms = new ArrayList<>();
    List<Object> exactValues = new ArrayList<>();
    boolean whitelist = true;

    if (predicate instanceof SortedRangeSet) {
        for (Range range : predicate.getRanges().getOrderedRanges()) {
            // Already checked
            checkState(!range.isAll());

            // Single values are gathered and emitted together after the loop.
            if (range.isSingleValue()) {
                exactValues.add(range.getLow().getValue());
                continue;
            }

            List<String> boundTerms = new ArrayList<>();
            if (!range.getLow().isLowerUnbounded()) {
                String lowerOperator;
                switch (range.getLow().getBound()) {
                    case ABOVE:
                        lowerOperator = ">";
                        break;
                    case EXACTLY:
                        lowerOperator = ">=";
                        break;
                    case BELOW:
                        throw new IllegalArgumentException("Low marker should never use BELOW bound");
                    default:
                        throw new AssertionError("Unhandled lower bound: " + range.getLow().getBound());
                }
                boundTerms.add(toPredicate(originalColumnName, lowerOperator, range.getLow().getValue(), accumulator, valueNameProducer.getNext(), recordMetadata));
            }
            if (!range.getHigh().isUpperUnbounded()) {
                String upperOperator;
                switch (range.getHigh().getBound()) {
                    case ABOVE:
                        throw new IllegalArgumentException("High marker should never use ABOVE bound");
                    case EXACTLY:
                        upperOperator = "<=";
                        break;
                    case BELOW:
                        upperOperator = "<";
                        break;
                    default:
                        throw new AssertionError("Unhandled upper bound: " + range.getHigh().getBound());
                }
                boundTerms.add(toPredicate(originalColumnName, upperOperator, range.getHigh().getValue(), accumulator, valueNameProducer.getNext(), recordMetadata));
            }

            // If boundTerms is empty, then the range was ALL, which should already have been checked for
            checkState(!boundTerms.isEmpty());
            orTerms.add("(" + AND_JOINER.join(boundTerms) + ")");
        }
    } else {
        EquatableValueSet equatable = (EquatableValueSet) predicate;
        whitelist = equatable.isWhiteList();
        long rowCount = equatable.getValueBlock().getRowCount();
        for (int row = 0; row < rowCount; row++) {
            exactValues.add(equatable.getValue(row));
        }
    }

    // Emit the collected single values: one value becomes a comparison, several become an IN list.
    if (exactValues.size() == 1) {
        String operator;
        if (whitelist) {
            operator = "=";
        } else {
            operator = "<>";
        }
        orTerms.add(toPredicate(originalColumnName, operator, getOnlyElement(exactValues), accumulator, valueNameProducer.getNext(), recordMetadata));
    } else if (exactValues.size() > 1) {
        // Bind every value first, then draw one alias per value.
        for (Object exactValue : exactValues) {
            bindValue(originalColumnName, exactValue, accumulator, recordMetadata);
        }
        String aliases = COMMA_JOINER.join(Stream.generate(valueNameProducer::getNext).limit(exactValues.size()).collect(toImmutableList()));
        String membership = columnName + " IN (" + aliases + ")";
        if (whitelist) {
            orTerms.add(membership);
        } else {
            orTerms.add("NOT " + membership);
        }
    }

    // at this point we should have some terms
    checkState(!orTerms.isEmpty());

    // when nulls are allowed, also accept a missing attribute or an explicit null
    if (predicate.isNullAllowed()) {
        String equalsNull = toPredicate(originalColumnName, "=", null, accumulator, valueNameProducer.getNext(), recordMetadata);
        orTerms.add("attribute_not_exists(" + columnName + ") OR " + equalsNull);
    }

    // DDB doesn't like redundant parentheses, so a lone term is returned unwrapped
    return orTerms.size() == 1 ? orTerms.get(0) : "(" + OR_JOINER.join(orTerms) + ")";
}
Aggregations