Search in sources :

Example 1 with EquatableValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet in project aws-athena-query-federation by awslabs.

the class PropertyGraphHandler method getQueryPartForContraintsMap.

/**
 * Extends a Gremlin traversal with filter parts derived from the request's constraint summary.
 *
 * <p>Each column constraint is translated through
 * {@code GremlinQueryPreProcessor.generateGremlinQueryPart}:
 * <ul>
 *   <li>{@link SortedRangeSet}: a range whose low and high values have equal string forms becomes an
 *       {@code EQUALTO} part and ends processing of that column's remaining ranges; otherwise a
 *       {@code GREATERTHAN} part is added for a non-null low value and a {@code LESSTHAN} part for a
 *       non-null high value.</li>
 *   <li>{@link EquatableValueSet}: only the value at index 0 is used, as {@code EQUALTO} for a
 *       white list and {@code NOTEQUALTO} otherwise.</li>
 * </ul>
 *
 * @param traversal      Gremlin traversal to extend
 * @param recordsRequest request whose constraint summary (column name to {@link ValueSet}) is
 *                       translated
 *
 * @return the traversal unchanged when the constraint summary is empty, otherwise the traversal
 *         after all constraint parts have been applied
 */
public GraphTraversal getQueryPartForContraintsMap(GraphTraversal traversal, final ReadRecordsRequest recordsRequest) {
    final Map<String, ValueSet> hashMap = recordsRequest.getConstraints().getSummary();
    if (hashMap.isEmpty()) {
        return traversal;
    }
    logger.info("readWithContraint: Constaints Map " + hashMap.toString());
    // Iterate entries so each constraint is looked up once instead of once per instanceof/cast.
    for (final Map.Entry<String, ValueSet> entry : hashMap.entrySet()) {
        final String key = entry.getKey();
        final ValueSet constraint = entry.getValue();
        if (constraint instanceof SortedRangeSet) {
            final List<Range> ranges = ((SortedRangeSet) constraint).getOrderedRanges();
            for (final Range range : ranges) {
                final boolean hasLow = !range.getLow().isNullValue();
                final boolean hasHigh = !range.getHigh().isNullValue();
                if (hasLow && hasHigh && range.getLow().getValue().toString().equals(range.getHigh().getValue().toString())) {
                    traversal = GremlinQueryPreProcessor.generateGremlinQueryPart(traversal, key, range.getLow().getValue().toString(), range.getType(), range.getLow().getBound(), GremlinQueryPreProcessor.Operator.EQUALTO);
                    // NOTE(review): stops at the first single-value range, so any further ranges
                    // for this column are not translated — confirm this is intended.
                    break;
                }
                if (hasLow) {
                    traversal = GremlinQueryPreProcessor.generateGremlinQueryPart(traversal, key, range.getLow().getValue().toString(), range.getType(), range.getLow().getBound(), GremlinQueryPreProcessor.Operator.GREATERTHAN);
                }
                if (hasHigh) {
                    traversal = GremlinQueryPreProcessor.generateGremlinQueryPart(traversal, key, range.getHigh().getValue().toString(), range.getType(), range.getHigh().getBound(), GremlinQueryPreProcessor.Operator.LESSTHAN);
                }
            }
        }
        if (constraint instanceof EquatableValueSet) {
            final EquatableValueSet valueSet = (EquatableValueSet) constraint;
            // NOTE(review): only the first value of the set is translated.
            final GremlinQueryPreProcessor.Operator operator = valueSet.isWhiteList()
                    ? GremlinQueryPreProcessor.Operator.EQUALTO
                    : GremlinQueryPreProcessor.Operator.NOTEQUALTO;
            traversal = GremlinQueryPreProcessor.generateGremlinQueryPart(traversal, key, valueSet.getValue(0).toString(), valueSet.getType(), null, operator);
        }
    }
    return traversal;
}
Also used : EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) Set(java.util.Set) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 2 with EquatableValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet in project aws-athena-query-federation by awslabs.

the class DDBPredicateUtils method generateSingleColumnFilter.

/**
 * Builds the filter expression for one column from the {@link ValueSet} constraining it.
 *
 * <p>Range bounds and discrete values are turned into comparison clauses via {@code toPredicate};
 * multiple discrete values are bound with {@code bindValue} and collapsed into a single
 * {@code IN} (or {@code NOT IN}) clause. All clauses are OR-ed together.
 *
 * @param originalColumnName the column name
 * @param predicate the constraint on that column
 * @param accumulator receives the attribute values referenced by the expression
 * @param valueNameProducer supplies the value aliases used in the expression
 * @param recordMetadata object containing any necessary metadata from the glue table
 * @return the generated filter expression
 */
public static String generateSingleColumnFilter(String originalColumnName, ValueSet predicate, List<AttributeValue> accumulator, IncrementingValueNameProducer valueNameProducer, DDBRecordMetadata recordMetadata) {
    final String alias = aliasColumn(originalColumnName);
    if (predicate.isNone()) {
        String equalsNull = toPredicate(originalColumnName, "=", null, accumulator, valueNameProducer.getNext(), recordMetadata);
        return "(attribute_not_exists(" + alias + ") OR " + equalsNull + ")";
    }
    if (predicate.isAll()) {
        String notEqualsNull = toPredicate(originalColumnName, "<>", null, accumulator, valueNameProducer.getNext(), recordMetadata);
        return "(attribute_exists(" + alias + ") AND " + notEqualsNull + ")";
    }

    List<String> orClauses = new ArrayList<>();
    List<Object> discreteValues = new ArrayList<>();
    boolean whitelist = true;

    if (!(predicate instanceof SortedRangeSet)) {
        EquatableValueSet equatable = (EquatableValueSet) predicate;
        whitelist = equatable.isWhiteList();
        long rows = equatable.getValueBlock().getRowCount();
        for (int row = 0; row < rows; row++) {
            discreteValues.add(equatable.getValue(row));
        }
    } else {
        for (Range range : predicate.getRanges().getOrderedRanges()) {
            // The ALL case was handled by the early return above
            checkState(!range.isAll());
            if (range.isSingleValue()) {
                discreteValues.add(range.getLow().getValue());
                continue;
            }
            List<String> bounds = new ArrayList<>();
            if (!range.getLow().isLowerUnbounded()) {
                String lowerOp;
                switch (range.getLow().getBound()) {
                    case ABOVE:
                        lowerOp = ">";
                        break;
                    case EXACTLY:
                        lowerOp = ">=";
                        break;
                    case BELOW:
                        throw new IllegalArgumentException("Low marker should never use BELOW bound");
                    default:
                        throw new AssertionError("Unhandled lower bound: " + range.getLow().getBound());
                }
                bounds.add(toPredicate(originalColumnName, lowerOp, range.getLow().getValue(), accumulator, valueNameProducer.getNext(), recordMetadata));
            }
            if (!range.getHigh().isUpperUnbounded()) {
                String upperOp;
                switch (range.getHigh().getBound()) {
                    case ABOVE:
                        throw new IllegalArgumentException("High marker should never use ABOVE bound");
                    case EXACTLY:
                        upperOp = "<=";
                        break;
                    case BELOW:
                        upperOp = "<";
                        break;
                    default:
                        throw new AssertionError("Unhandled upper bound: " + range.getHigh().getBound());
                }
                bounds.add(toPredicate(originalColumnName, upperOp, range.getHigh().getValue(), accumulator, valueNameProducer.getNext(), recordMetadata));
            }
            // No bounds at all would mean the range was ALL, which was ruled out above
            checkState(!bounds.isEmpty());
            orClauses.add("(" + AND_JOINER.join(bounds) + ")");
        }
    }

    // Re-add the discrete values: one value becomes a comparison, several become an IN list
    final int discreteCount = discreteValues.size();
    if (discreteCount == 1) {
        String comparison = whitelist ? "=" : "<>";
        orClauses.add(toPredicate(originalColumnName, comparison, discreteValues.get(0), accumulator, valueNameProducer.getNext(), recordMetadata));
    } else if (discreteCount > 1) {
        for (Object discrete : discreteValues) {
            bindValue(originalColumnName, discrete, accumulator, recordMetadata);
        }
        List<String> valueNames = Stream.generate(valueNameProducer::getNext).limit(discreteCount).collect(toImmutableList());
        String negation = whitelist ? "" : "NOT ";
        orClauses.add(negation + alias + " IN (" + COMMA_JOINER.join(valueNames) + ")");
    }

    // Something must have been produced by now
    checkState(!orClauses.isEmpty());

    // Nulls allowed: also match a missing attribute or an explicit null
    if (predicate.isNullAllowed()) {
        String equalsNull = toPredicate(originalColumnName, "=", null, accumulator, valueNameProducer.getNext(), recordMetadata);
        orClauses.add("attribute_not_exists(" + alias + ") OR " + equalsNull);
    }

    // DDB doesn't like redundant parentheses, so a lone clause is returned bare
    return orClauses.size() == 1 ? orClauses.get(0) : "(" + OR_JOINER.join(orClauses) + ")";
}
Also used : EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ArrayList(java.util.ArrayList) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet)

Example 3 with EquatableValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet in project aws-athena-query-federation by awslabs.

the class PredicateBuilder method toPredicate.

/**
 * Renders the constraint on a single column as a parenthesized, OR-joined predicate string.
 *
 * <p>For a {@link SortedRangeSet}, each non-single-value range becomes an AND-ed pair of bound
 * comparisons and the single-value ranges are collapsed into one {@code =} or {@code IN} clause.
 * For an {@link EquatableValueSet}, the values become an {@code IN} list for a white list and a
 * {@code NOT IN} list otherwise. In both cases an {@code IS NULL} clause is added when the value
 * set allows nulls.
 *
 * @param columnName the column the constraint applies to
 * @param valueSet the constraint to render
 * @return the predicate string for the column
 */
private static String toPredicate(String columnName, ValueSet valueSet) {
    List<String> disjuncts = new ArrayList<>();
    List<Object> singleValues = new ArrayList<>();
    if (valueSet instanceof SortedRangeSet) {
        if (valueSet.isNone() && valueSet.isNullAllowed()) {
            return String.format("(%s IS NULL)", columnName);
        }
        if (valueSet.isNullAllowed()) {
            disjuncts.add(String.format("(%s IS NULL)", columnName));
        }
        Range rangeSpan = ((SortedRangeSet) valueSet).getSpan();
        if (!valueSet.isNullAllowed() && rangeSpan.getLow().isLowerUnbounded() && rangeSpan.getHigh().isUpperUnbounded()) {
            return String.format("(%s IS NOT NULL)", columnName);
        }
        for (Range range : valueSet.getRanges().getOrderedRanges()) {
            if (range.isSingleValue()) {
                singleValues.add(range.getLow().getValue());
            } else {
                List<String> rangeConjuncts = new ArrayList<>();
                if (!range.getLow().isLowerUnbounded()) {
                    switch(range.getLow().getBound()) {
                        case ABOVE:
                            rangeConjuncts.add(toPredicate(columnName, ">", range.getLow().getValue(), valueSet.getType()));
                            break;
                        case EXACTLY:
                            rangeConjuncts.add(toPredicate(columnName, ">=", range.getLow().getValue(), valueSet.getType()));
                            break;
                        case BELOW:
                            throw new IllegalArgumentException("Low marker should never use BELOW bound");
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                    }
                }
                if (!range.getHigh().isUpperUnbounded()) {
                    switch(range.getHigh().getBound()) {
                        case ABOVE:
                            throw new IllegalArgumentException("High marker should never use ABOVE bound");
                        case EXACTLY:
                            rangeConjuncts.add(toPredicate(columnName, "<=", range.getHigh().getValue(), valueSet.getType()));
                            break;
                        case BELOW:
                            rangeConjuncts.add(toPredicate(columnName, "<", range.getHigh().getValue(), valueSet.getType()));
                            break;
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                    }
                }
                // If rangeConjuncts is empty, then the range was ALL, which should already have been checked for
                Preconditions.checkState(!rangeConjuncts.isEmpty());
                disjuncts.add("(" + Joiner.on(" AND ").join(rangeConjuncts) + ")");
            }
        }
        // Add back all of the possible single values either as an equality or an IN predicate
        if (singleValues.size() == 1) {
            disjuncts.add(toPredicate(columnName, "=", Iterables.getOnlyElement(singleValues), valueSet.getType()));
        } else if (singleValues.size() > 1) {
            List<String> values = singleValues.stream().map(next -> quoteValue(next, valueSet.getType())).collect(Collectors.toList());
            String valuesStr = Joiner.on(",").join(values);
            disjuncts.add(quoteColumn(columnName) + " IN (" + valuesStr + ")");
        }
    } else if (valueSet instanceof EquatableValueSet) {
        EquatableValueSet equatable = (EquatableValueSet) valueSet;
        // Mirror the range branch: a null-allowing constraint must also match NULL
        if (valueSet.isNullAllowed()) {
            disjuncts.add(String.format("(%s IS NULL)", columnName));
        }
        List<String> values = new ArrayList<>();
        for (int i = 0; i < equatable.getValueBlock().getRowCount(); i++) {
            values.add(quoteValue(equatable.getValue(i), valueSet.getType()));
        }
        String valuesStr = Joiner.on(",").join(values);
        // A non-white-list set enumerates excluded values, so it must be rendered as NOT IN
        String membership = equatable.isWhiteList() ? " IN (" : " NOT IN (";
        disjuncts.add(quoteColumn(columnName) + membership + valuesStr + ")");
    }
    return "(" + Joiner.on(" OR ").join(disjuncts) + ")";
}
Also used : SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range)

Example 4 with EquatableValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet in project aws-athena-query-federation by awslabs.

the class QueryUtils method makePredicate.

/**
 * Converts a single field constraint into a Document for use in a DocumentDB query.
 *
 * <p>A white-list {@link EquatableValueSet} becomes an OR of per-value equality documents. Any
 * other constraint is read as ordered ranges: each bounded range becomes a document of
 * comparison operators, and single-value ranges are collapsed into one equality or IN document.
 *
 * @param field The field for the given ValueSet constraint.
 * @param constraint The constraint to apply to the given field.
 * @return A Document describing the constraint for pushing down into DocumentDB, or {@code null}
 *         when the constraint is not translated (nulls mixed with other values, or an
 *         {@link EquatableValueSet} that is not a white list).
 */
public static Document makePredicate(Field field, ValueSet constraint) {
    String name = field.getName();
    if (constraint.isNone()) {
        return documentOf(name, isNullPredicate());
    }
    if (constraint.isAll()) {
        return documentOf(name, isNotNullPredicate());
    }
    if (constraint.isNullAllowed()) {
        // TODO: support nulls mixed with discrete value constraints
        return null;
    }
    if (constraint instanceof EquatableValueSet) {
        EquatableValueSet equatable = (EquatableValueSet) constraint;
        if (!equatable.isWhiteList()) {
            // The values are exclusions; emitting equality matches for them would select exactly
            // the wrong rows, so decline to translate, as the null-allowed case above does.
            return null;
        }
        Block block = equatable.getValues();
        List<Document> equalities = new ArrayList<>();
        FieldReader fieldReader = block.getFieldReaders().get(0);
        for (int i = 0; i < block.getRowCount(); i++) {
            Document nextEqVal = new Document();
            fieldReader.setPosition(i);
            Object value = fieldReader.readObject();
            nextEqVal.put(EQ_OP, convert(value));
            // Collect the equality document itself (the list was previously added to itself here)
            equalities.add(nextEqVal);
        }
        return orPredicate(equalities.stream().map(next -> new Document(name, next)).collect(toList()));
    }
    List<Object> singleValues = new ArrayList<>();
    List<Document> disjuncts = new ArrayList<>();
    for (Range range : constraint.getRanges().getOrderedRanges()) {
        if (range.isSingleValue()) {
            singleValues.add(convert(range.getSingleValue()));
        } else {
            Document rangeConjuncts = new Document();
            if (!range.getLow().isLowerUnbounded()) {
                switch(range.getLow().getBound()) {
                    case ABOVE:
                        rangeConjuncts.put(GT_OP, convert(range.getLow().getValue()));
                        break;
                    case EXACTLY:
                        rangeConjuncts.put(GTE_OP, convert(range.getLow().getValue()));
                        break;
                    case BELOW:
                        throw new IllegalArgumentException("Low Marker should never use BELOW bound: " + range);
                    default:
                        throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                }
            }
            if (!range.getHigh().isUpperUnbounded()) {
                switch(range.getHigh().getBound()) {
                    case ABOVE:
                        throw new IllegalArgumentException("High Marker should never use ABOVE bound: " + range);
                    case EXACTLY:
                        rangeConjuncts.put(LTE_OP, convert(range.getHigh().getValue()));
                        break;
                    case BELOW:
                        rangeConjuncts.put(LT_OP, convert(range.getHigh().getValue()));
                        break;
                    default:
                        throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                }
            }
            // If rangeConjuncts is empty, then the range was ALL, which should already have been checked for
            verify(!rangeConjuncts.isEmpty());
            disjuncts.add(rangeConjuncts);
        }
    }
    // Add back all of the possible single values either as an equality or an IN predicate
    if (singleValues.size() == 1) {
        disjuncts.add(documentOf(EQ_OP, singleValues.get(0)));
    } else if (singleValues.size() > 1) {
        disjuncts.add(documentOf(IN_OP, singleValues));
    }
    return orPredicate(disjuncts.stream().map(disjunct -> new Document(name, disjunct)).collect(toList()));
}
Also used : EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ArrayList(java.util.ArrayList) Document(org.bson.Document) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) Block(com.amazonaws.athena.connector.lambda.data.Block) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader)

Example 5 with EquatableValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet in project aws-athena-query-federation by awslabs.

the class ElasticsearchQueryUtils method getPredicate.

/**
 * Builds the Elasticsearch query-string predicate for one field from its constraint.
 *
 * @param fieldName The name of the field for the given ValueSet constraint.
 * @param constraint The constraint to apply to the given field.
 * @return The predicate string for the field; {@code EMPTY_PREDICATE} when no predicate part
 *         was produced.
 */
private static String getPredicate(String fieldName, ValueSet constraint) {
    // Nothing may match: (NOT _exists_:field)
    if (constraint.isNone()) {
        return existsPredicate(false, fieldName);
    }
    // Everything may match: (_exists_:field)
    if (constraint.isAll()) {
        return existsPredicate(true, fieldName);
    }

    final List<String> clauses = new ArrayList<>();
    if (!constraint.isNullAllowed()) {
        // Nulls must be excluded from the result, so require the field to exist.
        clauses.add(existsPredicate(true, fieldName));
    }

    if (!(constraint instanceof EquatableValueSet)) {
        final String rangeClause = getPredicateFromRange(fieldName, constraint);
        if (!rangeClause.isEmpty()) {
            clauses.add(rangeClause);
        }
    } else {
        final EquatableValueSet discrete = (EquatableValueSet) constraint;
        final List<String> terms = new ArrayList<>();
        int pos = 0;
        while (pos < discrete.getValueBlock().getRowCount()) {
            terms.add(discrete.getValue(pos).toString());
            pos++;
        }
        // field:(value1 OR value2 OR value3...), prefixed with NOT for an exclusion list
        final String termGroup = fieldName + ":(" + Strings.collectionToDelimitedString(terms, OR_OPER) + ")";
        clauses.add(discrete.isWhiteList() ? termGroup : NOT_OPER + termGroup);
    }

    if (clauses.isEmpty()) {
        return EMPTY_PREDICATE;
    }
    return Strings.collectionToDelimitedString(clauses, AND_OPER);
}
Also used : EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ArrayList(java.util.ArrayList)

Aggregations

EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)5 Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range)4 ArrayList (java.util.ArrayList)4 SortedRangeSet (com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet)3 Block (com.amazonaws.athena.connector.lambda.data.Block)1 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)1 List (java.util.List)1 Set (java.util.Set)1 FieldReader (org.apache.arrow.vector.complex.reader.FieldReader)1 Document (org.bson.Document)1