Search in sources :

Example 6 with SortedRangeSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet in project aws-athena-query-federation by awslabs.

the class BigQuerySqlUtils method toPredicate.

/**
 * Builds the parenthesized SQL predicate for the constraint placed on a single column.
 *
 * <p>Only {@link SortedRangeSet} constraints are translated. Single-value ranges are collected and
 * emitted together as one equality or one {@code IN} predicate; every other range becomes the
 * {@code AND} of its lower and upper bound comparisons. All pieces are joined with {@code OR}.
 *
 * @param columnName the column name; it is passed through {@code quote} wherever this method emits it
 * @param valueSet the constraint to translate
 * @param type the Arrow type forwarded to the value-binding helpers
 * @param parameterValues receives one entry per {@code ?} placeholder of the IN list, in list order; it is
 *        also handed to the comparison helper (presumably so that helper can register the value for its
 *        own placeholder - confirm against the helper)
 * @return the predicate text wrapped in parentheses
 * @throws IllegalArgumentException if a low marker is bounded BELOW or a high marker is bounded ABOVE
 */
private static String toPredicate(String columnName, ValueSet valueSet, ArrowType type, List<QueryParameterValue> parameterValues) {
    List<String> disjuncts = new ArrayList<>();
    List<Object> singleValues = new ArrayList<>();
    if (valueSet instanceof SortedRangeSet) {
        // Quote once so the NULL checks name the column exactly like the IN predicate below does.
        String quotedColumn = quote(columnName);
        if (valueSet.isNone() && valueSet.isNullAllowed()) {
            return String.format("(%s IS NULL)", quotedColumn);
        }
        if (valueSet.isNullAllowed()) {
            disjuncts.add(String.format("(%s IS NULL)", quotedColumn));
        }
        Range rangeSpan = ((SortedRangeSet) valueSet).getSpan();
        // Unbounded on both sides and nulls excluded: the only thing left to filter out is NULL.
        if (!valueSet.isNullAllowed() && rangeSpan.getLow().isLowerUnbounded() && rangeSpan.getHigh().isUpperUnbounded()) {
            return String.format("(%s IS NOT NULL)", quotedColumn);
        }
        for (Range range : valueSet.getRanges().getOrderedRanges()) {
            if (range.isSingleValue()) {
                // Deferred so that all single values end up in one "=" or "IN" predicate.
                singleValues.add(range.getLow().getValue());
            } else {
                List<String> rangeConjuncts = new ArrayList<>();
                if (!range.getLow().isLowerUnbounded()) {
                    switch(range.getLow().getBound()) {
                        case ABOVE:
                            rangeConjuncts.add(toPredicate(columnName, ">", range.getLow().getValue(), type, parameterValues));
                            break;
                        case EXACTLY:
                            rangeConjuncts.add(toPredicate(columnName, ">=", range.getLow().getValue(), type, parameterValues));
                            break;
                        case BELOW:
                            throw new IllegalArgumentException("Low marker should never use BELOW bound");
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                    }
                }
                if (!range.getHigh().isUpperUnbounded()) {
                    switch(range.getHigh().getBound()) {
                        case ABOVE:
                            throw new IllegalArgumentException("High marker should never use ABOVE bound");
                        case EXACTLY:
                            rangeConjuncts.add(toPredicate(columnName, "<=", range.getHigh().getValue(), type, parameterValues));
                            break;
                        case BELOW:
                            rangeConjuncts.add(toPredicate(columnName, "<", range.getHigh().getValue(), type, parameterValues));
                            break;
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                    }
                }
                // An empty rangeConjuncts means the range had no bound on either side (ALL); the
                // unbounded-span case without nulls has already returned above.
                Preconditions.checkState(!rangeConjuncts.isEmpty());
                disjuncts.add("(" + Joiner.on(" AND ").join(rangeConjuncts) + ")");
            }
        }
        // Add back all of the possible single values either as an equality or an IN predicate
        if (singleValues.size() == 1) {
            disjuncts.add(toPredicate(columnName, "=", Iterables.getOnlyElement(singleValues), type, parameterValues));
        } else if (singleValues.size() > 1) {
            // Values are registered in the same order as the "?" placeholders generated below.
            for (Object value : singleValues) {
                parameterValues.add(getValueForWhereClause(columnName, value, type));
            }
            String values = Joiner.on(",").join(Collections.nCopies(singleValues.size(), "?"));
            disjuncts.add(quotedColumn + " IN (" + values + ")");
        }
    }
    // NOTE(review): for any ValueSet that is not a SortedRangeSet nothing is added to disjuncts,
    // so the result is "()" - confirm callers never pass such a constraint.
    return "(" + Joiner.on(" OR ").join(disjuncts) + ")";
}
Also used : SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) ArrayList(java.util.ArrayList) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range)

Example 7 with SortedRangeSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet in project aws-athena-query-federation by awslabs.

the class PredicateBuilder method toPredicate.

/**
 * Builds the parenthesized predicate template for the constraint placed on a single column.
 *
 * <p>Only {@link SortedRangeSet} constraints are translated. Single-value ranges are collected and
 * emitted together as one equality or one {@code IN} predicate; every other range becomes the
 * {@code AND} of its lower and upper bound comparisons. All pieces are joined with {@code OR}.
 * Values are not inlined: the IN list is written with {@code <columnName>} placeholders and the
 * values are stored in {@code accumulator}.
 *
 * @param columnName the column name; it is passed through {@code quote} wherever this method emits it
 * @param valueSet the constraint to translate
 * @param type the Arrow type recorded together with each accumulated value
 * @param accumulator receives the type and value for the placeholders, keyed by column name; it is also
 *        handed to the comparison helper (presumably for the same purpose - confirm against the helper)
 * @return the predicate text wrapped in parentheses
 * @throws IllegalArgumentException if a low marker is bounded BELOW or a high marker is bounded ABOVE
 */
private static String toPredicate(String columnName, ValueSet valueSet, ArrowType type, HashMap<String, PredicateBuilder.TypeAndValue> accumulator) {
    List<String> disjuncts = new ArrayList<>();
    List<Object> singleValues = new ArrayList<>();
    if (valueSet instanceof SortedRangeSet) {
        // Quote once so the NULL checks name the column exactly like the IN predicate below does.
        String quotedColumn = quote(columnName);
        if (valueSet.isNone() && valueSet.isNullAllowed()) {
            return String.format("(%s IS NULL)", quotedColumn);
        }
        if (valueSet.isNullAllowed()) {
            disjuncts.add(String.format("(%s IS NULL)", quotedColumn));
        }
        Range rangeSpan = ((SortedRangeSet) valueSet).getSpan();
        // Unbounded on both sides and nulls excluded: the only thing left to filter out is NULL.
        if (!valueSet.isNullAllowed() && rangeSpan.getLow().isLowerUnbounded() && rangeSpan.getHigh().isUpperUnbounded()) {
            return String.format("(%s IS NOT NULL)", quotedColumn);
        }
        for (Range range : valueSet.getRanges().getOrderedRanges()) {
            if (range.isSingleValue()) {
                // Deferred so that all single values end up in one "=" or "IN" predicate.
                singleValues.add(range.getLow().getValue());
            } else {
                List<String> rangeConjuncts = new ArrayList<>();
                if (!range.getLow().isLowerUnbounded()) {
                    switch(range.getLow().getBound()) {
                        case ABOVE:
                            rangeConjuncts.add(toPredicate(columnName, ">", range.getLow().getValue(), type, accumulator));
                            break;
                        case EXACTLY:
                            rangeConjuncts.add(toPredicate(columnName, ">=", range.getLow().getValue(), type, accumulator));
                            break;
                        case BELOW:
                            throw new IllegalArgumentException("Low marker should never use BELOW bound");
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                    }
                }
                if (!range.getHigh().isUpperUnbounded()) {
                    switch(range.getHigh().getBound()) {
                        case ABOVE:
                            throw new IllegalArgumentException("High marker should never use ABOVE bound");
                        case EXACTLY:
                            rangeConjuncts.add(toPredicate(columnName, "<=", range.getHigh().getValue(), type, accumulator));
                            break;
                        case BELOW:
                            rangeConjuncts.add(toPredicate(columnName, "<", range.getHigh().getValue(), type, accumulator));
                            break;
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                    }
                }
                // An empty rangeConjuncts means the range had no bound on either side (ALL); the
                // unbounded-span case without nulls has already returned above.
                Preconditions.checkState(!rangeConjuncts.isEmpty());
                disjuncts.add("(" + Joiner.on(" AND ").join(rangeConjuncts) + ")");
            }
        }
        // Add back all of the possible single values either as an equality or an IN predicate
        if (singleValues.size() == 1) {
            disjuncts.add(toPredicate(columnName, "=", Iterables.getOnlyElement(singleValues), type, accumulator));
        } else if (singleValues.size() > 1) {
            // NOTE(review): every value is stored under the same key (columnName), so each put
            // replaces the previous entry and only the last value remains in the accumulator, while
            // the IN list below repeats the identical <columnName> placeholder. Left unchanged
            // because the code that consumes the accumulator is not visible here - confirm whether
            // multi-value IN lists are bound correctly.
            for (Object value : singleValues) {
                accumulator.put(columnName, new PredicateBuilder.TypeAndValue(type, value));
            }
            String values = Joiner.on(",").join(Collections.nCopies(singleValues.size(), "<" + columnName + ">"));
            disjuncts.add(quotedColumn + " IN (" + values + ")");
        }
    }
    // NOTE(review): for any ValueSet that is not a SortedRangeSet nothing is added to disjuncts,
    // so the result is "()" - confirm callers never pass such a constraint.
    return "(" + Joiner.on(" OR ").join(disjuncts) + ")";
}
Also used : SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) ArrayList(java.util.ArrayList) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range)

Example 8 with SortedRangeSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet in project aws-athena-query-federation by awslabs.

the class PredicateBuilder method toPredicate.

/**
 * Builds the parenthesized SQL predicate for the constraint placed on a single column, with the
 * constraint values rendered into the text through {@code quoteValue}.
 *
 * <p>For a {@link SortedRangeSet}, single-value ranges are collected and emitted together as one
 * equality or one {@code IN} predicate; every other range becomes the {@code AND} of its lower and
 * upper bound comparisons. For an {@link EquatableValueSet}, all rows of its value block are emitted
 * as one {@code IN} predicate. All pieces are joined with {@code OR}.
 *
 * @param columnName the column name; it is passed through {@code quoteColumn} wherever this method emits it
 * @param valueSet the constraint to translate; its type is used when rendering values
 * @return the predicate text wrapped in parentheses
 * @throws IllegalArgumentException if a low marker is bounded BELOW or a high marker is bounded ABOVE
 */
private static String toPredicate(String columnName, ValueSet valueSet) {
    List<String> disjuncts = new ArrayList<>();
    List<Object> singleValues = new ArrayList<>();
    // Quote once so the NULL checks name the column exactly like the IN predicates below do.
    String quotedColumn = quoteColumn(columnName);
    if (valueSet instanceof SortedRangeSet) {
        if (valueSet.isNone() && valueSet.isNullAllowed()) {
            return String.format("(%s IS NULL)", quotedColumn);
        }
        if (valueSet.isNullAllowed()) {
            disjuncts.add(String.format("(%s IS NULL)", quotedColumn));
        }
        Range rangeSpan = ((SortedRangeSet) valueSet).getSpan();
        // Unbounded on both sides and nulls excluded: the only thing left to filter out is NULL.
        if (!valueSet.isNullAllowed() && rangeSpan.getLow().isLowerUnbounded() && rangeSpan.getHigh().isUpperUnbounded()) {
            return String.format("(%s IS NOT NULL)", quotedColumn);
        }
        for (Range range : valueSet.getRanges().getOrderedRanges()) {
            if (range.isSingleValue()) {
                // Deferred so that all single values end up in one "=" or "IN" predicate.
                singleValues.add(range.getLow().getValue());
            } else {
                List<String> rangeConjuncts = new ArrayList<>();
                if (!range.getLow().isLowerUnbounded()) {
                    switch(range.getLow().getBound()) {
                        case ABOVE:
                            rangeConjuncts.add(toPredicate(columnName, ">", range.getLow().getValue(), valueSet.getType()));
                            break;
                        case EXACTLY:
                            rangeConjuncts.add(toPredicate(columnName, ">=", range.getLow().getValue(), valueSet.getType()));
                            break;
                        case BELOW:
                            throw new IllegalArgumentException("Low marker should never use BELOW bound");
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                    }
                }
                if (!range.getHigh().isUpperUnbounded()) {
                    switch(range.getHigh().getBound()) {
                        case ABOVE:
                            throw new IllegalArgumentException("High marker should never use ABOVE bound");
                        case EXACTLY:
                            rangeConjuncts.add(toPredicate(columnName, "<=", range.getHigh().getValue(), valueSet.getType()));
                            break;
                        case BELOW:
                            rangeConjuncts.add(toPredicate(columnName, "<", range.getHigh().getValue(), valueSet.getType()));
                            break;
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                    }
                }
                // An empty rangeConjuncts means the range had no bound on either side (ALL); the
                // unbounded-span case without nulls has already returned above.
                Preconditions.checkState(!rangeConjuncts.isEmpty());
                disjuncts.add("(" + Joiner.on(" AND ").join(rangeConjuncts) + ")");
            }
        }
        // Add back all of the possible single values either as an equality or an IN predicate
        if (singleValues.size() == 1) {
            disjuncts.add(toPredicate(columnName, "=", Iterables.getOnlyElement(singleValues), valueSet.getType()));
        } else if (singleValues.size() > 1) {
            List<String> values = singleValues.stream().map(next -> quoteValue(next, valueSet.getType())).collect(Collectors.toList());
            String valuesStr = Joiner.on(",").join(values);
            disjuncts.add(quotedColumn + " IN (" + valuesStr + ")");
        }
    } else if (valueSet instanceof EquatableValueSet) {
        // NOTE(review): this branch always produces an inclusive IN list. It does not look at
        // whether nulls are allowed, and if EquatableValueSet can also describe an exclusion set
        // that case is not distinguished here - confirm. A value block with zero rows yields
        // "IN ()".
        List<String> values = new ArrayList<>();
        for (int i = 0; i < ((EquatableValueSet) valueSet).getValueBlock().getRowCount(); i++) {
            values.add(quoteValue(((EquatableValueSet) valueSet).getValue(i), valueSet.getType()));
        }
        String valuesStr = Joiner.on(",").join(values);
        disjuncts.add(quotedColumn + " IN (" + valuesStr + ")");
    }
    // NOTE(review): a ValueSet of any other kind leaves disjuncts empty, so the result is "()".
    return "(" + Joiner.on(" OR ").join(disjuncts) + ")";
}
Also used : SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range)

Example 9 with SortedRangeSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet in project aws-athena-query-federation by awslabs.

the class MetricUtils method makeGetMetricDataRequest.

/**
 * Creates a Cloudwatch Metrics sample data request from the provided inputs.
 *
 * <p>One {@code MetricDataQuery} is added per {@code MetricStat} deserialized from the split, with ids
 * {@code m1}, {@code m2}, ... in list order. The time window defaults to [epoch, now]; when the
 * constraint on {@code TIMESTAMP_FIELD} is a {@link SortedRangeSet} that does not allow nulls, the
 * non-null ends of its overall span replace the corresponding defaults.
 *
 * @param readRecordsRequest The ReadRecordsRequest to make into a Cloudwatch Metrics Data request.
 * @return The Cloudwatch Metrics Data request that matches the requested read operation.
 */
protected static GetMetricDataRequest makeGetMetricDataRequest(ReadRecordsRequest readRecordsRequest) {
    Split split = readRecordsRequest.getSplit();
    String serializedMetricStats = split.getProperty(MetricStatSerDe.SERIALIZED_METRIC_STATS_FIELD_NAME);
    List<MetricStat> metricStats = MetricStatSerDe.deserialize(serializedMetricStats);
    GetMetricDataRequest dataRequest = new GetMetricDataRequest();
    List<MetricDataQuery> metricDataQueries = new ArrayList<>();
    int metricId = 1;
    for (MetricStat nextMetricStat : metricStats) {
        metricDataQueries.add(new MetricDataQuery().withMetricStat(nextMetricStat).withId("m" + metricId++));
    }
    dataRequest.withMetricDataQueries(metricDataQueries);
    ValueSet timeConstraint = readRecordsRequest.getConstraints().getSummary().get(TIMESTAMP_FIELD);
    if (timeConstraint instanceof SortedRangeSet && !timeConstraint.isNullAllowed()) {
        // SortedRangeSet is how >, <, between is represented which are easiest and most common when
        // searching logs so we attempt to push that down here as an optimization. SQL can represent complex
        // overlapping ranges which Cloudwatch can not support so this is not a replacement for applying
        // constraints using the ConstraintEvaluator.
        Range basicPredicate = ((SortedRangeSet) timeConstraint).getSpan();
        // The constraint values are scaled by 1000 before being used as Date milliseconds
        // (presumably they are epoch seconds - confirm).
        if (!basicPredicate.getLow().isNullValue()) {
            long startMillis = ((Long) basicPredicate.getLow().getValue()) * 1000;
            Date startTime = new Date(startMillis);
            // TODO: confirm timezone handling
            logger.info("makeGetMetricsRequest: with startTime " + startMillis + " " + startTime);
            dataRequest.withStartTime(startTime);
        } else {
            // TODO: confirm timezone handling
            dataRequest.withStartTime(new Date(0));
        }
        if (!basicPredicate.getHigh().isNullValue()) {
            long endMillis = ((Long) basicPredicate.getHigh().getValue()) * 1000;
            Date endTime = new Date(endMillis);
            // TODO: confirm timezone handling
            // NOTE(review): the CloudWatch GetMetricData reference describes EndTime as exclusive,
            // and the bound kind of the high marker is not inspected here - confirm that an
            // inclusive upper bound does not lose the data point at exactly this time.
            logger.info("makeGetMetricsRequest: with endTime " + endMillis + " " + endTime);
            dataRequest.withEndTime(endTime);
        } else {
            // TODO: confirm timezone handling
            dataRequest.withEndTime(new Date(System.currentTimeMillis()));
        }
    } else {
        // No usable time constraint: request everything from the epoch up to now.
        // TODO: confirm timezone handling
        dataRequest.withStartTime(new Date(0));
        dataRequest.withEndTime(new Date(System.currentTimeMillis()));
    }
    return dataRequest;
}
Also used : MetricStat(com.amazonaws.services.cloudwatch.model.MetricStat) ArrayList(java.util.ArrayList) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) Date(java.util.Date) SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) GetMetricDataRequest(com.amazonaws.services.cloudwatch.model.GetMetricDataRequest) Metric(com.amazonaws.services.cloudwatch.model.Metric) Split(com.amazonaws.athena.connector.lambda.domain.Split) MetricDataQuery(com.amazonaws.services.cloudwatch.model.MetricDataQuery) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Metric(com.amazonaws.services.cloudwatch.model.Metric)

Aggregations

Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range)9 SortedRangeSet (com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet)9 ArrayList (java.util.ArrayList)7 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)3 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)3 Split (com.amazonaws.athena.connector.lambda.domain.Split)1 GetMetricDataRequest (com.amazonaws.services.cloudwatch.model.GetMetricDataRequest)1 Metric (com.amazonaws.services.cloudwatch.model.Metric)1 MetricDataQuery (com.amazonaws.services.cloudwatch.model.MetricDataQuery)1 MetricStat (com.amazonaws.services.cloudwatch.model.MetricStat)1 Date (java.util.Date)1 List (java.util.List)1 Set (java.util.Set)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1