Search in sources :

Example 26 with TimeValue

use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.

the class DefaultCube method getTimeSeries.

/**
 * Scans fact records and folds them into per-series maps of timestamp to aggregated value.
 *
 * <p>Each scanned record is assigned to a series identified by the values of the query's group-by dimensions and
 * by the record's measure name. Values sharing a timestamp within a series are combined with the aggregation
 * function the query declares for that measure. Records carrying a {@code null} value in any group-by dimension
 * are skipped. Scanning stops once {@code MAX_RECORDS_TO_SCAN} non-skipped records have been processed.
 *
 * @param query the cube query supplying the group-by dimensions and the per-measure aggregation functions
 * @param scanner the source of scanned fact records
 * @return a table of {dimension values, measure} -> {timestamp -> aggregated value}
 * @throws RuntimeException if a scanned record with data points belongs to a measure for which the query declares
 *     none of the supported aggregation functions
 */
private Table<Map<String, String>, String, Map<Long, Long>> getTimeSeries(CubeQuery query, FactScanner scanner) {
    // {dimension values, measure} -> {time -> value}s
    Table<Map<String, String>, String, Map<Long, Long>> result = HashBasedTable.create();
    int count = 0;
    while (scanner.hasNext()) {
        FactScanResult next = scanner.next();
        incrementMetric("cube.query.scan.records.count", 1);
        boolean skip = false;
        // using tree map, as we are using it as a key for a map
        Map<String, String> seriesDimensions = Maps.newTreeMap();
        for (String dimensionName : query.getGroupByDimensions()) {
            // todo: use Map<String, String> instead of List<DimensionValue> into a String, String, everywhere
            for (DimensionValue dimensionValue : next.getDimensionValues()) {
                if (!dimensionName.equals(dimensionValue.getName())) {
                    continue;
                }
                if (dimensionValue.getValue() == null) {
                    // Currently, we do NOT return null as grouped by value.
                    // Depending on whether dimension is required or not the records with null value in it may or
                    // may not be in aggregation. At this moment, the choosing of the aggregation for query doesn't
                    // look at this, so potentially null may or may not be included in results, depending on the
                    // aggregation selected querying. We don't want to produce inconsistent results varying due to
                    // different aggregations selected, so don't return nulls in any of those cases.
                    skip = true;
                } else {
                    seriesDimensions.put(dimensionName, dimensionValue.getValue());
                }
                break;
            }
            if (skip) {
                // the record is dropped as a whole, so resolving the remaining dimensions would be wasted work
                break;
            }
        }
        if (skip) {
            incrementMetric("cube.query.scan.skipped.count", 1);
            continue;
        }
        String measureName = next.getMeasureName();
        // Both are resolved once per record, on the first data point: a record without data points must neither
        // create an empty series in the result nor fail on an unknown aggregation function.
        Map<Long, Long> series = null;
        AggregationFunction function = null;
        for (TimeValue timeValue : next) {
            if (series == null) {
                series = result.get(seriesDimensions, measureName);
                if (series == null) {
                    series = Maps.<Long, Long>newHashMap();
                    result.put(seriesDimensions, measureName, series);
                }
                function = query.getMeasurements().get(measureName);
            }
            if (AggregationFunction.SUM == function) {
                series.merge(timeValue.getTimestamp(), timeValue.getValue(), Long::sum);
            } else if (AggregationFunction.MAX == function) {
                series.merge(timeValue.getTimestamp(), timeValue.getValue(), Long::max);
            } else if (AggregationFunction.MIN == function) {
                series.merge(timeValue.getTimestamp(), timeValue.getValue(), Long::min);
            } else if (AggregationFunction.LATEST == function) {
                // the most recently scanned value wins
                series.put(timeValue.getTimestamp(), timeValue.getValue());
            } else {
                // should never happen: developer error
                throw new RuntimeException("Unknown MeasureType: " + function);
            }
        }
        // only records that were not skipped count towards the scan limit
        if (++count >= MAX_RECORDS_TO_SCAN) {
            break;
        }
    }
    return result;
}
Also used : AggregationFunction(io.cdap.cdap.api.dataset.lib.cube.AggregationFunction) FactScanResult(io.cdap.cdap.data2.dataset2.lib.timeseries.FactScanResult) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TimeValue(io.cdap.cdap.api.dataset.lib.cube.TimeValue)

Example 27 with TimeValue

use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.

the class DefaultCube method convertToQueryResult.

/**
 * Converts the aggregated result table into a collection of time series, one per grouping and measure.
 *
 * <p>The data points of every series are sorted. If a series has more data points than the query limit and the
 * query's aggregation option is one of {@code PARTITION_AGG_OPTIONS}, the points are split into {@code limit}
 * equally sized intervals (dropping the leading points that do not fill an interval) and every interval is reduced
 * to a single point. Otherwise the points are run through the query's interpolator and cut off at the limit.
 *
 * @param query the cube query supplying the limit, aggregation option, interpolator and resolution
 * @param resultTable a table of {dimension values, measure} -> {timestamp -> value}
 * @return the time series built from the table
 * @throws IllegalArgumentException if the table is not empty and the query limit is not greater than 0
 */
private Collection<TimeSeries> convertToQueryResult(CubeQuery query, Table<Map<String, String>, String, Map<Long, Long>> resultTable) {
    List<TimeSeries> result = new ArrayList<>();
    // iterating each groupValue dimensions
    for (Map.Entry<Map<String, String>, Map<String, Map<Long, Long>>> row : resultTable.rowMap().entrySet()) {
        // iterating each measure
        for (Map.Entry<String, Map<Long, Long>> measureEntry : row.getValue().entrySet()) {
            // generating time series for a grouping and a measure
            List<TimeValue> timeValues = new ArrayList<>();
            for (Map.Entry<Long, Long> timeValue : measureEntry.getValue().entrySet()) {
                timeValues.add(new TimeValue(timeValue.getKey(), timeValue.getValue()));
            }
            Collections.sort(timeValues);
            List<TimeValue> resultTimeValues = new ArrayList<>();
            AggregationOption aggregationOption = query.getAggregationOption();
            // this should not happen in production, since the check has been made in the handler
            if (query.getLimit() <= 0) {
                throw new IllegalArgumentException("The query limit must be greater than 0, but was " + query.getLimit());
            }
            // option LATEST and SUM.
            if (query.getLimit() < timeValues.size() && PARTITION_AGG_OPTIONS.contains(aggregationOption)) {
                // the limit is smaller than the number of points here, so the partition size is at least 1
                int partitionSize = timeValues.size() / query.getLimit();
                int remainder = timeValues.size() % query.getLimit();
                // ignore the first 'remainder' data points, so that every interval has exactly partitionSize points
                for (List<TimeValue> interval : Iterables.partition(timeValues.subList(remainder, timeValues.size()), partitionSize)) {
                    TimeValue lastInInterval = interval.get(interval.size() - 1);
                    if (aggregationOption.equals(AggregationOption.LATEST)) {
                        // for LATEST we only need to get the last data point in the interval
                        resultTimeValues.add(lastInInterval);
                    } else if (aggregationOption.equals(AggregationOption.SUM)) {
                        // for SUM we want to sum up all the values in the interval, reported at its last timestamp
                        long sum = interval.stream().mapToLong(TimeValue::getValue).sum();
                        resultTimeValues.add(new TimeValue(lastInInterval.getTimestamp(), sum));
                    }
                }
            } else {
                // TODO: CDAP-15565 remove the interpolation logic since it is never maintained and adds huge complexity
                PeekingIterator<TimeValue> timeValueItor = Iterators.peekingIterator(new TimeSeriesInterpolator(timeValues, query.getInterpolator(), query.getResolution()).iterator());
                // number of data points emitted for this series so far
                int count = 0;
                while (timeValueItor.hasNext()) {
                    TimeValue timeValue = timeValueItor.next();
                    resultTimeValues.add(new TimeValue(timeValue.getTimestamp(), timeValue.getValue()));
                    if (++count >= query.getLimit()) {
                        break;
                    }
                }
            }
            result.add(new TimeSeries(measureEntry.getKey(), row.getKey(), resultTimeValues));
        }
    }
    return result;
}
Also used : TimeSeries(io.cdap.cdap.api.dataset.lib.cube.TimeSeries) ArrayList(java.util.ArrayList) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TimeValue(io.cdap.cdap.api.dataset.lib.cube.TimeValue) AggregationOption(io.cdap.cdap.api.dataset.lib.cube.AggregationOption)

Example 28 with TimeValue

use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.

the class MetricsProcessorServiceTest method assertMetricsResult.

/**
 * Asserts that the metric store reports the expected SUM-aggregated value for every given metric.
 *
 * <p>Each metric is queried over the time range {@code [0, Integer.MAX_VALUE]} within the given context. The query
 * is expected to yield exactly one time series holding exactly one data point.
 *
 * @param metricStore the store to query
 * @param metricsContext the tags to query the metrics with
 * @param expected metric name -> expected value
 */
private void assertMetricsResult(MetricStore metricStore, Map<String, String> metricsContext, Map<String, Long> expected) {
    for (Map.Entry<String, Long> expectation : expected.entrySet()) {
        String metricName = expectation.getKey();
        MetricDataQuery sumQuery = new MetricDataQuery(0, Integer.MAX_VALUE, Integer.MAX_VALUE, metricName, AggregationFunction.SUM, metricsContext, ImmutableList.<String>of());
        // a single series with a single data point is expected for every metric
        MetricTimeSeries onlySeries = Iterables.getOnlyElement(metricStore.query(sumQuery));
        TimeValue onlyPoint = Iterables.getOnlyElement(onlySeries.getTimeValues());
        String failureMessage = String.format("Actual value of metric: %s does not match expected", metricName);
        Assert.assertEquals(failureMessage, expectation.getValue().longValue(), onlyPoint.getValue());
    }
}
Also used : MetricTimeSeries(io.cdap.cdap.api.metrics.MetricTimeSeries) MetricDataQuery(io.cdap.cdap.api.metrics.MetricDataQuery) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) TimeValue(io.cdap.cdap.api.dataset.lib.cube.TimeValue)

Example 29 with TimeValue

use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.

the class FactScanner method createIterator.

/**
 * Creates an iterator that turns the rows of the underlying scanner into {@link FactScanResult}s.
 *
 * <p>Rows of measures that were not requested are skipped, and only data points with a timestamp within
 * {@code [startTs, endTs]} are kept. The underlying scanner is closed once the iterator runs out of data.
 *
 * @return an iterator over the scanned facts
 */
private Iterator<FactScanResult> createIterator() {
    return new AbstractIterator<FactScanResult>() {

        @Override
        protected FactScanResult computeNext() {
            for (Row row = scanner.next(); row != null; row = scanner.next()) {
                rowScanned++;
                byte[] key = row.getRow();
                // Decode context and metric from key
                String measure = codec.getMeasureName(key);
                // an empty measureNames means that all metrics are included
                boolean requested = measureNames.isEmpty() || measureNames.contains(measure);
                if (!requested) {
                    continue;
                }
                // todo: codec.getDimensionValues(rowKey) needs to un-encode dimension names which may result in read in
                // entity table (depending on the cache and its state). To avoid that, we can pass to scanner the
                // list of dimension names as we *always* know it (it is given) at the time of scanning
                List<DimensionValue> dimensions = codec.getDimensionValues(key);
                List<TimeValue> points = Lists.newLinkedList();
                boolean pastEnd = false;
                // todo: entry set is ordered by ts?
                for (Map.Entry<byte[], byte[]> column : row.getColumns().entrySet()) {
                    long timestamp = codec.getTimestamp(key, column.getKey());
                    if (timestamp >= startTs) {
                        if (timestamp > endTs) {
                            // stop reading this row at the first column beyond the end of the time range
                            pastEnd = true;
                            break;
                        }
                        // todo: move Bytes.toLong into codec?
                        points.add(new TimeValue(timestamp, Bytes.toLong(column.getValue())));
                    }
                }
                if (pastEnd && points.isEmpty()) {
                    // nothing was collected before running past the end of the time range: stop scanning
                    break;
                }
                // todo: can return empty list, if all data is < startTs or > endTs
                return new FactScanResult(measure, dimensions, points);
            }
            scanner.close();
            return endOfData();
        }
    };
}
Also used : DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) AbstractIterator(com.google.common.collect.AbstractIterator) Row(io.cdap.cdap.api.dataset.table.Row) Map(java.util.Map) TimeValue(io.cdap.cdap.api.dataset.lib.cube.TimeValue)

Example 30 with TimeValue

use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.

the class ProfileMetricServiceTest method getMetric.

/**
 * Queries the SUM-aggregated value of a profile metric emitted for the given program run.
 *
 * @param metricStore the store to query
 * @param programRunId the program run supplying the namespace, program type, application and program tags
 * @param profileId the profile supplying the profile scope and profile name tags
 * @param metricName the name of the metric to query
 * @return the value of the first data point of the first returned time series, or 0 if the query returns no
 *     time series or that series has no data points
 */
private long getMetric(MetricStore metricStore, ProgramRunId programRunId, ProfileId profileId, String metricName) {
    ImmutableMap.Builder<String, String> tagsBuilder = ImmutableMap.<String, String>builder();
    tagsBuilder.put(Constants.Metrics.Tag.PROFILE_SCOPE, profileId.getScope().name());
    tagsBuilder.put(Constants.Metrics.Tag.PROFILE, profileId.getProfile());
    tagsBuilder.put(Constants.Metrics.Tag.NAMESPACE, programRunId.getNamespace());
    tagsBuilder.put(Constants.Metrics.Tag.PROGRAM_TYPE, programRunId.getType().getPrettyName());
    tagsBuilder.put(Constants.Metrics.Tag.APP, programRunId.getApplication());
    tagsBuilder.put(Constants.Metrics.Tag.PROGRAM, programRunId.getProgram());
    Map<String, String> tags = tagsBuilder.build();

    Collection<MetricTimeSeries> allSeries = metricStore.query(
        new MetricDataQuery(0, 0, Integer.MAX_VALUE, metricName, AggregationFunction.SUM, tags, new ArrayList<>()));
    if (allSeries.isEmpty()) {
        return 0;
    }
    // only the first data point of the first series is of interest
    List<TimeValue> points = allSeries.iterator().next().getTimeValues();
    return points.isEmpty() ? 0 : points.get(0).getValue();
}
Also used : MetricTimeSeries(io.cdap.cdap.api.metrics.MetricTimeSeries) MetricDataQuery(io.cdap.cdap.api.metrics.MetricDataQuery) TimeValue(io.cdap.cdap.api.dataset.lib.cube.TimeValue)

Aggregations

TimeValue (io.cdap.cdap.api.dataset.lib.cube.TimeValue)44 MetricTimeSeries (io.cdap.cdap.api.metrics.MetricTimeSeries)26 MetricDataQuery (io.cdap.cdap.api.metrics.MetricDataQuery)18 ArrayList (java.util.ArrayList)18 Map (java.util.Map)12 Test (org.junit.Test)12 DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue)8 TimeSeries (io.cdap.cdap.api.dataset.lib.cube.TimeSeries)8 HashMap (java.util.HashMap)8 LinkedHashMap (java.util.LinkedHashMap)8 List (java.util.List)8 ImmutableList (com.google.common.collect.ImmutableList)6 CubeQuery (io.cdap.cdap.api.dataset.lib.cube.CubeQuery)6 ImmutableMap (com.google.common.collect.ImmutableMap)4 AggregationFunction (io.cdap.cdap.api.dataset.lib.cube.AggregationFunction)4 AggregationOption (io.cdap.cdap.api.dataset.lib.cube.AggregationOption)4 Cube (io.cdap.cdap.api.dataset.lib.cube.Cube)4 CubeFact (io.cdap.cdap.api.dataset.lib.cube.CubeFact)4 InMemoryMetricsTable (io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable)4 MetricQueryResult (io.cdap.cdap.proto.MetricQueryResult)4