use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.
the class DefaultCube method getTimeSeries.
/**
 * Aggregates the facts produced by the scanner into per-series time/value maps.
 *
 * <p>Each scanned record is assigned to a series identified by the values of the query's group-by
 * dimensions and by the record's measure name. Values that land on the same timestamp of the same
 * series are combined using the aggregation function the query declares for that measure.
 * Records that carry a {@code null} value for any group-by dimension are skipped and do not count
 * towards {@code MAX_RECORDS_TO_SCAN}; scanning stops once that many records have been aggregated.
 *
 * @param query the query supplying the group-by dimensions and the per-measure aggregation functions
 * @param scanner the source of fact scan results
 * @return a table of {dimension values, measure name} -> {timestamp -> aggregated value}
 * @throws RuntimeException if a record with time values belongs to a measure whose aggregation
 *     function is not one of SUM, MAX, MIN or LATEST
 */
private Table<Map<String, String>, String, Map<Long, Long>> getTimeSeries(CubeQuery query, FactScanner scanner) {
  // {dimension values, measure} -> {time -> value}s
  Table<Map<String, String>, String, Map<Long, Long>> result = HashBasedTable.create();
  int count = 0;
  while (scanner.hasNext()) {
    FactScanResult next = scanner.next();
    incrementMetric("cube.query.scan.records.count", 1);

    boolean skip = false;
    // using tree map, as we are using it as a key for a map
    Map<String, String> seriesDimensions = Maps.newTreeMap();
    for (String dimensionName : query.getGroupByDimensions()) {
      // todo: use Map<String, String> instead of List<DimensionValue> into a String, String, everywhere
      for (DimensionValue dimensionValue : next.getDimensionValues()) {
        if (!dimensionName.equals(dimensionValue.getName())) {
          continue;
        }
        if (dimensionValue.getValue() == null) {
          // Currently, we do NOT return null as grouped by value.
          // Depending on whether dimension is required or not the records with null value in it may or may not be
          // in aggregation. At this moment, the choosing of the aggregation for query doesn't look at this, so
          // potentially null may or may not be included in results, depending on the aggregation selected
          // querying. We don't want to produce inconsistent results varying due to different aggregations selected,
          // so don't return nulls in any of those cases.
          skip = true;
        } else {
          seriesDimensions.put(dimensionName, dimensionValue.getValue());
        }
        break;
      }
      if (skip) {
        // the record is dropped as a whole, so the remaining dimensions do not need to be resolved
        break;
      }
    }
    if (skip) {
      incrementMetric("cube.query.scan.skipped.count", 1);
      continue;
    }

    // Everything below is constant for the record, so resolve it once instead of once per time value.
    String measureName = next.getMeasureName();
    AggregationFunction function = query.getMeasurements().get(measureName);
    Map<Long, Long> timeValues = result.get(seriesDimensions, measureName);
    for (TimeValue timeValue : next) {
      if (timeValues == null) {
        // created lazily so that a record without any time value does not add an empty series
        timeValues = Maps.newHashMap();
        result.put(seriesDimensions, measureName, timeValues);
      }
      if (AggregationFunction.SUM == function) {
        timeValues.merge(timeValue.getTimestamp(), timeValue.getValue(), Long::sum);
      } else if (AggregationFunction.MAX == function) {
        timeValues.merge(timeValue.getTimestamp(), timeValue.getValue(), Long::max);
      } else if (AggregationFunction.MIN == function) {
        timeValues.merge(timeValue.getTimestamp(), timeValue.getValue(), Long::min);
      } else if (AggregationFunction.LATEST == function) {
        timeValues.put(timeValue.getTimestamp(), timeValue.getValue());
      } else {
        // should never happen: developer error
        throw new RuntimeException("Unknown MeasureType: " + function);
      }
    }
    if (++count >= MAX_RECORDS_TO_SCAN) {
      break;
    }
  }
  return result;
}
use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.
the class DefaultCube method convertToQueryResult.
/**
 * Converts the aggregated result table into the list of time series returned for a query.
 *
 * <p>One {@link TimeSeries} is produced per (group-by dimension values, measure) cell of the table.
 * The data points of a cell are sorted and then reduced to at most {@code query.getLimit()} points:
 * <ul>
 *   <li>if there are more points than the limit and the query's aggregation option is one of
 *       {@code PARTITION_AGG_OPTIONS}, the points are split into {@code limit} equally sized
 *       intervals (the leading remainder is dropped) and every interval is collapsed into a
 *       single point;</li>
 *   <li>otherwise the points are passed through the {@code TimeSeriesInterpolator} and cut off
 *       after {@code limit} points.</li>
 * </ul>
 *
 * @param query the query supplying limit, aggregation option, interpolator and resolution
 * @param resultTable {dimension values, measure name} -> {timestamp -> value}
 * @return the time series, one per non-empty cell of {@code resultTable}
 * @throws IllegalArgumentException if the table is not empty and the query limit is not positive
 */
private Collection<TimeSeries> convertToQueryResult(CubeQuery query, Table<Map<String, String>, String, Map<Long, Long>> resultTable) {
  List<TimeSeries> result = new ArrayList<>();
  // iterating each groupValue dimensions
  for (Map.Entry<Map<String, String>, Map<String, Map<Long, Long>>> row : resultTable.rowMap().entrySet()) {
    // iterating each measure
    for (Map.Entry<String, Map<Long, Long>> measureEntry : row.getValue().entrySet()) {
      // generating time series for a grouping and a measure
      List<TimeValue> timeValues = new ArrayList<>(measureEntry.getValue().size());
      for (Map.Entry<Long, Long> timeValue : measureEntry.getValue().entrySet()) {
        timeValues.add(new TimeValue(timeValue.getKey(), timeValue.getValue()));
      }
      Collections.sort(timeValues);

      List<TimeValue> resultTimeValues = new ArrayList<>();
      AggregationOption aggregationOption = query.getAggregationOption();
      int limit = query.getLimit();
      // this should not happen in production, since the check has been made in the handler
      if (limit <= 0) {
        throw new IllegalArgumentException("The query limit must be greater than 0");
      }

      // option LATEST and SUM.
      if (limit < timeValues.size() && PARTITION_AGG_OPTIONS.contains(aggregationOption)) {
        int partitionSize = timeValues.size() / limit;
        int remainder = timeValues.size() % limit;
        // ignore the first 'remainder' data points so that the rest splits into exactly 'limit' intervals
        for (List<TimeValue> interval : Iterables.partition(timeValues.subList(remainder, timeValues.size()), partitionSize)) {
          TimeValue lastInInterval = interval.get(interval.size() - 1);
          if (aggregationOption.equals(AggregationOption.LATEST)) {
            // for LATEST we only need to get the last data point in the interval
            resultTimeValues.add(lastInInterval);
          } else if (aggregationOption.equals(AggregationOption.SUM)) {
            // for SUM we want to sum up all the values in the interval
            long sum = interval.stream().mapToLong(TimeValue::getValue).sum();
            resultTimeValues.add(new TimeValue(lastInInterval.getTimestamp(), sum));
          }
        }
      } else {
        // TODO: CDAP-15565 remove the interpolation logic since it is never maintained and adds huge complexity
        PeekingIterator<TimeValue> timeValueItor = Iterators.peekingIterator(new TimeSeriesInterpolator(timeValues, query.getInterpolator(), query.getResolution()).iterator());
        int count = 0;
        while (timeValueItor.hasNext()) {
          TimeValue timeValue = timeValueItor.next();
          resultTimeValues.add(new TimeValue(timeValue.getTimestamp(), timeValue.getValue()));
          if (++count >= limit) {
            break;
          }
        }
      }
      result.add(new TimeSeries(measureEntry.getKey(), row.getKey(), resultTimeValues));
    }
  }
  return result;
}
use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.
the class MetricsProcessorServiceTest method assertMetricsResult.
/**
 * Verifies, for every expected metric, that a SUM query against the store yields exactly one
 * time series holding exactly one data point whose value equals the expected one.
 *
 * @param metricStore the store to query
 * @param metricsContext the tags passed to each query
 * @param expected metric name -> expected value
 */
private void assertMetricsResult(MetricStore metricStore, Map<String, String> metricsContext, Map<String, Long> expected) {
  for (Map.Entry<String, Long> expectation : expected.entrySet()) {
    String metricName = expectation.getKey();
    MetricDataQuery sumQuery = new MetricDataQuery(0, Integer.MAX_VALUE, Integer.MAX_VALUE, metricName,
                                                   AggregationFunction.SUM, metricsContext,
                                                   ImmutableList.<String>of());

    // getOnlyElement is used on both levels, so anything other than a single series with a single
    // point fails the check
    MetricTimeSeries onlySeries = Iterables.getOnlyElement(metricStore.query(sumQuery));
    TimeValue onlyPoint = Iterables.getOnlyElement(onlySeries.getTimeValues());

    String failureMessage = String.format("Actual value of metric: %s does not match expected", metricName);
    Assert.assertEquals(failureMessage, expectation.getValue().longValue(), onlyPoint.getValue());
  }
}
use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.
the class FactScanner method createIterator.
/**
 * Creates the iterator that turns scanned rows into {@link FactScanResult}s.
 *
 * <p>Rows whose measure is not in {@code measureNames} are passed over (an empty
 * {@code measureNames} accepts every measure). For an accepted row only the columns whose
 * timestamp lies within {@code [startTs, endTs]} are kept. The underlying scanner is closed when
 * the iteration ends.
 */
private Iterator<FactScanResult> createIterator() {
  return new AbstractIterator<FactScanResult>() {
    @Override
    protected FactScanResult computeNext() {
      for (Row row = scanner.next(); row != null; row = scanner.next()) {
        rowScanned++;
        byte[] key = row.getRow();

        // Decode context and metric from key
        String measure = codec.getMeasureName(key);
        // if measureNames is empty we include all metrics
        boolean wanted = measureNames.isEmpty() || measureNames.contains(measure);
        if (!wanted) {
          continue;
        }

        // todo: codec.getDimensionValues(rowKey) needs to un-encode dimension names which may result in read in
        //       entity table (depending on the cache and its state). To avoid that, we can pass to scanner the
        //       list of dimension names as we *always* know it (it is given) at the time of scanning
        List<DimensionValue> dimensions = codec.getDimensionValues(key);

        List<TimeValue> points = Lists.newLinkedList();
        boolean pastEnd = false;
        // todo: entry set is ordered by ts?
        for (Map.Entry<byte[], byte[]> column : row.getColumns().entrySet()) {
          long ts = codec.getTimestamp(key, column.getKey());
          if (ts >= startTs) {
            if (ts > endTs) {
              pastEnd = true;
              break;
            }
            // todo: move Bytes.toLong into codec?
            points.add(new TimeValue(ts, Bytes.toLong(column.getValue())));
          }
        }

        // a row that contributed nothing and already ran past endTs ends the whole scan
        if (pastEnd && points.isEmpty()) {
          break;
        }
        // todo: can return empty list, if all data is < startTs or > endTs
        return new FactScanResult(measure, dimensions, points);
      }

      scanner.close();
      return endOfData();
    }
  };
}
use of io.cdap.cdap.api.dataset.lib.cube.TimeValue in project cdap by caskdata.
the class ProfileMetricServiceTest method getMetric.
/**
 * Queries the store for the SUM of a metric tagged with the given profile and program run and
 * returns the value of the first data point of the first returned series.
 *
 * @param metricStore the store to query
 * @param programRunId supplies the namespace, program type, application and program tags
 * @param profileId supplies the profile scope and profile name tags
 * @param metricName the metric to look up
 * @return the first value found, or 0 if the query returns no series or the first series is empty
 */
private long getMetric(MetricStore metricStore, ProgramRunId programRunId, ProfileId profileId, String metricName) {
  Map<String, String> tags = ImmutableMap.<String, String>builder()
    .put(Constants.Metrics.Tag.PROFILE_SCOPE, profileId.getScope().name())
    .put(Constants.Metrics.Tag.PROFILE, profileId.getProfile())
    .put(Constants.Metrics.Tag.NAMESPACE, programRunId.getNamespace())
    .put(Constants.Metrics.Tag.PROGRAM_TYPE, programRunId.getType().getPrettyName())
    .put(Constants.Metrics.Tag.APP, programRunId.getApplication())
    .put(Constants.Metrics.Tag.PROGRAM, programRunId.getProgram())
    .build();

  Collection<MetricTimeSeries> result = metricStore.query(
    new MetricDataQuery(0, 0, Integer.MAX_VALUE, metricName, AggregationFunction.SUM, tags, new ArrayList<>()));

  if (!result.isEmpty()) {
    List<TimeValue> firstSeriesValues = result.iterator().next().getTimeValues();
    if (!firstSeriesValues.isEmpty()) {
      return firstSeriesValues.get(0).getValue();
    }
  }
  // nothing recorded for this metric
  return 0;
}
Aggregations