Search in sources :

Example 16 with DimensionValue

use of io.cdap.cdap.api.dataset.lib.cube.DimensionValue in project cdap by caskdata.

the class FactTableTest method testPreSplits.

/**
 * Writes one fact for each of three aggregation views and verifies that the resulting rows
 * land in three different split ranges, using the boundaries from {@code FactTable.getSplits(3)}.
 */
@Test
public void testPreSplits() throws Exception {
    InMemoryTableService.create("presplitEntityTable");
    InMemoryTableService.create("presplitDataTable");
    int resolution = 10;
    int rollTimebaseInterval = 2;
    InMemoryMetricsTable metricsTable = new InMemoryMetricsTable("presplitDataTable");
    FactTable table = new FactTable(metricsTable, new EntityTable(new InMemoryMetricsTable("presplitEntityTable")), resolution, rollTimebaseInterval);
    byte[][] splits = FactTable.getSplits(3);
    long ts = System.currentTimeMillis() / 1000;
    DimensionValue dimVal1 = new DimensionValue("dim1", "value1");
    DimensionValue dimVal2 = new DimensionValue("dim2", "value2");
    DimensionValue dimVal3 = new DimensionValue("dim3", "value3");
    // first agg view: dim1
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // second agg view: dim1 & dim2
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1, dimVal2), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // third agg view: dim3
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal3), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // Verify all written records are spread across splits
    Set<Integer> splitsWithRows = Sets.newHashSet();
    Scanner scanner = metricsTable.scan(null, null, null);
    try {
        Row row;
        while ((row = scanner.next()) != null) {
            // A row belongs to the first split whose boundary key is greater than the row key;
            // rows not below any boundary fall into the last split (index splits.length).
            int splitIndex = splits.length;
            for (int i = 0; i < splits.length; i++) {
                if (Bytes.compareTo(row.getRow(), splits[i]) < 0) {
                    splitIndex = i;
                    break;
                }
            }
            splitsWithRows.add(splitIndex);
        }
    } finally {
        // release the scanner even if an assertion or scan error interrupts the loop
        scanner.close();
    }
    Assert.assertEquals(3, splitsWithRows.size());
}
Also used : Measurement(io.cdap.cdap.api.dataset.lib.cube.Measurement) Scanner(io.cdap.cdap.api.dataset.table.Scanner) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) InMemoryMetricsTable(io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) Row(io.cdap.cdap.api.dataset.table.Row) Test(org.junit.Test)

Example 17 with DimensionValue

use of io.cdap.cdap.api.dataset.lib.cube.DimensionValue in project cdap by caskdata.

the class FactTableTest method testBasics.

/**
 * End-to-end check of {@code FactTable}: adds counter facts one by one and as a batch, scans
 * them back per metric and across all metrics, deletes by time range and by metric, and then
 * exercises the dimension-value and measure-name searches.
 *
 * The steps are order-dependent: later assertions rely on the data written and deleted by
 * the earlier steps.
 */
@Test
public void testBasics() throws Exception {
    InMemoryTableService.create("EntityTable");
    InMemoryTableService.create("DataTable");
    int resolution = 10;
    int rollTimebaseInterval = 2;
    FactTable table = new FactTable(new InMemoryMetricsTable("DataTable"), new EntityTable(new InMemoryMetricsTable("EntityTable")), resolution, rollTimebaseInterval);
    // aligned to start of resolution bucket
    // "/1000" because time is expected to be in seconds
    long ts = ((System.currentTimeMillis() / 1000) / resolution) * resolution;
    // testing encoding with multiple dims
    List<DimensionValue> dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value2"), new DimensionValue("dim3", "value3"));
    // trying adding one by one, in same (first) time resolution bucket
    // contributes 5 * k to the bucket at ts for each "metric<k>"
    for (int i = 0; i < 5; i++) {
        for (int k = 1; k < 4; k++) {
            // note: "+i" here and below doesn't affect results, just to confirm
            // that data points are rounded to the resolution
            table.add(ImmutableList.of(new Fact(ts + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, k))));
        }
    }
    // trying adding one by one, in different time resolution buckets
    // contributes 2 * k to each of the buckets at ts, ts + resolution and ts + 2 * resolution
    for (int i = 0; i < 3; i++) {
        for (int k = 1; k < 4; k++) {
            table.add(ImmutableList.of(new Fact(ts + resolution * i + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 2 * k))));
        }
    }
    // trying adding as list
    // first incs in same (second) time resolution bucket
    // contributes 7 * 3 * k = 21 * k to the bucket at ts + resolution
    List<Fact> aggs = Lists.newArrayList();
    for (int i = 0; i < 7; i++) {
        for (int k = 1; k < 4; k++) {
            aggs.add(new Fact(ts + resolution, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 3 * k)));
        }
    }
    // then incs in different time resolution buckets
    // contributes 4 * k to each of the three buckets
    for (int i = 0; i < 3; i++) {
        for (int k = 1; k < 4; k++) {
            aggs.add(new Fact(ts + resolution * i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 4 * k)));
        }
    }
    table.add(aggs);
    // verify each metric
    // expected totals per bucket: ts -> 5k + 2k + 4k = 11k,
    // ts + resolution -> 2k + 21k + 4k = 27k, ts + 2 * resolution -> 2k + 4k = 6k
    for (int k = 1; k < 4; k++) {
        FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric" + k, dimensionValues);
        Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
        assertScan(table, expected, scan);
    }
    // verify each metric within a single timeBase
    for (int k = 1; k < 4; k++) {
        FactScan scan = new FactScan(ts, ts + resolution - 1, "metric" + k, dimensionValues);
        Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k)));
        assertScan(table, expected, scan);
    }
    // verify all metrics with fuzzy metric in scan
    Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
    for (int k = 1; k < 4; k++) {
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
    }
    // metric = null means "all"
    FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
    assertScan(table, expected, scan);
    // delete metric test
    expected.clear();
    // delete the metrics data at (timestamp + 20) resolution
    scan = new FactScan(ts + resolution * 2, ts + resolution * 3, dimensionValues);
    table.delete(scan);
    // after the delete only the first two buckets are expected to remain for every metric
    for (int k = 1; k < 4; k++) {
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k)));
    }
    // verify deletion
    scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
    assertScan(table, expected, scan);
    // delete metrics for "metric1" at ts0 and verify deletion
    scan = new FactScan(ts, ts + 1, "metric1", dimensionValues);
    table.delete(scan);
    expected.clear();
    expected.put("metric1", dimensionValues, ImmutableList.of(new TimeValue(ts + resolution, 27)));
    scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric1", dimensionValues);
    assertScan(table, expected, scan);
    // verify the next dims search
    Collection<DimensionValue> nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
    // same search, but with a null value for dim1 in the slice; the expected result is unchanged
    Map<String, String> slice = Maps.newHashMap();
    slice.put("dim1", null);
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2"), ts, ts + 3);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim3", "value3")), nextTags);
    // add new dim values
    // note: dimensionValues is reassigned here; the facts below use null for dim3 and dim2 respectively
    dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", null));
    table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
    dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", null), new DimensionValue("dim3", "value3"));
    table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    // NOTE(review): dim3=value3 is expected here presumably because the fact with a null dim2
    // exposes dim3 as its next available dimension - confirm against FactTable
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", "value3")), nextTags);
    // search for metric names given dims list and verify
    // "metric1" is not expected in [ts, ts + 1] because its data point at ts was deleted above
    Collection<String> metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2", "dim3", "value3"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim2", "value2"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
}
Also used : Measurement(io.cdap.cdap.api.dataset.lib.cube.Measurement) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) InMemoryMetricsTable(io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) TimeValue(io.cdap.cdap.api.dataset.lib.cube.TimeValue) Test(org.junit.Test)

Example 18 with DimensionValue

use of io.cdap.cdap.api.dataset.lib.cube.DimensionValue in project cdap by caskdata.

the class DefaultMetricStore method findNextAvailableTags.

/**
 * Runs the cube search built from the given query and converts every returned
 * {@link DimensionValue} into a {@link TagValue} with the same name and value.
 *
 * @param query the metric search query to translate into a cube search
 * @return a new collection with one tag value per dimension value found
 */
@Override
public Collection<TagValue> findNextAvailableTags(MetricSearchQuery query) {
    Collection<DimensionValue> found = cube.get().findDimensionValues(buildCubeSearchQuery(query));
    Collection<TagValue> tagValues = Lists.newArrayList();
    for (DimensionValue dimension : found) {
        String tagName = dimension.getName();
        String tagValue = dimension.getValue();
        tagValues.add(new TagValue(tagName, tagValue));
    }
    return tagValues;
}
Also used : DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) TagValue(io.cdap.cdap.api.metrics.TagValue)

Example 19 with DimensionValue

use of io.cdap.cdap.api.dataset.lib.cube.DimensionValue in project cdap by caskdata.

the class DefaultCube method getTimeSeries.

/**
 * Folds the scanned fact records into time series keyed by group-by dimension values and
 * measure name.
 *
 * Records with a null value in any group-by dimension are skipped. Scanning stops once
 * {@code MAX_RECORDS_TO_SCAN} non-skipped records have been processed.
 *
 * @param query supplies the group-by dimensions and the aggregation function per measure
 * @param scanner source of the fact records to aggregate
 * @return table of {dimension values, measure name} -> {timestamp -> aggregated value}
 * @throws RuntimeException if a scanned measure has no supported aggregation function in the query
 */
private Table<Map<String, String>, String, Map<Long, Long>> getTimeSeries(CubeQuery query, FactScanner scanner) {
    // {dimension values, measure} -> {time -> value}s
    Table<Map<String, String>, String, Map<Long, Long>> result = HashBasedTable.create();
    int count = 0;
    while (scanner.hasNext()) {
        FactScanResult next = scanner.next();
        incrementMetric("cube.query.scan.records.count", 1);
        boolean skip = false;
        // using tree map, as we are using it as a key for a map
        Map<String, String> seriesDimensions = Maps.newTreeMap();
        for (String dimensionName : query.getGroupByDimensions()) {
            // todo: use Map<String, String> instead of List<DimensionValue> into a String, String, everywhere
            for (DimensionValue dimensionValue : next.getDimensionValues()) {
                if (!dimensionName.equals(dimensionValue.getName())) {
                    continue;
                }
                if (dimensionValue.getValue() == null) {
                    // Currently, we do NOT return null as grouped by value.
                    // Depending on whether dimension is required or not the records with null value in it may or may not be
                    // in aggregation. At this moment, the choosing of the aggregation for query doesn't look at this, so
                    // potentially null may or may not be included in results, depending on the aggregation selected
                    // querying. We don't want to produce inconsistent results varying due to different aggregations selected,
                    // so don't return nulls in any of those cases.
                    skip = true;
                } else {
                    seriesDimensions.put(dimensionName, dimensionValue.getValue());
                }
                break;
            }
            if (skip) {
                // the record is dropped anyway; no need to resolve the remaining dimensions
                break;
            }
        }
        if (skip) {
            incrementMetric("cube.query.scan.skipped.count", 1);
            continue;
        }
        String measureName = next.getMeasureName();
        AggregationFunction function = query.getMeasurements().get(measureName);
        // resolved lazily so that a record without time values does not create an empty series
        Map<Long, Long> timeValues = null;
        for (TimeValue timeValue : next) {
            if (timeValues == null) {
                timeValues = result.get(seriesDimensions, measureName);
                if (timeValues == null) {
                    timeValues = Maps.newHashMap();
                    result.put(seriesDimensions, measureName, timeValues);
                }
            }
            long timestamp = timeValue.getTimestamp();
            long incoming = timeValue.getValue();
            Long current = timeValues.get(timestamp);
            long updated;
            if (AggregationFunction.SUM == function) {
                updated = current == null ? incoming : current + incoming;
            } else if (AggregationFunction.MAX == function) {
                updated = current == null ? incoming : Math.max(current, incoming);
            } else if (AggregationFunction.MIN == function) {
                updated = current == null ? incoming : Math.min(current, incoming);
            } else if (AggregationFunction.LATEST == function) {
                updated = incoming;
            } else {
                // should never happen: developer error
                throw new RuntimeException("Unknown AggregationFunction: " + function);
            }
            timeValues.put(timestamp, updated);
        }
        if (++count >= MAX_RECORDS_TO_SCAN) {
            break;
        }
    }
    return result;
}
Also used : AggregationFunction(io.cdap.cdap.api.dataset.lib.cube.AggregationFunction) FactScanResult(io.cdap.cdap.data2.dataset2.lib.timeseries.FactScanResult) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TimeValue(io.cdap.cdap.api.dataset.lib.cube.TimeValue)

Example 20 with DimensionValue

use of io.cdap.cdap.api.dataset.lib.cube.DimensionValue in project cdap by caskdata.

the class DefaultCube method findMeasureNames.

/**
 * Collects the measure names found in every aggregation whose dimensions cover all
 * dimensions named in the query, using the fact table for the query's resolution.
 *
 * @param query dimension values, resolution and time range to search with
 * @return the matching measure names in sorted order
 */
@Override
public Collection<String> findMeasureNames(CubeExploreQuery query) {
    LOG.trace("Searching for measures, query: {}", query);
    // todo: the passed query should have map instead
    Map<String, String> dimensionSlice = Maps.newLinkedHashMap();
    for (DimensionValue dimension : query.getDimensionValues()) {
        dimensionSlice.put(dimension.getName(), dimension.getValue());
    }
    FactTable factTable = resolutionToFactTable.get(query.getResolution());
    // In each aggregation that matches given dimensions, try to find measure names
    SortedSet<String> measureNames = Sets.newTreeSet();
    for (Aggregation aggregation : aggregations.values()) {
        if (!aggregation.getDimensionNames().containsAll(dimensionSlice.keySet())) {
            // this aggregation lacks at least one of the requested dimensions
            continue;
        }
        Collection<String> found = factTable.findMeasureNames(
            aggregation.getDimensionNames(), dimensionSlice, query.getStartTs(), query.getEndTs());
        measureNames.addAll(found);
    }
    return measureNames;
}
Also used : FactTable(io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue)

Aggregations

DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue)40 Test (org.junit.Test)12 InMemoryMetricsTable (io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable)10 FactTable (io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable)10 TimeValue (io.cdap.cdap.api.dataset.lib.cube.TimeValue)8 Row (io.cdap.cdap.api.dataset.table.Row)8 ArrayList (java.util.ArrayList)8 Map (java.util.Map)8 Measurement (io.cdap.cdap.api.dataset.lib.cube.Measurement)6 Scanner (io.cdap.cdap.api.dataset.table.Scanner)6 FuzzyRowFilter (io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter)6 HashMap (java.util.HashMap)6 LinkedHashMap (java.util.LinkedHashMap)6 CubeFact (io.cdap.cdap.api.dataset.lib.cube.CubeFact)4 TimeSeries (io.cdap.cdap.api.dataset.lib.cube.TimeSeries)4 FactScan (io.cdap.cdap.data2.dataset2.lib.timeseries.FactScan)4 URL (java.net.URL)4 AbstractIterator (com.google.common.collect.AbstractIterator)2 ImmutableList (com.google.common.collect.ImmutableList)2 TypeToken (com.google.gson.reflect.TypeToken)2