
Example 1 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTable, method getScanner.

private Scanner getScanner(FactScan scan) {
    // sort the measure names by their entity ids; the first and last sorted names are used to build the start and end row keys
    List<String> measureNames = getSortedMeasures(scan.getMeasureNames());
    byte[] startRow = codec.createStartRowKey(scan.getDimensionValues(), measureNames.isEmpty() ? null : measureNames.get(0), scan.getStartTs(), false);
    byte[] endRow = codec.createEndRowKey(scan.getDimensionValues(), measureNames.isEmpty() ? null : measureNames.get(measureNames.size() - 1), scan.getEndTs(), false);
    byte[][] columns;
    if (Arrays.equals(startRow, endRow)) {
        // if the start and end fall within the same time base, we only need a subset of columns
        long timeBase = scan.getStartTs() / rollTime * rollTime;
        int startCol = (int) (scan.getStartTs() - timeBase) / resolution;
        int endCol = (int) (scan.getEndTs() - timeBase) / resolution;
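        // e.g., with resolution = 10 and a scan from timeBase + 20 to timeBase + 45,
        // startCol = 2 and endCol = 4, i.e. three column qualifiers are needed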
        columns = new byte[endCol - startCol + 1][];
        for (int i = 0; i < columns.length; i++) {
            columns[i] = Bytes.toBytes((short) (startCol + i));
        }
    }
    endRow = Bytes.stopKeyForPrefix(endRow);
    FuzzyRowFilter fuzzyRowFilter = measureNames.isEmpty() ? createFuzzyRowFilter(scan, startRow) : createFuzzyRowFilter(scan, measureNames);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Scanning fact table {} with scan: {}; constructed startRow: {}, endRow: {}, fuzzyRowFilter: {}", timeSeriesTable, scan, toPrettyLog(startRow), toPrettyLog(endRow), fuzzyRowFilter);
    }
    return timeSeriesTable.scan(startRow, endRow, fuzzyRowFilter);
}
Also used : FuzzyRowFilter(co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter)
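
getScanner is private; the sketch below shows how a caller typically reaches it. This is a hedged sketch, not code from this page: it assumes FactTable exposes a public scan(FactScan) method that delegates to getScanner and returns a FactScanner iterating FactScanResult values, and it reuses the (startTs, endTs, measureName, dimensionValues) FactScan constructor exercised by FactTableTest further down.

// caller-side sketch; scan(FactScan) and FactScanner.hasNext/next/close are assumed APIs
static void scanOneMetric(FactTable table, long startTs, long endTs) {
    List<DimensionValue> dims = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value2"));
    FactScan scan = new FactScan(startTs, endTs, "metric1", dims);
    FactScanner scanner = table.scan(scan);
    try {
        while (scanner.hasNext()) {
            // each result carries the measure name, dimension values and time/value pairs
            System.out.println(scanner.next());
        }
    } finally {
        scanner.close();
    }
}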

Example 2 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTable, method findMeasureNames.

/**
 * Finds all measure names of the facts that match given {@link DimensionValue}s and time range.
 * @param allDimensionNames list of all dimension names to be present in the fact record
 * @param dimensionSlice dimension values to filter by; a {@code null} value means any non-null value
 * @param startTs start timestamp, in seconds
 * @param endTs end timestamp, in seconds
 * @return {@link Set} of measure names
 */
// todo: pass a limit on number of measures returned
public Set<String> findMeasureNames(List<String> allDimensionNames, Map<String, String> dimensionSlice, long startTs, long endTs) {
    List<DimensionValue> allDimensions = Lists.newArrayList();
    for (String dimensionName : allDimensionNames) {
        allDimensions.add(new DimensionValue(dimensionName, dimensionSlice.get(dimensionName)));
    }
    byte[] startRow = codec.createStartRowKey(allDimensions, null, startTs, false);
    byte[] endRow = codec.createEndRowKey(allDimensions, null, endTs, false);
    endRow = Bytes.stopKeyForPrefix(endRow);
    FuzzyRowFilter fuzzyRowFilter = createFuzzyRowFilter(new FactScan(startTs, endTs, ImmutableList.<String>of(), allDimensions), startRow);
    Set<String> measureNames = Sets.newHashSet();
    int scannedRecords = 0;
    try (Scanner scanner = timeSeriesTable.scan(startRow, endRow, fuzzyRowFilter)) {
        Row rowResult;
        while ((rowResult = scanner.next()) != null) {
            scannedRecords++;
            if (scannedRecords > MAX_RECORDS_TO_SCAN_DURING_SEARCH) {
                break;
            }
            byte[] rowKey = rowResult.getRow();
            // filter out columns by time range (scan configuration only filters whole rows)
            if (codec.getTimestamp(rowKey, codec.createColumn(startTs)) < startTs) {
                continue;
            }
            if (codec.getTimestamp(rowKey, codec.createColumn(endTs)) > endTs) {
                // we're done with scanner
                break;
            }
            measureNames.add(codec.getMeasureName(rowResult.getRow()));
        }
    }
    LOG.trace("search for measures completed, scanned records: {}", scannedRecords);
    return measureNames;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) DimensionValue(co.cask.cdap.api.dataset.lib.cube.DimensionValue) Row(co.cask.cdap.api.dataset.table.Row) FuzzyRowFilter(co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter)
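
A short usage sketch for findMeasureNames, mirroring the calls in FactTableTest below. A plain HashMap is used for the slice because a null value means "any non-null value" and ImmutableMap rejects nulls; the table instance, dimension names and values here are illustrative.

// usage sketch; the table and time range are supplied by the caller
static Set<String> measuresForSlice(FactTable table, long startTs, long endTs) {
    Map<String, String> slice = Maps.newHashMap();
    // dim1 must equal "value1"; a null value for dim2 matches any non-null value
    slice.put("dim1", "value1");
    slice.put("dim2", null);
    return table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), slice, startTs, endTs);
}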

Example 3 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTableTest, method testPreSplits.

@Test
public void testPreSplits() throws Exception {
    InMemoryTableService.create("presplitEntityTable");
    InMemoryTableService.create("presplitDataTable");
    int resolution = 10;
    int rollTimebaseInterval = 2;
    InMemoryMetricsTable metricsTable = new InMemoryMetricsTable("presplitDataTable");
    FactTable table = new FactTable(metricsTable, new EntityTable(new InMemoryMetricsTable("presplitEntityTable")), resolution, rollTimebaseInterval);
    byte[][] splits = FactTable.getSplits(3);
    long ts = System.currentTimeMillis() / 1000;
    DimensionValue dimVal1 = new DimensionValue("dim1", "value1");
    DimensionValue dimVal2 = new DimensionValue("dim2", "value2");
    DimensionValue dimVal3 = new DimensionValue("dim3", "value3");
    // first agg view: dim1
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // second agg view: dim1 & dim2
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1, dimVal2), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // third agg view: dim3
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal3), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // Verify all written records are spread across splits
    Scanner scanner = metricsTable.scan(null, null, null);
    Row row;
    Set<Integer> splitsWithRows = Sets.newHashSet();
    while ((row = scanner.next()) != null) {
        boolean added = false;
        for (int i = 0; i < splits.length; i++) {
            if (Bytes.compareTo(row.getRow(), splits[i]) < 0) {
                splitsWithRows.add(i);
                added = true;
                break;
            }
        }
        if (!added) {
            // falls into last split
            splitsWithRows.add(splits.length);
        }
    }
    Assert.assertEquals(3, splitsWithRows.size());
}
Also used : Measurement(co.cask.cdap.api.dataset.lib.cube.Measurement) Scanner(co.cask.cdap.api.dataset.table.Scanner) DimensionValue(co.cask.cdap.api.dataset.lib.cube.DimensionValue) InMemoryMetricsTable(co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) Row(co.cask.cdap.api.dataset.table.Row) Test(org.junit.Test)
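
The loop in the test assigns each row key to the first split point that compares greater than it, with index splits.length standing for everything after the last split point. Extracted as a small helper (a sketch that assumes the same Bytes utility used in the test):

// returns the index of the split bucket a row key falls into; splits.length means "after the last split point"
static int splitIndex(byte[] rowKey, byte[][] splits) {
    for (int i = 0; i < splits.length; i++) {
        if (Bytes.compareTo(rowKey, splits[i]) < 0) {
            return i;
        }
    }
    return splits.length;
}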

Example 4 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTableTest, method testBasics.

@Test
public void testBasics() throws Exception {
    InMemoryTableService.create("EntityTable");
    InMemoryTableService.create("DataTable");
    int resolution = 10;
    int rollTimebaseInterval = 2;
    FactTable table = new FactTable(new InMemoryMetricsTable("DataTable"), new EntityTable(new InMemoryMetricsTable("EntityTable")), resolution, rollTimebaseInterval);
    // aligned to start of resolution bucket
    // "/1000" because time is expected to be in seconds
    long ts = ((System.currentTimeMillis() / 1000) / resolution) * resolution;
    // testing encoding with multiple dims
    List<DimensionValue> dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value2"), new DimensionValue("dim3", "value3"));
    // try adding one by one, in the same (first) time resolution bucket
    for (int i = 0; i < 5; i++) {
        for (int k = 1; k < 4; k++) {
            // note: the "+ i" here and below doesn't affect the results; it just confirms
            // that data points are rounded down to the resolution
            table.add(ImmutableList.of(new Fact(ts + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, k))));
        }
    }
    // try adding one by one, in different time resolution buckets
    for (int i = 0; i < 3; i++) {
        for (int k = 1; k < 4; k++) {
            table.add(ImmutableList.of(new Fact(ts + resolution * i + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 2 * k))));
        }
    }
    // try adding as a list
    // first, increments in the same (second) time resolution bucket
    List<Fact> aggs = Lists.newArrayList();
    for (int i = 0; i < 7; i++) {
        for (int k = 1; k < 4; k++) {
            aggs.add(new Fact(ts + resolution, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 3 * k)));
        }
    }
    // then increments in different time resolution buckets
    for (int i = 0; i < 3; i++) {
        for (int k = 1; k < 4; k++) {
            aggs.add(new Fact(ts + resolution * i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 4 * k)));
        }
    }
    table.add(aggs);
    // verify each metric
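    // expected totals per bucket, derived from the writes above:
    //   ts                  : 5 adds of k + one add of 2*k + one add of 4*k   = 11*k
    //   ts + resolution     : one add of 2*k + 7 adds of 3*k + one add of 4*k = 27*k
    //   ts + 2 * resolution : one add of 2*k + one add of 4*k                 = 6*k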
    for (int k = 1; k < 4; k++) {
        FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric" + k, dimensionValues);
        Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
        assertScan(table, expected, scan);
    }
    // verify each metric within a single timeBase
    for (int k = 1; k < 4; k++) {
        FactScan scan = new FactScan(ts, ts + resolution - 1, "metric" + k, dimensionValues);
        Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k)));
        assertScan(table, expected, scan);
    }
    // verify all metrics with a fuzzy (unspecified) metric name in the scan
    Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
    for (int k = 1; k < 4; k++) {
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
    }
    // metric = null means "all"
    FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
    assertScan(table, expected, scan);
    // delete metric test
    expected.clear();
    // delete the metrics data in the (ts + 2 * resolution) time bucket
    scan = new FactScan(ts + resolution * 2, ts + resolution * 3, dimensionValues);
    table.delete(scan);
    for (int k = 1; k < 4; k++) {
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k)));
    }
    // verify deletion
    scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
    assertScan(table, expected, scan);
    // delete "metric1" data at the first timestamp ts and verify the deletion
    scan = new FactScan(ts, ts + 1, "metric1", dimensionValues);
    table.delete(scan);
    expected.clear();
    expected.put("metric1", dimensionValues, ImmutableList.of(new TimeValue(ts + resolution, 27)));
    scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric1", dimensionValues);
    assertScan(table, expected, scan);
    // verify the next dims search
    Collection<DimensionValue> nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
    Map<String, String> slice = Maps.newHashMap();
    slice.put("dim1", null);
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2"), ts, ts + 3);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim3", "value3")), nextTags);
    // add new dim values
    dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", null));
    table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
    dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", null), new DimensionValue("dim3", "value3"));
    table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", "value3")), nextTags);
    // search for metric names given dims list and verify
    Collection<String> metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2", "dim3", "value3"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim2", "value2"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
}
Also used : Measurement(co.cask.cdap.api.dataset.lib.cube.Measurement) DimensionValue(co.cask.cdap.api.dataset.lib.cube.DimensionValue) InMemoryMetricsTable(co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) TimeValue(co.cask.cdap.api.dataset.lib.cube.TimeValue) Test(org.junit.Test)

Example 5 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTableTest, method testCache.

@Test
public void testCache() throws Exception {
    String tableName = "testCacheTable";
    String entityTableName = "testCacheEntityTable";
    InMemoryTableService.create(tableName);
    InMemoryTableService.create(entityTableName);
    int resolution = 5;
    InMemoryMetricsTable metricsTable = new InMemoryMetricsTable(tableName);
    FactTable table = new FactTable(metricsTable, new EntityTable(new InMemoryMetricsTable(entityTableName)), resolution, 2);
    // set the metrics collector for the table
    FactTableMetricsCollector metricsCollector = new FactTableMetricsCollector(resolution);
    table.setMetricsCollector(metricsCollector);
    // Initially the cache should be empty
    Assert.assertEquals(0, table.getFactCounterCache().size());
    // add some values with the current ts
    long timestampNow = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()) / resolution * resolution;
    List<DimensionValue> dims = dimValues("dim1", "dim2");
    List<Fact> metrics = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        metrics.add(new Fact(timestampNow, dims, new Measurement("metric" + i, MeasureType.COUNTER, 1)));
    }
    table.add(metrics);
    // Since there were no previous adds to the metric store, these writes should still be treated as COUNTER
    // increments, and the cache should be updated.
    Assert.assertEquals(10, metricsCollector.getLastIncrementSize());
    Assert.assertEquals(0, metricsCollector.getLastGaugeSize());
    Assert.assertEquals(10, table.getFactCounterCache().size());
    for (long value : table.getFactCounterCache().asMap().values()) {
        Assert.assertEquals(timestampNow, value);
    }
    // Add metrics older than the current timestamp; these writes should still be treated as COUNTER increments,
    // and the cache should NOT be updated.
    metrics = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        metrics.add(new Fact(timestampNow - 5, dims, new Measurement("metric" + i, MeasureType.COUNTER, 1)));
    }
    table.add(metrics);
    Assert.assertEquals(10, metricsCollector.getLastIncrementSize());
    Assert.assertEquals(0, metricsCollector.getLastGaugeSize());
    Assert.assertEquals(10, table.getFactCounterCache().size());
    for (long value : table.getFactCounterCache().asMap().values()) {
        Assert.assertEquals(timestampNow, value);
    }
    // Now insert metrics newer than the current timestamp; the writes should be treated as GAUGEs, and the cache
    // should be updated.
    metrics = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        metrics.add(new Fact(timestampNow + 5, dims, new Measurement("metric" + i, MeasureType.COUNTER, 1)));
    }
    table.add(metrics);
    Assert.assertEquals(0, metricsCollector.getLastIncrementSize());
    Assert.assertEquals(10, metricsCollector.getLastGaugeSize());
    Assert.assertEquals(10, table.getFactCounterCache().size());
    for (long value : table.getFactCounterCache().asMap().values()) {
        Assert.assertEquals(timestampNow + 5, value);
    }
}
Also used : Measurement(io.cdap.cdap.api.dataset.lib.cube.Measurement) ArrayList(java.util.ArrayList) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) InMemoryMetricsTable(io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) Test(org.junit.Test)
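
The three phases of the test pin down how the fact counter cache is used; the sketch below restates that decision as standalone code. It is inferred from the assertions above rather than copied from FactTable: a fact whose timestamp is at or before the cached timestamp for its series must be written as an increment, while a strictly newer fact can be written as a gauge (its time bucket cannot have been written yet) and the cached timestamp advances. The Sink interface and class name are hypothetical.

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

public final class CounterCacheSketch {
    // hypothetical stand-ins for the increment and gauge write paths; not CDAP APIs
    interface Sink {
        void add(String seriesKey, long ts, long value);
    }

    private final Cache<String, Long> factCounterCache = CacheBuilder.newBuilder().build();

    void write(String seriesKey, long ts, long value, Sink increments, Sink gauges) {
        Long cached = factCounterCache.getIfPresent(seriesKey);
        if (cached == null || ts <= cached) {
            // unknown or older/equal timestamp: must merge with any value already stored
            increments.add(seriesKey, ts, value);
            if (cached == null) {
                factCounterCache.put(seriesKey, ts);
            }
        } else {
            // strictly newer bucket: a plain put suffices, and the cached timestamp advances
            gauges.add(seriesKey, ts, value);
            factCounterCache.put(seriesKey, ts);
        }
    }
}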

Aggregations

DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue) 7
DimensionValue (co.cask.cdap.api.dataset.lib.cube.DimensionValue) 5
Test (org.junit.Test) 5
Measurement (io.cdap.cdap.api.dataset.lib.cube.Measurement) 4
ImmutableList (com.google.common.collect.ImmutableList) 3
Row (io.cdap.cdap.api.dataset.table.Row) 3
Scanner (io.cdap.cdap.api.dataset.table.Scanner) 3
FuzzyRowFilter (io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter) 3
InMemoryMetricsTable (io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) 3
IOException (java.io.IOException) 3
ArrayList (java.util.ArrayList) 3
List (java.util.List) 3
Map (java.util.Map) 3
Measurement (co.cask.cdap.api.dataset.lib.cube.Measurement) 2
Row (co.cask.cdap.api.dataset.table.Row) 2
Scanner (co.cask.cdap.api.dataset.table.Scanner) 2
FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter) 2
InMemoryMetricsTable (co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) 2
FactTable (co.cask.cdap.data2.dataset2.lib.timeseries.FactTable) 2
FactTable (io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable) 2