Examples with Fact - io.cdap.cdap.data2.dataset2.lib.timeseries.Fact

Example 6 with Fact

use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

the class FactTable method add.

/**
 * Writes the measurements of the given facts to the underlying time series table.
 *
 * <p>Measurements whose type is not {@code COUNTER} are written with a put. {@code COUNTER}
 * measurements are normally written as increments; when a counter cache is configured and it
 * holds a strictly older resolution-rounded timestamp for the same dimension values and measure
 * name, the values are instead accumulated locally and written with a put.
 *
 * @param facts facts to write
 */
public void add(List<Fact> facts) {
    // Values to be written with put semantics, keyed by row and then by column.
    NavigableMap<byte[], NavigableMap<byte[], Long>> putsByRow = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    // Values to be written with increment semantics, keyed by row and then by column.
    NavigableMap<byte[], NavigableMap<byte[], Long>> incrementsByRow = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    // COUNTER measurements that can be treated as gauges: per the cache they are expected to land on a
    // row key that is new in the underlying table, so they are merged into the puts further below.
    NavigableMap<byte[], NavigableMap<byte[], Long>> counterPutsByRow = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    // Newest resolution-rounded timestamp per cache key, applied to the cache after all facts are processed.
    Map<FactCacheKey, Long> pendingCacheTs = new HashMap<>();

    for (Fact fact : facts) {
        for (Measurement measurement : fact.getMeasurements()) {
            byte[] rowKey = codec.createRowKey(fact.getDimensionValues(), measurement.getName(), fact.getTimestamp());
            byte[] column = codec.createColumn(fact.getTimestamp());

            if (MeasureType.COUNTER != measurement.getType()) {
                // Non-counter measurement: the latest value for a row/column simply overwrites.
                NavigableMap<byte[], Long> rowPuts = putsByRow.get(rowKey);
                if (rowPuts == null) {
                    rowPuts = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
                    putsByRow.put(rowKey, rowPuts);
                }
                rowPuts.put(column, measurement.getValue());
                continue;
            }

            if (factCounterCache == null) {
                // Without a cache every counter has to go through an increment.
                inc(incrementsByRow, rowKey, column, measurement.getValue());
                continue;
            }

            // Round the fact timestamp down to the resolution boundary.
            long tsToResolution = fact.getTimestamp() / resolution * resolution;
            FactCacheKey cacheKey = new FactCacheKey(fact.getDimensionValues(), measurement.getName());
            Long existingTs = factCounterCache.getIfPresent(cacheKey);

            // Only a cached timestamp strictly older than the current one lets the counter be handled as a gauge;
            // an unknown key or a same/newer cached timestamp must be incremented.
            boolean newerThanCached = existingTs != null && existingTs < tsToResolution;
            NavigableMap<byte[], NavigableMap<byte[], Long>> target = newerThanCached ? counterPutsByRow : incrementsByRow;
            inc(target, rowKey, column, measurement.getValue());

            // Remember the largest timestamp seen for this key whenever the cache entry is missing or older.
            if (existingTs == null || newerThanCached) {
                pendingCacheTs.merge(cacheKey, tsToResolution, Long::max);
            }
        }
    }

    if (factCounterCache != null) {
        putsByRow.putAll(counterPutsByRow);
        factCounterCache.putAll(pendingCacheTs);
    }

    // todo: replace with single call, to be able to optimize rpcs in underlying table
    timeSeriesTable.put(putsByRow);
    timeSeriesTable.increment(incrementsByRow);

    if (metrics != null) {
        metrics.increment(putCountMetric, putsByRow.size());
        metrics.increment(incrementCountMetric, incrementsByRow.size());
    }
}

Also used : Measurement(io.cdap.cdap.api.dataset.lib.cube.Measurement) Arrays(java.util.Arrays) ImmutablePair(io.cdap.cdap.common.utils.ImmutablePair) LoggerFactory(org.slf4j.LoggerFactory) Bytes(io.cdap.cdap.api.common.Bytes) FuzzyRowFilter(io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter) HashMap(java.util.HashMap) MetricsTable(io.cdap.cdap.data2.dataset2.lib.table.MetricsTable) ArrayList(java.util.ArrayList) Row(io.cdap.cdap.api.dataset.table.Row) MetricsCollector(io.cdap.cdap.api.metrics.MetricsCollector) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) Scanner(io.cdap.cdap.api.dataset.table.Scanner) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) Collection(java.util.Collection) Set(java.util.Set) IOException(java.io.IOException) NavigableMap(java.util.NavigableMap) Maps(com.google.common.collect.Maps) Sets(com.google.common.collect.Sets) Objects(java.util.Objects) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) TreeMap(java.util.TreeMap) Measurement(io.cdap.cdap.api.dataset.lib.cube.Measurement) Closeable(java.io.Closeable) Preconditions(com.google.common.base.Preconditions) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) MeasureType(io.cdap.cdap.api.dataset.lib.cube.MeasureType) VisibleForTesting(com.google.common.annotations.VisibleForTesting) CacheBuilder(com.google.common.cache.CacheBuilder) Cache(com.google.common.cache.Cache) Comparator(java.util.Comparator) Collections(java.util.Collections) NavigableMap(java.util.NavigableMap) HashMap(java.util.HashMap)

Example 7 with Fact

use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

the class FactTable method findMeasureNames.

/**
 * Collects the names of all measures found in fact rows that match the given
 * {@link DimensionValue}s within a time range.
 *
 * <p>At most {@code MAX_RECORDS_TO_SCAN_DURING_SEARCH} rows are examined; the search stops early
 * once that many rows have been scanned or a row past {@code endTs} is reached.
 *
 * @param allDimensionNames list of all dimension names to be present in the fact record
 * @param dimensionSlice dimension values to filter by, {@code null} means any non-null value.
 * @param startTs start timestamp, in sec
 * @param endTs end timestamp, in sec
 * @return {@link Set} of measure names
 */
// todo: pass a limit on number of measures returned
public Set<String> findMeasureNames(List<String> allDimensionNames, Map<String, String> dimensionSlice, long startTs, long endTs) {
    // Pair every requested dimension name with its value from the slice (absent names map to null).
    List<DimensionValue> dimensions = Lists.newArrayList();
    for (String name : allDimensionNames) {
        String value = dimensionSlice.get(name);
        dimensions.add(new DimensionValue(name, value));
    }

    byte[] scanStart = codec.createStartRowKey(dimensions, null, startTs, false);
    byte[] scanStop = Bytes.stopKeyForPrefix(codec.createEndRowKey(dimensions, null, endTs, false));
    FuzzyRowFilter rowFilter = createFuzzyRowFilter(new FactScan(startTs, endTs, Collections.emptyList(), dimensions), scanStart);

    Set<String> found = Sets.newHashSet();
    int scannedRecords = 0;
    try (Scanner scanner = timeSeriesTable.scan(scanStart, scanStop, rowFilter)) {
        for (Row row = scanner.next(); row != null; row = scanner.next()) {
            if (++scannedRecords > MAX_RECORDS_TO_SCAN_DURING_SEARCH) {
                break;
            }
            byte[] rowKey = row.getRow();
            // The scan configuration filters whole rows only, so the time range is re-checked here.
            long tsAtStartColumn = codec.getTimestamp(rowKey, codec.createColumn(startTs));
            if (tsAtStartColumn < startTs) {
                continue;
            }
            long tsAtEndColumn = codec.getTimestamp(rowKey, codec.createColumn(endTs));
            if (tsAtEndColumn > endTs) {
                // Past the requested range: nothing more to read from the scanner.
                break;
            }
            found.add(codec.getMeasureName(rowKey));
        }
    }
    LOG.trace("search for measures completed, scanned records: {}", scannedRecords);
    return found;
}

Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) Row(io.cdap.cdap.api.dataset.table.Row) FuzzyRowFilter(io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter)

Example 8 with Fact

use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

the class FactTable method getScanner.

/**
 * Opens a scanner over the time series table for the rows selected by the given scan.
 *
 * <p>The row range is bounded by start/end keys built from the scan's dimension values, its
 * start/end timestamps and the first/last of the sorted measure names ({@code null} measure name
 * when the scan names no measures). A fuzzy row filter narrows the rows inside that range. The
 * scan is issued at row level only: no column restriction is passed to the table.
 *
 * @param scan scan describing dimension values, measure names and time range
 * @return scanner over the matching rows of the time series table
 */
private Scanner getScanner(FactScan scan) {
    // Sorted measure names give the lowest and highest measure to bound the row range with.
    List<String> measureNames = getSortedMeasures(scan.getMeasureNames());
    boolean noMeasures = measureNames.isEmpty();
    String firstMeasure = noMeasures ? null : measureNames.get(0);
    String lastMeasure = noMeasures ? null : measureNames.get(measureNames.size() - 1);

    byte[] startRow = codec.createStartRowKey(scan.getDimensionValues(), firstMeasure, scan.getStartTs(), false);
    byte[] endRow = codec.createEndRowKey(scan.getDimensionValues(), lastMeasure, scan.getEndTs(), false);
    // Turn the end key into an exclusive stop key covering everything prefixed by it.
    endRow = Bytes.stopKeyForPrefix(endRow);

    FuzzyRowFilter fuzzyRowFilter = noMeasures ? createFuzzyRowFilter(scan, startRow) : createFuzzyRowFilter(scan, measureNames);
    if (LOG.isTraceEnabled()) {
        // endRow is null-checked before hex encoding, matching the guard the original code had here.
        LOG.trace("Scanning fact table {} with scan: {}; constructed startRow: {}, endRow: {}, fuzzyRowFilter: {}", timeSeriesTable, scan, Bytes.toHexString(startRow), endRow == null ? null : Bytes.toHexString(endRow), fuzzyRowFilter);
    }
    return timeSeriesTable.scan(startRow, endRow, fuzzyRowFilter);
}

Also used : FuzzyRowFilter(io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter)

Example 9 with Fact

use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

the class DefaultCube method add.

/**
 * Converts the given cube facts into time series facts, one per accepting aggregation, and
 * writes them to every fact table of this cube. Request and volume metrics are reported afterwards.
 *
 * @param facts cube facts to add
 */
@Override
public void add(Collection<? extends CubeFact> facts) {
    List<Fact> tsFacts = Lists.newArrayList();
    int dimValueTotal = 0;

    for (CubeFact cubeFact : facts) {
        for (Map.Entry<String, ? extends Aggregation> entry : aggregations.entrySet()) {
            Aggregation aggregation = entry.getValue();
            // An aggregation may define aliases that rename its dimensions to keys of the incoming fact.
            AggregationAlias alias = aggregationAliasMap.containsKey(entry.getKey())
                ? aggregationAliasMap.get(entry.getKey())
                : null;
            if (!aggregation.accept(cubeFact)) {
                continue;
            }

            List<DimensionValue> values = Lists.newArrayList();
            for (String dimensionName : aggregation.getDimensionNames()) {
                String lookupKey;
                if (alias == null) {
                    lookupKey = dimensionName;
                } else {
                    lookupKey = alias.getAlias(dimensionName);
                }
                values.add(new DimensionValue(dimensionName, cubeFact.getDimensionValues().get(lookupKey)));
                dimValueTotal++;
            }
            tsFacts.add(new Fact(cubeFact.getTimestamp(), values, cubeFact.getMeasurements()));
        }
    }

    // The same set of time series facts goes to the fact table of every resolution.
    for (FactTable factTable : resolutionToFactTable.values()) {
        factTable.add(tsFacts);
    }

    incrementMetric("cube.cubeFact.add.request.count", 1);
    incrementMetric("cube.cubeFact.added.count", facts.size());
    incrementMetric("cube.tsFact.created.count", tsFacts.size());
    incrementMetric("cube.tsFact.created.dimValues.count", dimValueTotal);
    incrementMetric("cube.tsFact.added.count", tsFacts.size() * resolutionToFactTable.size());
}

Also used : CubeFact(co.cask.cdap.api.dataset.lib.cube.CubeFact) FactTable(co.cask.cdap.data2.dataset2.lib.timeseries.FactTable) DimensionValue(co.cask.cdap.api.dataset.lib.cube.DimensionValue) CubeFact(co.cask.cdap.api.dataset.lib.cube.CubeFact) Fact(co.cask.cdap.data2.dataset2.lib.timeseries.Fact) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 10 with Fact

use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

the class DefaultCube method delete.

/**
 * Deletes the entries selected by the query from the fact table of the query's resolution.
 *
 * <p>A delete scan is issued once for every aggregation whose dimension names include all
 * dimensions named in the query.
 *
 * @param query delete query giving dimension values, measure names, time range and resolution
 */
@Override
public void delete(CubeDeleteQuery query) {
    // this may be very inefficient and its better to use TTL, this is to only support existing old functionality.
    // One list is reused across aggregations and refilled for each match.
    List<DimensionValue> scanDimensions = Lists.newArrayList();
    for (Aggregation aggregation : aggregations.values()) {
        boolean coversQuery = aggregation.getDimensionNames().containsAll(query.getDimensionValues().keySet());
        if (!coversQuery) {
            continue;
        }

        // Use the aggregation's own dimensions, filled with the query's values, to build the delete scan.
        scanDimensions.clear();
        for (String name : aggregation.getDimensionNames()) {
            String value = query.getDimensionValues().get(name);
            scanDimensions.add(new DimensionValue(name, value));
        }

        FactTable table = resolutionToFactTable.get(query.getResolution());
        FactScan deleteScan = new FactScan(query.getStartTs(), query.getEndTs(), query.getMeasureNames(), scanDimensions);
        table.delete(deleteScan);
    }
}

Also used : FactScan(co.cask.cdap.data2.dataset2.lib.timeseries.FactScan) FactTable(co.cask.cdap.data2.dataset2.lib.timeseries.FactTable) DimensionValue(co.cask.cdap.api.dataset.lib.cube.DimensionValue)

Aggregations

DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue) 7, DimensionValue (co.cask.cdap.api.dataset.lib.cube.DimensionValue) 5, Test (org.junit.Test) 5, Measurement (io.cdap.cdap.api.dataset.lib.cube.Measurement) 4, ImmutableList (com.google.common.collect.ImmutableList) 3, Row (io.cdap.cdap.api.dataset.table.Row) 3, Scanner (io.cdap.cdap.api.dataset.table.Scanner) 3, FuzzyRowFilter (io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter) 3, InMemoryMetricsTable (io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) 3, IOException (java.io.IOException) 3, ArrayList (java.util.ArrayList) 3, List (java.util.List) 3, Map (java.util.Map) 3, Measurement (co.cask.cdap.api.dataset.lib.cube.Measurement) 2, Row (co.cask.cdap.api.dataset.table.Row) 2, Scanner (co.cask.cdap.api.dataset.table.Scanner) 2, FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter) 2, InMemoryMetricsTable (co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) 2, FactTable (co.cask.cdap.data2.dataset2.lib.timeseries.FactTable) 2, FactTable (io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable) 2