use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class FactTable method add.
/**
 * Writes the given facts to the underlying time series table.
 *
 * <p>Non-counter measurements are written as puts. Counter measurements are written as increments,
 * except when the counter cache is enabled and already holds, for the same dimension values and
 * measure name, a timestamp older than the fact's resolution-rounded timestamp; in that case the
 * value is accumulated locally and written as a put.
 *
 * @param facts facts to write
 */
public void add(List<Fact> facts) {
  // Rows/columns/values that will be written with a plain put.
  NavigableMap<byte[], NavigableMap<byte[], Long>> puts = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
  // Rows/columns/values that will be written with an increment.
  NavigableMap<byte[], NavigableMap<byte[], Long>> increments = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
  // COUNTER measurements that are treated as gauges because they are newer than anything the cache
  // has seen for that key (i.e. expected to land on a cell that was not written before).
  NavigableMap<byte[], NavigableMap<byte[], Long>> counterPuts = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
  // Newest rounded timestamp seen per cache key in this batch; applied to the cache at the end.
  Map<FactCacheKey, Long> pendingCacheTs = new HashMap<>();
  boolean cacheEnabled = factCounterCache != null;

  for (Fact fact : facts) {
    for (Measurement measurement : fact.getMeasurements()) {
      byte[] rowKey = codec.createRowKey(fact.getDimensionValues(), measurement.getName(), fact.getTimestamp());
      byte[] column = codec.createColumn(fact.getTimestamp());

      if (MeasureType.COUNTER != measurement.getType()) {
        puts.computeIfAbsent(rowKey, k -> Maps.newTreeMap(Bytes.BYTES_COMPARATOR))
          .put(column, measurement.getValue());
        continue;
      }

      if (!cacheEnabled) {
        inc(increments, rowKey, column, measurement.getValue());
        continue;
      }

      // round to the resolution timestamp
      long roundedTs = fact.getTimestamp() / resolution * resolution;
      FactCacheKey cacheKey = new FactCacheKey(fact.getDimensionValues(), measurement.getName());
      Long cachedTs = factCounterCache.getIfPresent(cacheKey);
      boolean newerThanCached = cachedTs != null && cachedTs < roundedTs;

      if (newerThanCached) {
        // strictly newer than the cached timestamp: accumulate locally and write as a put
        inc(counterPuts, rowKey, column, measurement.getValue());
      } else {
        // unknown key, or not newer than the cached timestamp: must go through a real increment
        inc(increments, rowKey, column, measurement.getValue());
      }

      // remember the largest rounded timestamp of this batch whenever the cache entry is
      // missing or stale
      if (cachedTs == null || newerThanCached) {
        pendingCacheTs.merge(cacheKey, roundedTs, Long::max);
      }
    }
  }

  if (cacheEnabled) {
    puts.putAll(counterPuts);
    factCounterCache.putAll(pendingCacheTs);
  }

  // todo: replace with single call, to be able to optimize rpcs in underlying table
  timeSeriesTable.put(puts);
  timeSeriesTable.increment(increments);

  if (metrics != null) {
    metrics.increment(putCountMetric, puts.size());
    metrics.increment(incrementCountMetric, increments.size());
  }
}
use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class FactTable method findMeasureNames.
/**
 * Collects the measure names of all facts matching the given {@link DimensionValue}s and time range.
 * The scan stops early once more than {@code MAX_RECORDS_TO_SCAN_DURING_SEARCH} rows have been read.
 *
 * @param allDimensionNames list of all dimension names to be present in the fact record
 * @param dimensionSlice dimension values to filter by, {@code null} means any non-null value.
 * @param startTs start timestamp, in sec
 * @param endTs end timestamp, in sec
 * @return {@link Set} of measure names
 */
// todo: pass a limit on number of measures returned
public Set<String> findMeasureNames(List<String> allDimensionNames, Map<String, String> dimensionSlice, long startTs, long endTs) {
  // One DimensionValue per requested dimension; the value comes from the slice (may be null).
  List<DimensionValue> dimensions = Lists.newArrayList();
  for (String name : allDimensionNames) {
    String value = dimensionSlice.get(name);
    dimensions.add(new DimensionValue(name, value));
  }

  byte[] startRow = codec.createStartRowKey(dimensions, null, startTs, false);
  byte[] endRow = Bytes.stopKeyForPrefix(codec.createEndRowKey(dimensions, null, endTs, false));
  FactScan factScan = new FactScan(startTs, endTs, Collections.emptyList(), dimensions);
  FuzzyRowFilter filter = createFuzzyRowFilter(factScan, startRow);

  Set<String> found = Sets.newHashSet();
  int scannedRecords = 0;
  try (Scanner scanner = timeSeriesTable.scan(startRow, endRow, filter)) {
    for (Row row = scanner.next(); row != null; row = scanner.next()) {
      if (++scannedRecords > MAX_RECORDS_TO_SCAN_DURING_SEARCH) {
        break;
      }
      byte[] rowKey = row.getRow();
      // filter out columns by time range (scan configuration only filters whole rows)
      long tsAtStartColumn = codec.getTimestamp(rowKey, codec.createColumn(startTs));
      if (tsAtStartColumn < startTs) {
        continue;
      }
      long tsAtEndColumn = codec.getTimestamp(rowKey, codec.createColumn(endTs));
      if (tsAtEndColumn > endTs) {
        // past the requested range: nothing more to read from the scanner
        break;
      }
      found.add(codec.getMeasureName(row.getRow()));
    }
  }
  LOG.trace("search for measures completed, scanned records: {}", scannedRecords);
  return found;
}
use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class FactTable method getScanner.
/**
 * Opens a scanner over the time series table for the given scan.
 *
 * <p>The start and end row keys are built from the scan's dimension values and time range, using the
 * first and last of the sorted measure names (or {@code null} when the scan names no measures).
 * The fuzzy row filter is built from the start row when no measure names are given, and from the
 * sorted measure names otherwise.
 *
 * @param scan the scan describing dimensions, measures and time range
 * @return scanner returned by the underlying table; the caller is responsible for closing it
 */
private Scanner getScanner(FactScan scan) {
  // sort the measures based on their entity ids and based on that get the start and end row key metric names
  List<String> measureNames = getSortedMeasures(scan.getMeasureNames());
  boolean anyMeasure = measureNames.isEmpty();
  String firstMeasure = anyMeasure ? null : measureNames.get(0);
  String lastMeasure = anyMeasure ? null : measureNames.get(measureNames.size() - 1);

  byte[] startRow = codec.createStartRowKey(scan.getDimensionValues(), firstMeasure, scan.getStartTs(), false);
  byte[] endRow = codec.createEndRowKey(scan.getDimensionValues(), lastMeasure, scan.getEndTs(), false);
  // NOTE: no per-column subset is computed here: the table scan below takes only the row range and
  // the filter, so callers are expected to filter columns by time themselves.
  endRow = Bytes.stopKeyForPrefix(endRow);

  FuzzyRowFilter fuzzyRowFilter = anyMeasure
    ? createFuzzyRowFilter(scan, startRow)
    : createFuzzyRowFilter(scan, measureNames);

  if (LOG.isTraceEnabled()) {
    // endRow is null-checked because the stop key computed above is treated as nullable
    LOG.trace("Scanning fact table {} with scan: {}; constructed startRow: {}, endRow: {}, fuzzyRowFilter: {}", timeSeriesTable, scan, Bytes.toHexString(startRow), endRow == null ? null : Bytes.toHexString(endRow), fuzzyRowFilter);
  }
  return timeSeriesTable.scan(startRow, endRow, fuzzyRowFilter);
}
use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class DefaultCube method add.
/**
 * Converts each cube fact into one time series {@link Fact} per aggregation that accepts it and
 * writes the resulting facts to every fact table (one per resolution). Also reports request and
 * fact count metrics.
 *
 * @param facts cube facts to add
 */
@Override
public void add(Collection<? extends CubeFact> facts) {
  List<Fact> tsFacts = Lists.newArrayList();
  int dimValuesCount = 0;

  for (CubeFact cubeFact : facts) {
    for (Map.Entry<String, ? extends Aggregation> entry : aggregations.entrySet()) {
      Aggregation aggregation = entry.getValue();
      // null when no alias is configured for this aggregation
      AggregationAlias alias = aggregationAliasMap.get(entry.getKey());
      if (!aggregation.accept(cubeFact)) {
        continue;
      }

      List<DimensionValue> dimensionValues = Lists.newArrayList();
      for (String dimensionName : aggregation.getDimensionNames()) {
        // the value is looked up in the fact under the aliased name, but stored under the
        // aggregation's own dimension name
        String lookupKey;
        if (alias == null) {
          lookupKey = dimensionName;
        } else {
          lookupKey = alias.getAlias(dimensionName);
        }
        dimensionValues.add(new DimensionValue(dimensionName, cubeFact.getDimensionValues().get(lookupKey)));
      }
      dimValuesCount += dimensionValues.size();
      tsFacts.add(new Fact(cubeFact.getTimestamp(), dimensionValues, cubeFact.getMeasurements()));
    }
  }

  for (FactTable factTable : resolutionToFactTable.values()) {
    factTable.add(tsFacts);
  }

  incrementMetric("cube.cubeFact.add.request.count", 1);
  incrementMetric("cube.cubeFact.added.count", facts.size());
  incrementMetric("cube.tsFact.created.count", tsFacts.size());
  incrementMetric("cube.tsFact.created.dimValues.count", dimValuesCount);
  incrementMetric("cube.tsFact.added.count", tsFacts.size() * resolutionToFactTable.size());
}
use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class DefaultCube method delete.
/**
 * Deletes the entries matching the query from the fact table of the query's resolution, once for
 * every aggregation whose dimension names cover all dimensions named in the query.
 *
 * <p>This may be very inefficient and it is better to use TTL; it exists only to support existing
 * old functionality.
 *
 * @param query describes the resolution, time range, measures and dimension values to delete
 */
@Override
public void delete(CubeDeleteQuery query) {
  // reused across aggregations; reset before each use
  List<DimensionValue> scanDimensions = Lists.newArrayList();

  for (Aggregation aggregation : aggregations.values()) {
    // only aggregations that contain every dimension of the query can hold matching entries
    boolean coversQuery = aggregation.getDimensionNames().containsAll(query.getDimensionValues().keySet());
    if (!coversQuery) {
      continue;
    }

    // use the dimension values of the aggregation to delete entries in the fact table
    scanDimensions.clear();
    for (String name : aggregation.getDimensionNames()) {
      String value = query.getDimensionValues().get(name);
      scanDimensions.add(new DimensionValue(name, value));
    }

    FactTable table = resolutionToFactTable.get(query.getResolution());
    table.delete(new FactScan(query.getStartTs(), query.getEndTs(), query.getMeasureNames(), scanDimensions));
  }
}
Aggregations