use of co.cask.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
the class FactTableTest method testPreSplits.
/**
 * Writes one fact for each of three different dimension combinations and checks that the
 * resulting rows of the data table fall into three distinct ranges delimited by the keys
 * returned from {@code FactTable.getSplits(3)}.
 *
 * <p>A row is attributed to the first split key it sorts strictly before; a row that sorts at or
 * after every split key is attributed to the trailing range (index {@code splits.length}).
 *
 * @throws Exception if writing to or scanning the in-memory tables fails
 */
@Test
public void testPreSplits() throws Exception {
  InMemoryTableService.create("presplitEntityTable");
  InMemoryTableService.create("presplitDataTable");
  int resolution = 10;
  int rollTimebaseInterval = 2;
  // keep a handle on the data table so the raw rows can be scanned after the writes
  InMemoryMetricsTable metricsTable = new InMemoryMetricsTable("presplitDataTable");
  FactTable table = new FactTable(metricsTable,
                                  new EntityTable(new InMemoryMetricsTable("presplitEntityTable")),
                                  resolution, rollTimebaseInterval);
  // NOTE(review): the argument is presumably the number of aggregation groups - confirm against FactTable
  byte[][] splits = FactTable.getSplits(3);
  // "/ 1000": timestamps are passed in seconds
  long ts = System.currentTimeMillis() / 1000;
  DimensionValue dimVal1 = new DimensionValue("dim1", "value1");
  DimensionValue dimVal2 = new DimensionValue("dim2", "value2");
  DimensionValue dimVal3 = new DimensionValue("dim3", "value3");
  // first agg view: dim1
  table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1),
                                      new Measurement("metric1", MeasureType.COUNTER, 1))));
  // second agg view: dim1 & dim2
  table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1, dimVal2),
                                      new Measurement("metric1", MeasureType.COUNTER, 1))));
  // third agg view: dim3
  table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal3),
                                      new Measurement("metric1", MeasureType.COUNTER, 1))));
  // Verify all written records are spread across splits
  Set<Integer> splitsWithRows = Sets.newHashSet();
  Scanner scanner = metricsTable.scan(null, null, null);
  try {
    Row row;
    while ((row = scanner.next()) != null) {
      boolean added = false;
      for (int i = 0; i < splits.length; i++) {
        if (Bytes.compareTo(row.getRow(), splits[i]) < 0) {
          splitsWithRows.add(i);
          added = true;
          break;
        }
      }
      if (!added) {
        // falls into last split
        splitsWithRows.add(splits.length);
      }
    }
  } finally {
    // release the scanner even when an assertion or read error aborts the loop
    scanner.close();
  }
  Assert.assertEquals(3, splitsWithRows.size());
}
use of co.cask.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
the class FactTableTest method testMaxResolution.
/**
 * Checks that a fact table created with {@code Integer.MAX_VALUE} as its resolution reports a
 * single total per metric: 1000 data points, one day apart, are written for each of ten metrics
 * and each metric is expected to come back as one {@code TimeValue} at timestamp 0.
 */
@Test
public void testMaxResolution() throws Exception {
  // Integer.MAX_VALUE as resolution is used to compute all-time total values
  InMemoryTableService.create("TotalsEntityTable");
  InMemoryTableService.create("TotalsDataTable");
  final int maxResolution = Integer.MAX_VALUE;
  // the roll interval should not matter when resolution is max
  final int rollInterval = 3600;
  InMemoryMetricsTable dataTable = new InMemoryMetricsTable("TotalsDataTable");
  EntityTable entityTable = new EntityTable(new InMemoryMetricsTable("TotalsEntityTable"));
  FactTable table = new FactTable(dataTable, entityTable, maxResolution, rollInterval);

  // timestamps are expected in seconds
  final long startTs = System.currentTimeMillis() / 1000;
  final int secondsPerDay = 60 * 60 * 24;
  final int metricCount = 10;
  final int count = 1000;

  for (int point = 0; point < count; point++) {
    // every data point is shifted by one more day than the previous one
    long pointTs = startTs + point * secondsPerDay;
    for (int k = 0; k < metricCount; k++) {
      writeInc(table, "metric" + k, pointTs, point * k, "dim" + k, "value" + k);
    }
  }

  for (int k = 0; k < metricCount; k++) {
    String metric = "metric" + k;
    String dimName = "dim" + k;
    String dimValue = "value" + k;
    // a (0, 0) time range should match the timestamp of all data points
    FactScan scan = new FactScan(0, 0, metric, dimValues(dimName, dimValue));
    // sum over point = 0..count-1 of (point * k)
    TimeValue total = new TimeValue(0, k * count * (count - 1) / 2);
    Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
    expected.put(metric, dimValues(dimName, dimValue), ImmutableList.of(total));
    assertScan(table, expected, scan);
  }
}
use of co.cask.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
the class FactTableTest method testBasics.
/**
 * End-to-end check of a FactTable backed by in-memory tables (resolution 10, roll interval 2):
 * adds counter facts for three metrics one by one and as a batch, then verifies scans by metric,
 * scans without a metric, deletes, and the findSingleDimensionValue / findMeasureNames lookups.
 *
 * <p>For metric "metric&lt;k&gt;" (k = 1..3) the writes below add up to 11 * k in the bucket at
 * {@code ts}, 27 * k in the bucket at {@code ts + resolution} and 6 * k in the bucket at
 * {@code ts + 2 * resolution}; the later assertions rely on these totals and on the order of the
 * delete calls.
 */
@Test
public void testBasics() throws Exception {
InMemoryTableService.create("EntityTable");
InMemoryTableService.create("DataTable");
int resolution = 10;
int rollTimebaseInterval = 2;
FactTable table = new FactTable(new InMemoryMetricsTable("DataTable"), new EntityTable(new InMemoryMetricsTable("EntityTable")), resolution, rollTimebaseInterval);
// aligned to start of resolution bucket
// "/1000" because time is expected to be in seconds
long ts = ((System.currentTimeMillis() / 1000) / resolution) * resolution;
// testing encoding with multiple dims
List<DimensionValue> dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value2"), new DimensionValue("dim3", "value3"));
// trying adding one by one, in same (first) time resolution bucket
// contributes 5 * k to the bucket at ts
for (int i = 0; i < 5; i++) {
for (int k = 1; k < 4; k++) {
// note: "+i" here and below doesn't affect results, just to confirm
// that data points are rounded to the resolution
table.add(ImmutableList.of(new Fact(ts + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, k))));
}
}
// trying adding one by one, in different time resolution buckets
// contributes 2 * k to each of the three buckets
for (int i = 0; i < 3; i++) {
for (int k = 1; k < 4; k++) {
table.add(ImmutableList.of(new Fact(ts + resolution * i + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 2 * k))));
}
}
// trying adding as list
// first incs in same (second) time resolution bucket
// "i" is intentionally unused: the same fact is queued 7 times, contributing 21 * k to the second bucket
List<Fact> aggs = Lists.newArrayList();
for (int i = 0; i < 7; i++) {
for (int k = 1; k < 4; k++) {
aggs.add(new Fact(ts + resolution, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 3 * k)));
}
}
// then incs in different time resolution buckets
// contributes 4 * k to each of the three buckets
for (int i = 0; i < 3; i++) {
for (int k = 1; k < 4; k++) {
aggs.add(new Fact(ts + resolution * i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 4 * k)));
}
}
// a single add() call writes the whole batch
table.add(aggs);
// verify each metric
// the scan range is wider than the written data on both sides; only the three written buckets are expected
for (int k = 1; k < 4; k++) {
FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric" + k, dimensionValues);
Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
assertScan(table, expected, scan);
}
// verify each metric within a single timeBase
// the scan range covers only the first resolution bucket, so only its total is expected
for (int k = 1; k < 4; k++) {
FactScan scan = new FactScan(ts, ts + resolution - 1, "metric" + k, dimensionValues);
Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k)));
assertScan(table, expected, scan);
}
// verify all metrics with fuzzy metric in scan
Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
for (int k = 1; k < 4; k++) {
expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
}
// metric = null means "all"
// (this FactScan is built without a metric name argument)
FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
assertScan(table, expected, scan);
// delete metric test
expected.clear();
// delete the data of all metrics in the third bucket (ts + 2 * resolution)
scan = new FactScan(ts + resolution * 2, ts + resolution * 3, dimensionValues);
table.delete(scan);
// only the first two buckets are expected to remain for every metric
for (int k = 1; k < 4; k++) {
expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k)));
}
// verify deletion
scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
assertScan(table, expected, scan);
// delete metrics for "metric1" at ts0 and verify deletion
// afterwards only the second bucket (27 * 1) is expected for "metric1"
scan = new FactScan(ts, ts + 1, "metric1", dimensionValues);
table.delete(scan);
expected.clear();
expected.put("metric1", dimensionValues, ImmutableList.of(new TimeValue(ts + resolution, 27)));
scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric1", dimensionValues);
assertScan(table, expected, scan);
// verify the next dims search
// NOTE(review): the expected results suggest the lookup returns values of the dimension that
// follows the ones given in the slice - confirm against FactTable.findSingleDimensionValue
Collection<DimensionValue> nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
// same lookup with a null value for dim1 (a HashMap is used because ImmutableMap rejects null values);
// the same result as for dim1=value1 is expected
Map<String, String> slice = Maps.newHashMap();
slice.put("dim1", null);
nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
// with dim1 and dim2 fixed, the value of dim3 is expected
nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2"), ts, ts + 3);
Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim3", "value3")), nextTags);
// add new dim values
// two more facts for measure "metric": one with a null dim3 value, one with a null dim2 value
dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", null));
table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", null), new DimensionValue("dim3", "value3"));
table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
// for the fact whose dim2 value is null, the expected result contains its dim3 value instead
nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", "value3")), nextTags);
// search for metric names given dims list and verify
// "metric1" is absent from every expectation below: its data at ts was deleted above
Collection<String> metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2", "dim3", "value3"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
// only dim1 fixed: the newly added "metric" is expected as well
metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
// only dim2=value2 fixed: "metric" was never written with that dim2 value
metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim2", "value2"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
// slice still maps dim1 to null here; all remaining measure names are expected
metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
}
use of co.cask.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
the class DefaultCube method query.
/**
 * Runs the given query against the fact table of the requested resolution.
 *
 * <p>The aggregation is either the one named in the query or, when none is named, the one
 * selected by {@code findAggregation}. Request, failure, success and result-size metrics are
 * emitted along the way.
 *
 * @param query the cube query to execute
 * @return the time series built from the scan results
 * @throws IllegalArgumentException if there is no fact table for the query's resolution, the
 *         named aggregation does not exist, or no aggregation can be found for the query
 */
@Override
public Collection<TimeSeries> query(CubeQuery query) {
  /*
    CubeQuery example: "dataset read ops for app per dataset". Roughly:

      SELECT count('read.ops')                                          << measure name and type
      FROM aggregation1.1min_resolution                                 << aggregation and resolution
      WHERE namespace='ns1' AND app='myApp' AND program='myFlow' AND    << dimensionValues
            ts>=1423370200 AND ts<1423398198                            << startTs and endTs
      GROUP BY dataset                                                  << groupByDimensions
      LIMIT 100                                                         << limit

    Execution:

    1) (optional, only if the query does not name an aggregation) find an aggregation to supply results.
       Here we need an aggregation with dimensions 'namespace', 'app', 'program', 'dataset'.
       Ideally (to reduce the scan range) 'dataset' is at the end, the other dimensions are as close to
       the beginning as possible, and there are as few other "unspecified" dimensions as possible.
       Say we found the aggregation: 'namespace', 'app', 'program', 'instance', 'dataset'.

    2) build a scan in the aggregation.
       Dimensions the aggregation has but the query gives no value for are set to "any":
         'namespace'='ns1', 'app'='myApp', 'program'='myFlow', 'instance'=*, 'dataset'=*
       plus the specified measure:
         'measureName'='read.ops'
         'measureType'='COUNTER'

    3) while scanning, build a table: dimension values -> time -> value. The measureType is used as the
       aggregate function for values if needed.
  */
  incrementMetric("cube.query.request.count", 1);
  if (!resolutionToFactTable.containsKey(query.getResolution())) {
    incrementMetric("cube.query.request.failure.count", 1);
    throw new IllegalArgumentException(
      "There's no data aggregated for specified resolution to satisfy the query: " + query.toString());
  }

  // 1) pick the aggregation to query
  final String requestedAggregation = query.getAggregation();
  final String aggName;
  final Aggregation agg;
  if (requestedAggregation == null) {
    // nothing was requested explicitly: look for an aggregation that fits the query
    ImmutablePair<String, Aggregation> match = findAggregation(query);
    if (match == null) {
      incrementMetric("cube.query.request.failure.count", 1);
      throw new IllegalArgumentException(
        "There's no data aggregated for specified dimensions " + "to satisfy the query: " + query.toString());
    }
    aggName = match.getFirst();
    agg = match.getSecond();
  } else {
    agg = aggregations.get(requestedAggregation);
    if (agg == null) {
      incrementMetric("cube.query.request.failure.count", 1);
      throw new IllegalArgumentException(
        String.format("Specified aggregation %s is not found in cube aggregations: %s",
                      requestedAggregation, aggregations.keySet().toString()));
    }
    aggName = requestedAggregation;
  }

  // record which pre-aggregated view and which resolution end up serving the query
  incrementMetric("cube.query.agg." + aggName + ".count", 1);
  incrementMetric("cube.query.res." + query.getResolution() + ".count", 1);

  // 2) build the scan: one DimensionValue per dimension of the aggregation, in aggregation order
  List<DimensionValue> scanDimensions = Lists.newArrayList();
  for (String dimension : agg.getDimensionNames()) {
    // a dimension the query does not define yields null, which means "any"
    String requestedValue = query.getDimensionValues().get(dimension);
    scanDimensions.add(new DimensionValue(dimension, requestedValue));
  }
  FactScan factScan = new FactScan(query.getStartTs(), query.getEndTs(),
                                   query.getMeasurements().keySet(), scanDimensions);

  // 3) execute the scan and turn the results into time series
  FactScanner scanner = resolutionToFactTable.get(query.getResolution()).scan(factScan);
  Table<Map<String, String>, String, Map<Long, Long>> resultMap = getTimeSeries(query, scanner);
  incrementMetric("cube.query.request.success.count", 1);
  incrementMetric("cube.query.result.size", resultMap.size());

  Collection<TimeSeries> result = convertToQueryResult(query, resultMap);
  incrementMetric("cube.query.result.timeseries.count", result.size());
  return result;
}
use of co.cask.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
the class DefaultCubeTest method getCube.
/**
 * Builds a {@code DefaultCube} whose fact tables are backed by in-memory tables.
 *
 * <p>Each call to the supplier creates (via {@code InMemoryTableService.create}) an entity table
 * named {@code "EntityTable-" + name} and a data table named
 * {@code "DataTable-" + name + "-" + resolution}, and wraps them in a new {@code FactTable}.
 *
 * @param name suffix used in the names of the backing in-memory tables
 * @param resolutions resolutions passed on to the cube
 * @param aggregations aggregations passed on to the cube
 * @return a cube with no aggregation aliases
 */
@Override
protected Cube getCube(final String name, int[] resolutions, Map<String, ? extends Aggregation> aggregations) {
  FactTableSupplier inMemorySupplier = new FactTableSupplier() {
    @Override
    public FactTable get(int resolution, int rollTime) {
      // the entity table name depends only on the cube name; the data table name also on the resolution
      String entityName = "EntityTable-" + name;
      String dataName = "DataTable-" + name + "-" + resolution;
      InMemoryTableService.create(entityName);
      InMemoryTableService.create(dataName);
      InMemoryMetricsTable dataTable = new InMemoryMetricsTable(dataName);
      EntityTable entityTable = new EntityTable(new InMemoryMetricsTable(entityName));
      return new FactTable(dataTable, entityTable, resolution, rollTime);
    }
  };
  return new DefaultCube(resolutions, inMemorySupplier, aggregations,
                         ImmutableMap.<String, AggregationAlias>of());
}
Aggregations