Use of io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
The class DefaultCube, method query.
@Override
public Collection<TimeSeries> query(CubeQuery query) {
  /*
    CubeQuery example: "dataset read ops for app per dataset". Or:

    SELECT count('read.ops')                                        << measure name and type
    FROM aggregation1.1min_resolution                               << aggregation and resolution
    GROUP BY dataset,                                               << groupByDimensions
    WHERE namespace='ns1' AND app='myApp' AND program='myFlow' AND  << dimensionValues
          ts>=1423370200 AND ts<1423398198                          << startTs and endTs
    LIMIT 100                                                       << limit

    Execution:

    1) (optional, if the aggregation to query is not provided) Find an aggregation to supply results.
       Here we need an aggregation that has the following dimensions: 'namespace', 'app', 'program',
       'dataset'. Ideally (to reduce the scan range), 'dataset' should be at the end, the other
       dimensions as close to the beginning as possible, and the number of other "unspecified"
       dimensions minimal. Let's say we found the aggregation:
       'namespace', 'app', 'program', 'instance', 'dataset'.

    2) Build a scan in the aggregation. For the scan we set "any" for the dimension values that the
       aggregation has but the query doesn't define a value for:
         'namespace'='ns1', 'app'='myApp', 'program'='myFlow', 'instance'=*, 'dataset'=*
       plus the specified measure:
         'measureName'='read.ops'
         'measureType'='COUNTER'

    3) While scanning, build a table: dimension values -> time -> value. Use measureType as the value
       aggregate function where needed.
  */
  incrementMetric("cube.query.request.count", 1);
  if (!resolutionToFactTable.containsKey(query.getResolution())) {
    incrementMetric("cube.query.request.failure.count", 1);
    throw new IllegalArgumentException(
      "There's no data aggregated for specified resolution to satisfy the query: " + query);
  }

  // 1) find aggregation to query
  Aggregation agg;
  String aggName;
  if (query.getAggregation() != null) {
    aggName = query.getAggregation();
    agg = aggregations.get(query.getAggregation());
    if (agg == null) {
      incrementMetric("cube.query.request.failure.count", 1);
      throw new IllegalArgumentException(String.format(
        "Specified aggregation %s is not found in cube aggregations: %s",
        query.getAggregation(), aggregations.keySet()));
    }
  } else {
    ImmutablePair<String, Aggregation> aggregation = findAggregation(query);
    if (aggregation == null) {
      incrementMetric("cube.query.request.failure.count", 1);
      throw new IllegalArgumentException(
        "There's no data aggregated for specified dimensions to satisfy the query: " + query);
    }
    agg = aggregation.getSecond();
    aggName = aggregation.getFirst();
  }

  // tell how many queries end up querying specific pre-aggregated views and resolutions
  incrementMetric("cube.query.agg." + aggName + ".count", 1);
  incrementMetric("cube.query.res." + query.getResolution() + ".count", 1);

  // 2) build a scan for a query
  List<DimensionValue> dimensionValues = Lists.newArrayList();
  for (String dimensionName : agg.getDimensionNames()) {
    // if not defined in query, will be set as null, which means "any"
    dimensionValues.add(new DimensionValue(dimensionName, query.getDimensionValues().get(dimensionName)));
  }
  FactScan scan = new FactScan(query.getStartTs(), query.getEndTs(),
                               query.getMeasurements().keySet(), dimensionValues);

  // 3) execute scan query
  FactTable table = resolutionToFactTable.get(query.getResolution());
  FactScanner scanner = table.scan(scan);
  Table<Map<String, String>, String, Map<Long, Long>> resultMap = getTimeSeries(query, scanner);
  incrementMetric("cube.query.request.success.count", 1);
  incrementMetric("cube.query.result.size", resultMap.size());

  Collection<TimeSeries> timeSeries = convertToQueryResult(query, resultMap);
  incrementMetric("cube.query.result.timeseries.count", timeSeries.size());
  return timeSeries;
}
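As a worked illustration of step 2, the "any" convention is simply a null value inside a DimensionValue. Below is a minimal sketch of the scan the example query in the comment would produce, assuming the aggregation found is ('namespace', 'app', 'program', 'instance', 'dataset'); the literal values are taken from the comment above, not from real data:

// Dimension values in the aggregation's order; null means "any".
List<DimensionValue> dims = Lists.newArrayList(
  new DimensionValue("namespace", "ns1"),
  new DimensionValue("app", "myApp"),
  new DimensionValue("program", "myFlow"),
  new DimensionValue("instance", null),  // aggregation has it, query doesn't constrain it
  new DimensionValue("dataset", null));  // the GROUP BY dimension

// Scan [startTs, endTs) for the single measure 'read.ops'.
FactScan scan = new FactScan(1423370200L, 1423398198L, "read.ops", dims);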
Use of io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
The class FactTableTest, method testMaxResolution.
@Test
public void testMaxResolution() throws Exception {
  // we use Integer.MAX_VALUE as resolution to compute all-time total values
  InMemoryTableService.create("TotalsEntityTable");
  InMemoryTableService.create("TotalsDataTable");
  int resolution = Integer.MAX_VALUE;
  // should not matter when resolution is max
  int rollTimebaseInterval = 3600;
  FactTable table = new FactTable(new InMemoryMetricsTable("TotalsDataTable"),
                                  new EntityTable(new InMemoryMetricsTable("TotalsEntityTable")),
                                  resolution, rollTimebaseInterval);

  // ts is expected in seconds
  long ts = System.currentTimeMillis() / 1000;
  int count = 1000;
  for (int i = 0; i < count; i++) {
    for (int k = 0; k < 10; k++) {
      // shift one day
      writeInc(table, "metric" + k, ts + i * 60 * 60 * 24, i * k, "dim" + k, "value" + k);
    }
  }

  for (int k = 0; k < 10; k++) {
    // (0, 0) should match the timestamp of all data points
    FactScan scan = new FactScan(0, 0, "metric" + k, dimValues("dim" + k, "value" + k));
    Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
    expected.put("metric" + k, dimValues("dim" + k, "value" + k),
                 ImmutableList.of(new TimeValue(0, k * count * (count - 1) / 2)));
    assertScan(table, expected, scan);
  }
}
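The expected all-time value in the assertion is just an arithmetic series: metricK receives an increment of i * k for every i in [0, count), so the total is k * count * (count - 1) / 2. A standalone check of that arithmetic (illustration only, not part of the test class):

public class SeriesSumCheck {
  public static void main(String[] args) {
    int count = 1000;
    for (int k = 0; k < 10; k++) {
      long total = 0;
      for (int i = 0; i < count; i++) {
        total += (long) i * k;  // the same increments the test writes for "metric" + k
      }
      // matches the TimeValue expected by the scan assertion above
      if (total != (long) k * count * (count - 1) / 2) {
        throw new AssertionError("mismatch for k=" + k);
      }
    }
    System.out.println("all series sums match k * count * (count - 1) / 2");
  }
}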
Use of io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
The class FactTableTest, method testCache.
@Test
public void testCache() throws Exception {
  String tableName = "testCacheTable";
  String entityTableName = "testCacheEntityTable";
  InMemoryTableService.create(tableName);
  InMemoryTableService.create(entityTableName);
  int resolution = 5;
  InMemoryMetricsTable metricsTable = new InMemoryMetricsTable(tableName);
  FactTable table = new FactTable(metricsTable,
                                  new EntityTable(new InMemoryMetricsTable(entityTableName)),
                                  resolution, 2);

  // set the metrics collector for the table
  FactTableMetricsCollector metricsCollector = new FactTableMetricsCollector(resolution);
  table.setMetricsCollector(metricsCollector);

  // initially the cache should be empty
  Assert.assertEquals(0, table.getFactCounterCache().size());

  // add some values with the current ts
  long timestampNow = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()) / resolution * resolution;
  List<DimensionValue> dims = dimValues("dim1", "dim2");
  List<Fact> metrics = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    metrics.add(new Fact(timestampNow, dims, new Measurement("metric" + i, MeasureType.COUNTER, 1)));
  }
  table.add(metrics);

  // since there was no previous add to the metric store, these increments should still be
  // considered COUNTERs, and the cache should be updated
  Assert.assertEquals(10, metricsCollector.getLastIncrementSize());
  Assert.assertEquals(0, metricsCollector.getLastGaugeSize());
  Assert.assertEquals(10, table.getFactCounterCache().size());
  for (long value : table.getFactCounterCache().asMap().values()) {
    Assert.assertEquals(timestampNow, value);
  }

  // add metrics older than the current timestamp: these increments should still be
  // considered COUNTERs, and the cache should NOT be updated
  metrics = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    metrics.add(new Fact(timestampNow - 5, dims, new Measurement("metric" + i, MeasureType.COUNTER, 1)));
  }
  table.add(metrics);
  Assert.assertEquals(10, metricsCollector.getLastIncrementSize());
  Assert.assertEquals(0, metricsCollector.getLastGaugeSize());
  Assert.assertEquals(10, table.getFactCounterCache().size());
  for (long value : table.getFactCounterCache().asMap().values()) {
    Assert.assertEquals(timestampNow, value);
  }

  // now insert metrics newer than the current timestamp: the increments should be
  // considered GAUGEs, and the cache should be updated
  metrics = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    metrics.add(new Fact(timestampNow + 5, dims, new Measurement("metric" + i, MeasureType.COUNTER, 1)));
  }
  table.add(metrics);
  Assert.assertEquals(0, metricsCollector.getLastIncrementSize());
  Assert.assertEquals(10, metricsCollector.getLastGaugeSize());
  Assert.assertEquals(10, table.getFactCounterCache().size());
  for (long value : table.getFactCounterCache().asMap().values()) {
    Assert.assertEquals(timestampNow + 5, value);
  }
}
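Taken together, the three phases describe the routing rule the counter cache implements: an unseen fact is written as an increment and its timestamp is remembered; a fact with a strictly newer timestamp than the cached one can be written as a gauge (a plain put, cheaper than an increment) and refreshes the cache; a fact at the same or an older timestamp stays an increment and leaves the cache alone. A self-contained sketch of that decision, assuming a Guava cache keyed by some per-fact row key (the class and method names here are hypothetical, not FactTable's actual code):

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

// Hypothetical model of the counter-cache routing exercised by the test above.
class FactCounterCacheSketch {
  private final Cache<String, Long> factCounterCache =
    CacheBuilder.newBuilder().maximumSize(100_000).build();

  /** Returns true if the fact can be written as a gauge (put), false if it must be an increment. */
  boolean writeAsGauge(String factRowKey, long ts) {
    Long cachedTs = factCounterCache.getIfPresent(factRowKey);
    if (cachedTs == null) {
      // never seen: increment to be safe, but remember the timestamp (first add in the test)
      factCounterCache.put(factRowKey, ts);
      return false;
    }
    if (ts > cachedTs) {
      // strictly newer time bucket: nothing written there yet, a put suffices (third add)
      factCounterCache.put(factRowKey, ts);
      return true;
    }
    // same or older bucket may already hold data: must increment (second add)
    return false;
  }
}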
Use of io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
The class FactTableTest, method testQuery.
@Test
public void testQuery() throws Exception {
  InMemoryTableService.create("QueryEntityTable");
  InMemoryTableService.create("QueryDataTable");
  int resolution = 10;
  int rollTimebaseInterval = 2;
  FactTable table = new FactTable(new InMemoryMetricsTable("QueryDataTable"),
                                  new EntityTable(new InMemoryMetricsTable("QueryEntityTable")),
                                  resolution, rollTimebaseInterval);

  // aligned to the start of the resolution bucket;
  // "/1000" because time is expected to be in seconds
  long ts = ((System.currentTimeMillis() / 1000) / resolution) * resolution;

  for (int i = 0; i < 3; i++) {
    for (int k = 1; k < 3; k++) {
      // note: the "+i" added to ts here and below doesn't affect the results; it just confirms
      // that data points are rounded to the resolution
      writeInc(table, "metric" + k, ts + i * resolution + i, k + i, "dim1", "value1", "dim2", "value2");
      writeInc(table, "metric" + k, ts + i * resolution + i, 2 * k + i, "dim1", "value2", "dim2", "value2");
      writeInc(table, "metric" + k, ts + i * resolution + i, 3 * k + i, "dim1", "value2", "dim2", "value1");
      writeInc(table, "metric" + k, ts + i * resolution + i, 4 * k + i, "dim1", "value1", "dim2", "value3");
      // a null value in a dim matches only fuzzy ("any") scans
      writeInc(table, "metric" + k, ts + i * resolution + i, 5 * k + i, "dim1", null, "dim2", "value3");
    }
  }

  Table<String, List<DimensionValue>, List<TimeValue>> expected;
  FactScan scan;

  for (int i = 1; i < 3; i++) {
    // all time points
    scan = new FactScan(ts - resolution, ts + 3 * resolution,
                        "metric" + i, dimValues("dim1", "value1", "dim2", "value2"));
    expected = HashBasedTable.create();
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts, resolution, i, i + 1, i + 2));
    assertScan(table, expected, scan);

    // time points since the second interval
    scan = new FactScan(ts + resolution, ts + 3 * resolution,
                        "metric" + i, dimValues("dim1", "value1", "dim2", "value2"));
    expected = HashBasedTable.create();
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts + resolution, resolution, i + 1, i + 2));
    assertScan(table, expected, scan);

    // time points before the third interval
    scan = new FactScan(ts - resolution, ts + resolution,
                        "metric" + i, dimValues("dim1", "value1", "dim2", "value2"));
    expected = HashBasedTable.create();
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts, resolution, i, i + 1));
    assertScan(table, expected, scan);

    // time points for fuzzy dim2 since the second interval (null stands for "any")
    scan = new FactScan(ts + resolution, ts + 3 * resolution,
                        "metric" + i, dimValues("dim1", "value1", "dim2", null));
    expected = HashBasedTable.create();
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts + resolution, resolution, i + 1, i + 2));
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value3"),
                 timeValues(ts + resolution, resolution, 4 * i + 1, 4 * i + 2));
    assertScan(table, expected, scan);

    // time points for fuzzy dim1 before the third interval (null stands for "any")
    scan = new FactScan(ts - resolution, ts + resolution,
                        "metric" + i, dimValues("dim1", null, "dim2", "value3"));
    expected = HashBasedTable.create();
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value3"),
                 timeValues(ts, resolution, 4 * i, 4 * i + 1));
    expected.put("metric" + i, dimValues("dim1", null, "dim2", "value3"),
                 timeValues(ts, resolution, 5 * i, 5 * i + 1));
    assertScan(table, expected, scan);

    // time points for both fuzzy dims before the third interval (null stands for "any")
    scan = new FactScan(ts - resolution, ts + resolution,
                        "metric" + i, dimValues("dim1", null, "dim2", null));
    expected = HashBasedTable.create();
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts, resolution, i, i + 1));
    expected.put("metric" + i, dimValues("dim1", "value2", "dim2", "value1"),
                 timeValues(ts, resolution, 3 * i, 3 * i + 1));
    expected.put("metric" + i, dimValues("dim1", "value2", "dim2", "value2"),
                 timeValues(ts, resolution, 2 * i, 2 * i + 1));
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value3"),
                 timeValues(ts, resolution, 4 * i, 4 * i + 1));
    expected.put("metric" + i, dimValues("dim1", null, "dim2", "value3"),
                 timeValues(ts, resolution, 5 * i, 5 * i + 1));
    assertScan(table, expected, scan);

    // time points for both fuzzy dims since the second interval (null stands for "any")
    scan = new FactScan(ts + resolution, ts + 3 * resolution,
                        "metric" + i, dimValues("dim1", null, "dim2", null));
    expected = HashBasedTable.create();
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts + resolution, resolution, i + 1, i + 2));
    expected.put("metric" + i, dimValues("dim1", "value2", "dim2", "value1"),
                 timeValues(ts + resolution, resolution, 3 * i + 1, 3 * i + 2));
    expected.put("metric" + i, dimValues("dim1", "value2", "dim2", "value2"),
                 timeValues(ts + resolution, resolution, 2 * i + 1, 2 * i + 2));
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value3"),
                 timeValues(ts + resolution, resolution, 4 * i + 1, 4 * i + 2));
    expected.put("metric" + i, dimValues("dim1", null, "dim2", "value3"),
                 timeValues(ts + resolution, resolution, 5 * i + 1, 5 * i + 2));
    assertScan(table, expected, scan);
  }

  // all time points, across all metrics (no measure name in the scan)
  scan = new FactScan(ts - resolution, ts + 3 * resolution, dimValues("dim1", "value1", "dim2", "value2"));
  expected = HashBasedTable.create();
  for (int i = 1; i < 3; i++) {
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts, resolution, i, i + 1, i + 2));
  }
  assertScan(table, expected, scan);

  // time points since the second interval
  scan = new FactScan(ts + resolution, ts + 3 * resolution, dimValues("dim1", "value1", "dim2", "value2"));
  expected = HashBasedTable.create();
  for (int i = 1; i < 3; i++) {
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts + resolution, resolution, i + 1, i + 2));
  }
  assertScan(table, expected, scan);

  // time points before the third interval
  scan = new FactScan(ts - resolution, ts + resolution, dimValues("dim1", "value1", "dim2", "value2"));
  expected = HashBasedTable.create();
  for (int i = 1; i < 3; i++) {
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts, resolution, i, i + 1));
  }
  assertScan(table, expected, scan);

  // time points for fuzzy dim2 since the second interval
  scan = new FactScan(ts + resolution, ts + 3 * resolution, dimValues("dim1", "value1", "dim2", null));
  expected = HashBasedTable.create();
  for (int i = 1; i < 3; i++) {
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value2"),
                 timeValues(ts + resolution, resolution, i + 1, i + 2));
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value3"),
                 timeValues(ts + resolution, resolution, 4 * i + 1, 4 * i + 2));
  }
  assertScan(table, expected, scan);

  // time points for fuzzy dim1 before the third interval (an important case that caught some bugs)
  scan = new FactScan(ts - resolution, ts + resolution, dimValues("dim1", null, "dim2", "value3"));
  expected = HashBasedTable.create();
  for (int i = 1; i < 3; i++) {
    expected.put("metric" + i, dimValues("dim1", "value1", "dim2", "value3"),
                 timeValues(ts, resolution, 4 * i, 4 * i + 1));
    expected.put("metric" + i, dimValues("dim1", null, "dim2", "value3"),
                 timeValues(ts, resolution, 5 * i, 5 * i + 1));
  }
  assertScan(table, expected, scan);
}
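The timeValues(...) helper is defined elsewhere in FactTableTest; from its usage above it evidently builds consecutive data points one resolution apart. A sketch consistent with that usage (an assumption, not the test's actual code):

// Hypothetical reconstruction of the helper: one TimeValue per given value,
// in consecutive resolution buckets starting at startTs.
private static List<TimeValue> timeValues(long startTs, int resolution, long... values) {
  List<TimeValue> result = new ArrayList<>();
  for (int i = 0; i < values.length; i++) {
    result.add(new TimeValue(startTs + (long) i * resolution, values[i]));
  }
  return result;
}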
Use of io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable in project cdap by caskdata.
The class FactTableTest, method testSearch.
@Test
public void testSearch() throws Exception {
  InMemoryTableService.create("SearchEntityTable");
  InMemoryTableService.create("SearchDataTable");
  int resolution = Integer.MAX_VALUE;
  int rollTimebaseInterval = 2;
  FactTable table = new FactTable(new InMemoryMetricsTable("SearchDataTable"),
                                  new EntityTable(new InMemoryMetricsTable("SearchEntityTable")),
                                  resolution, rollTimebaseInterval);

  // aligned to the start of the resolution bucket;
  // "/1000" because time is expected to be in seconds
  long ts = ((System.currentTimeMillis() / 1000) / resolution) * resolution;

  List<String> aggregationList = ImmutableList.of("dim1", "dim2", "dim3", "dim4");
  for (int i = 0; i < 2; i++) {
    writeInc(table, "metric-a" + i, ts + i, i, "dim1", "value1", "dim2", "value2", "dim3", "value3", "dim4", "value4");
    writeInc(table, "metric-b" + i, ts + i, i, "dim1", "value2", "dim2", "value2", "dim3", "x3", "dim4", "x4");
    writeInc(table, "metric-c" + i, ts + i, i, "dim1", "value2", "dim2", "value2", "dim3", null, "dim4", "y4");
    writeInc(table, "metric-d" + i, ts + i, i, "dim1", "value1", "dim2", "value3", "dim3", "y3", "dim4", null);
  }

  Map<String, String> slice = Maps.newHashMap();
  slice.put("dim1", "value2");
  slice.put("dim2", "value2");
  slice.put("dim3", null);

  // verify search dims
  testTagSearch(table, aggregationList, ImmutableMap.of("dim2", "value2"),
                ImmutableSet.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim1", "value2")));
  testTagSearch(table, aggregationList, ImmutableMap.of("dim1", "value1"),
                ImmutableSet.of(new DimensionValue("dim2", "value2"), new DimensionValue("dim2", "value3")));
  testTagSearch(table, aggregationList, ImmutableMap.<String, String>of(),
                ImmutableSet.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim1", "value2")));
  testTagSearch(table, aggregationList, ImmutableMap.of("dim1", "value2", "dim2", "value2"),
                ImmutableSet.of(new DimensionValue("dim3", "x3"), new DimensionValue("dim4", "y4")));
  testTagSearch(table, aggregationList, slice,
                ImmutableSet.of(new DimensionValue("dim4", "x4"), new DimensionValue("dim4", "y4")));
  testTagSearch(table, aggregationList, ImmutableMap.of("dim1", "value2", "dim2", "value3", "dim3", "y3"),
                ImmutableSet.<DimensionValue>of());

  // verify search metrics
  testMetricNamesSearch(table, aggregationList,
                        ImmutableMap.of("dim1", "value1", "dim2", "value2", "dim3", "value3"),
                        ImmutableSet.of("metric-a0", "metric-a1"));
  testMetricNamesSearch(table, aggregationList, ImmutableMap.of("dim2", "value2"),
                        ImmutableSet.of("metric-a0", "metric-a1", "metric-b0", "metric-b1", "metric-c0", "metric-c1"));
  testMetricNamesSearch(table, aggregationList, slice,
                        ImmutableSet.of("metric-b0", "metric-b1", "metric-c0", "metric-c1"));
}
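The testTagSearch and testMetricNamesSearch helpers are not shown here; they presumably wrap FactTable's search methods and assert on the result. A rough sketch of the tag-search helper under that assumption (treat the exact search-method name and signature as assumptions, not confirmed API):

// Hypothetical shape of the helper (the real one lives elsewhere in FactTableTest).
private static void testTagSearch(FactTable table, List<String> aggregation,
                                  Map<String, String> slice,
                                  Set<DimensionValue> expected) throws Exception {
  // the (0, 1) time range works because the test data is written at the
  // Integer.MAX_VALUE resolution, i.e. into the all-time bucket at ts 0
  Assert.assertEquals(expected,
                      table.findSingleDimensionValue(aggregation, slice, 0, 1));
}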