Use of io.cdap.cdap.api.dataset.lib.cube.TimeSeries in project cdap by caskdata.
The class AbstractCubeTest, method testBasics.
@Test
public void testBasics() throws Exception {
  Aggregation agg1 = new DefaultAggregation(ImmutableList.of("dim1", "dim2", "dim3"),
                                            ImmutableList.of("dim1", "dim2"));
  Aggregation agg2 = new DefaultAggregation(ImmutableList.of("dim1", "dim2"),
                                            ImmutableList.of("dim1"));
  int resolution = 1;
  Cube cube = getCube("myCube", new int[] { resolution }, ImmutableMap.of("agg1", agg1, "agg2", agg2));
  // write some data
  // NOTE: we mostly use different timestamps, as we are interested in checking that increments
  // are combined at query time, not only at persist time
  writeInc(cube, "metric1", 1, 1, "1", "1", "1");
  writeInc(cube, "metric1", 1, 1, "1", "1", "1");
  writeInc(cube, "metric1", 2, 2, null, "1", "1");
  writeInc(cube, "metric1", 3, 3, "1", "2", "1");
  writeInc(cube, "metric1", 3, 5, "1", "2", "3");
  writeInc(cube, "metric1", 3, 7, "2", "1", "1");
  writeInc(cube, "metric1", 4, 4, "1", null, "2");
  writeInc(cube, "metric1", 5, 5, null, null, "1");
  writeInc(cube, "metric1", 6, 6, "1", null, null);
  writeInc(cube, "metric1", 7, 3, "1", "1", null);
  // writing using the BatchWritable API
  writeIncViaBatchWritable(cube, "metric1", 8, 2, null, "1", null);
  writeIncViaBatchWritable(cube, "metric1", 9, 1, null, null, null);
  // writing in batch
  cube.add(ImmutableList.of(
    getFact("metric1", 10, 2, MeasureType.COUNTER, "1", "1", "1", "1"),
    getFact("metric1", 11, 3, MeasureType.COUNTER, "1", "1", "1", null),
    getFact("metric1", 12, 4, MeasureType.COUNTER, "2", "1", "1", "1"),
    getFact("metric1", 13, 5, MeasureType.COUNTER, null, null, null, "1")));
  writeInc(cube, "metric2", 1, 1, "1", "1", "1");
  // todo: do some plain writes instead of increments - test those as well
  // now let's query!
  verifyCountQuery(cube, 0, 15, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1"), ImmutableList.of("dim2"),
                   ImmutableList.of(
                     new TimeSeries("metric1", dimensionValues("dim2", "1"), timeValues(1, 2, 7, 3, 10, 2, 11, 3)),
                     new TimeSeries("metric1", dimensionValues("dim2", "2"), timeValues(3, 8))));
  verifyCountQuery(cube, 0, 15, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1", "dim2", "1", "dim3", "1"), new ArrayList<String>(),
                   ImmutableList.of(
                     new TimeSeries("metric1", new HashMap<String, String>(), timeValues(1, 2, 10, 2, 11, 3))));
  verifyCountQuery(cube, 0, 15, resolution, "metric1", AggregationFunction.SUM,
                   new HashMap<String, String>(), ImmutableList.of("dim1"),
                   ImmutableList.of(
                     new TimeSeries("metric1", dimensionValues("dim1", "1"),
                                    timeValues(1, 2, 3, 8, 4, 4, 6, 6, 7, 3, 10, 2, 11, 3)),
                     new TimeSeries("metric1", dimensionValues("dim1", "2"), timeValues(3, 7, 12, 4))));
  verifyCountQuery(cube, 0, 15, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim3", "3"), new ArrayList<String>(),
                   ImmutableList.of(new TimeSeries("metric1", new HashMap<String, String>(), timeValues(3, 5))));
  // test querying specific aggregations
  verifyCountQuery(cube, "agg1", 0, 15, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1"), new ArrayList<String>(),
                   ImmutableList.of(
                     new TimeSeries("metric1", new HashMap<String, String>(),
                                    timeValues(1, 2, 3, 8, 7, 3, 10, 2, 11, 3))));
  verifyCountQuery(cube, "agg2", 0, 15, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1"), new ArrayList<String>(),
                   ImmutableList.of(
                     new TimeSeries("metric1", new HashMap<String, String>(),
                                    timeValues(1, 2, 3, 8, 4, 4, 6, 6, 7, 3, 10, 2, 11, 3))));
  // query with different aggregation functions
  verifyCountQuery(cube, "agg1", 0, 15, resolution, "metric1", AggregationFunction.MAX,
                   ImmutableMap.of("dim1", "1"), new ArrayList<String>(),
                   ImmutableList.of(
                     new TimeSeries("metric1", new HashMap<String, String>(),
                                    timeValues(1, 2, 3, 5, 7, 3, 10, 2, 11, 3))));
  verifyCountQuery(cube, "agg1", 0, 15, resolution, "metric1", AggregationFunction.MIN,
                   ImmutableMap.of("dim1", "1"), new ArrayList<String>(),
                   ImmutableList.of(
                     new TimeSeries("metric1", new HashMap<String, String>(),
                                    timeValues(1, 2, 3, 3, 7, 3, 10, 2, 11, 3))));
  verifyCountQuery(cube, "agg1", 0, 15, resolution, "metric1", AggregationFunction.LATEST,
                   ImmutableMap.of("dim1", "1"), new ArrayList<String>(),
                   ImmutableList.of(
                     new TimeSeries("metric1", new HashMap<String, String>(),
                                    timeValues(1, 2, 3, 5, 7, 3, 10, 2, 11, 3))));
  // delete cube data of "metric1" for dim1->1, dim2->1, dim3->1 for timestamps 1-8 and
  // check that data for other timestamps is still available
  Map<String, String> deleteTags = new LinkedHashMap<>();
  deleteTags.put("dim1", "1");
  deleteTags.put("dim2", "1");
  deleteTags.put("dim3", "1");
  Predicate<List<String>> predicate =
    aggregates -> Collections.indexOfSubList(aggregates, new ArrayList<>(deleteTags.keySet())) == 0;
  CubeDeleteQuery query =
    new CubeDeleteQuery(0, 8, resolution, deleteTags, Collections.singletonList("metric1"), predicate);
  cube.delete(query);
  verifyCountQuery(cube, 0, 15, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1", "dim2", "1", "dim3", "1"), ImmutableList.<String>of(),
                   ImmutableList.of(new TimeSeries("metric1", new HashMap<>(), timeValues(10, 2, 11, 3))));
  // delete cube data of "metric1" for dim1->1 and dim2->1, then check that scanning
  // dim1->1 and dim2->1 returns no data
  deleteTags.remove("dim3");
  query = new CubeDeleteQuery(0, 15, resolution, deleteTags, Collections.singletonList("metric1"), predicate);
  cube.delete(query);
  verifyCountQuery(cube, 0, 15, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1", "dim2", "1"), ImmutableList.<String>of(),
                   ImmutableList.<TimeSeries>of());
}
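The assertions above lean on two small varargs helpers from AbstractCubeTest. A minimal sketch of their assumed behavior (the real implementations live in the test class and may differ in detail):

// Assumed behavior of the test helpers; see AbstractCubeTest for the real code.
private List<TimeValue> timeValues(long... tsAndValues) {
  // interprets arguments as (timestamp, value) pairs: timeValues(1, 2, 7, 3) -> [(ts=1, v=2), (ts=7, v=3)]
  List<TimeValue> result = new ArrayList<>();
  for (int i = 0; i < tsAndValues.length; i += 2) {
    result.add(new TimeValue(tsAndValues[i], tsAndValues[i + 1]));
  }
  return result;
}

private List<DimensionValue> dimensionValues(String... namesAndValues) {
  // interprets arguments as (name, value) pairs: dimensionValues("dim2", "1") -> [dim2=1]
  List<DimensionValue> result = new ArrayList<>();
  for (int i = 0; i < namesAndValues.length; i += 2) {
    result.add(new DimensionValue(namesAndValues[i], namesAndValues[i + 1]));
  }
  return result;
}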
Use of io.cdap.cdap.api.dataset.lib.cube.TimeSeries in project cdap by caskdata.
The class AbstractCubeTest, method testMetricsAggregationOptionSum.
@Test
public void testMetricsAggregationOptionSum() throws Exception {
  // two aggregation groups
  Aggregation agg1 = new DefaultAggregation(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableList.of("dim1"));
  Aggregation agg2 = new DefaultAggregation(ImmutableList.of("dim1", "dim2"), ImmutableList.of("dim1"));
  int resolution = 1;
  Cube cube = getCube("testAggOptionSum", new int[] { resolution }, ImmutableMap.of("agg1", agg1, "agg2", agg2));
  Map<String, String> agg1Dims = new LinkedHashMap<>();
  agg1Dims.put("dim1", "tag1");
  agg1Dims.put("dim2", "tag2");
  agg1Dims.put("dim3", "tag3");
  Map<String, String> agg2Dims = new LinkedHashMap<>();
  agg2Dims.put("dim1", "tag1");
  agg2Dims.put("dim2", "tag4");
  // write 100 data points to agg1
  for (int i = 1; i <= 100; i++) {
    writeInc(cube, "metric1", i, 1, agg1Dims);
    writeInc(cube, "metric2", i, 2, agg1Dims);
  }
  // write 50 data points to agg2
  for (int i = 1; i <= 50; i++) {
    writeInc(cube, "metric1", i, 3, agg2Dims);
  }
  // the limit must be greater than 0
  CubeQuery query = new CubeQuery(null, 0, 200, 1, 0,
                                  ImmutableMap.of("metric1", AggregationFunction.SUM),
                                  agg1Dims, Collections.emptyList(), AggregationOption.SUM, null);
  try {
    cube.query(query);
    Assert.fail();
  } catch (Exception e) {
    // expected
  }
  query = new CubeQuery(null, 0, 200, 1, -10,
                        ImmutableMap.of("metric1", AggregationFunction.SUM),
                        agg1Dims, Collections.emptyList(), AggregationOption.SUM, null);
  try {
    cube.query(query);
    Assert.fail();
  } catch (Exception e) {
    // expected
  }
  // test a limit greater than the number of data points; all the data points should be returned
  query = new CubeQuery(null, 0, 200, 1, 200,
                        ImmutableMap.of("metric1", AggregationFunction.SUM, "metric2", AggregationFunction.SUM),
                        agg1Dims, Collections.emptyList(), AggregationOption.SUM, null);
  List<TimeSeries> result = new ArrayList<>(cube.query(query));
  Assert.assertEquals(2, result.size());
  verifySumAggregation(result.get(0), "metric1", 100, 1, 1, 0, 0);
  verifySumAggregation(result.get(1), "metric2", 100, 2, 1, 0, 0);
  // test the SUM aggregation option for metric1 and metric2 on agg1: 5 data points should be
  // returned for each of metric1 and metric2
  query = new CubeQuery(null, 0, 200, 1, 5,
                        ImmutableMap.of("metric1", AggregationFunction.SUM, "metric2", AggregationFunction.SUM),
                        agg1Dims, Collections.emptyList(), AggregationOption.SUM, null);
  result = new ArrayList<>(cube.query(query));
  Assert.assertEquals(2, result.size());
  // metric1 is incremented by 1 per second, so each aggregated point sums to 100/5=20;
  // metric2 is incremented by 2 per second, so each aggregated point sums to 200/5=40
  verifySumAggregation(result.get(0), "metric1", 5, 20, 20, 0, 0);
  verifySumAggregation(result.get(1), "metric2", 5, 40, 20, 0, 0);
  // test the SUM aggregation option for metric1 with tag name dim1: it should return two time
  // series for metric1, one from agg1 and one from agg2, each with 5 data points
  query = new CubeQuery(null, 0, 200, 1, 5,
                        ImmutableMap.of("metric1", AggregationFunction.SUM),
                        ImmutableMap.of("dim1", "tag1"), ImmutableList.of("dim2"), AggregationOption.SUM, null);
  result = new ArrayList<>(cube.query(query));
  Assert.assertEquals(2, result.size());
  // agg1 is incremented by 1 for 100 seconds, so each aggregated point sums to 100/5=20;
  // agg2 is incremented by 3 for 50 seconds, so each aggregated point sums to 3*50/5=30
  verifySumAggregation(result.get(0), "metric1", 5, 30, 10, 0, 0);
  verifySumAggregation(result.get(1), "metric1", 5, 20, 20, 0, 0);
  // test metric1 with limit 9: there is a remainder 100%9=1, so there will be 9 aggregated
  // data points, each over a partition of size 11
  query = new CubeQuery(null, 0, 200, 1, 9,
                        ImmutableMap.of("metric1", AggregationFunction.SUM),
                        agg1Dims, Collections.emptyList(), AggregationOption.SUM, null);
  result = new ArrayList<>(cube.query(query));
  Assert.assertEquals(1, result.size());
  // each of the 9 aggregated data points has sum 11
  verifySumAggregation(result.get(0), "metric1", 9, 11, 11, 1, 1);
  // test metric1 with limit 70: there is a remainder 100%70=30, so the result will contain the
  // last 70 data points and the first 30 data points will be ignored
  query = new CubeQuery(null, 0, 200, 1, 70,
                        ImmutableMap.of("metric1", AggregationFunction.SUM),
                        agg1Dims, Collections.emptyList(), AggregationOption.SUM, null);
  result = new ArrayList<>(cube.query(query));
  Assert.assertEquals(1, result.size());
  // each of the remaining 70 data points has sum 1
  verifySumAggregation(result.get(0), "metric1", 70, 1, 1, 30, 30);
}
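The partitioning behavior the comments above describe can be stated compactly: with limit k over n raw points, the first n % k points are dropped and the remaining points are summed in groups of n / k. A standalone sketch of that rule, not the actual DefaultCube code:

// Sketch of the SUM aggregation option's partitioning rule, as the test
// comments describe it; not the actual CDAP implementation.
static List<Long> sumPartitions(List<Long> values, int limit) {
  if (limit >= values.size()) {
    return new ArrayList<>(values); // fewer points than the limit: return them all
  }
  int remainder = values.size() % limit;      // e.g. 100 % 9 = 1 point dropped at the front
  int partitionSize = values.size() / limit;  // e.g. 100 / 9 = 11 points per partition
  List<Long> aggregated = new ArrayList<>();
  for (int start = remainder; start < values.size(); start += partitionSize) {
    long sum = 0;
    for (int i = start; i < start + partitionSize; i++) {
      sum += values.get(i);
    }
    aggregated.add(sum);
  }
  return aggregated;
}

For 100 points of value 1 this yields 5 points of sum 20 at limit 5, 9 points of sum 11 at limit 9, and 70 points of sum 1 at limit 70, matching the assertions above.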
Use of io.cdap.cdap.api.dataset.lib.cube.TimeSeries in project cdap by caskdata.
The class AbstractCubeTest, method testInterpolate.
@Test
public void testInterpolate() throws Exception {
  Aggregation agg1 = new DefaultAggregation(ImmutableList.of("dim1", "dim2", "dim3"),
                                            ImmutableList.of("dim1", "dim2", "dim3"));
  int resolution = 1;
  Cube cube = getCube("myInterpolatedCube", new int[] { resolution }, ImmutableMap.of("agg1", agg1));
  // test step interpolation
  long startTs = 1;
  long endTs = 10;
  writeInc(cube, "metric1", startTs, 5, "1", "1", "1");
  writeInc(cube, "metric1", endTs, 3, "1", "1", "1");
  List<TimeValue> expectedTimeValues = Lists.newArrayList();
  for (long i = startTs; i < endTs; i++) {
    expectedTimeValues.add(new TimeValue(i, 5));
  }
  expectedTimeValues.add(new TimeValue(endTs, 3));
  verifyCountQuery(cube, startTs, endTs, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1", "dim2", "1", "dim3", "1"), new ArrayList<String>(),
                   ImmutableList.of(new TimeSeries("metric1", new HashMap<String, String>(), expectedTimeValues)),
                   new Interpolators.Step());
  Map<String, String> deleteTags = new LinkedHashMap<>();
  deleteTags.put("dim1", "1");
  deleteTags.put("dim2", "1");
  deleteTags.put("dim3", "1");
  Predicate<List<String>> predicate =
    aggregates -> Collections.indexOfSubList(aggregates, new ArrayList<>(deleteTags.keySet())) == 0;
  CubeDeleteQuery query =
    new CubeDeleteQuery(startTs, endTs, resolution, deleteTags, Collections.singletonList("metric1"), predicate);
  cube.delete(query);
  // test small-slope linear interpolation
  startTs = 1;
  endTs = 5;
  writeInc(cube, "metric1", startTs, 5, "1", "1", "1");
  writeInc(cube, "metric1", endTs, 3, "1", "1", "1");
  verifyCountQuery(cube, startTs, endTs, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1", "dim2", "1", "dim3", "1"), new ArrayList<String>(),
                   ImmutableList.of(new TimeSeries("metric1", new HashMap<String, String>(),
                                                   timeValues(1, 5, 2, 5, 3, 4, 4, 4, 5, 3))),
                   new Interpolators.Linear());
  query = new CubeDeleteQuery(startTs, endTs, resolution, deleteTags,
                              Collections.singletonList("metric1"), predicate);
  cube.delete(query);
  // test big-slope linear interpolation
  writeInc(cube, "metric1", startTs, 100, "1", "1", "1");
  writeInc(cube, "metric1", endTs, 500, "1", "1", "1");
  verifyCountQuery(cube, startTs, endTs, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1", "dim2", "1", "dim3", "1"), new ArrayList<String>(),
                   ImmutableList.of(new TimeSeries("metric1", new HashMap<String, String>(),
                                                   timeValues(1, 100, 2, 200, 3, 300, 4, 400, 5, 500))),
                   new Interpolators.Linear());
  cube.delete(query);
  // test the limit on interpolation: the gap from ts=0 to ts=21 exceeds the limit of 20,
  // so the values in between are left as zeros instead of being interpolated
  long limit = 20;
  writeInc(cube, "metric1", 0, 10, "1", "1", "1");
  writeInc(cube, "metric1", limit + 1, 50, "1", "1", "1");
  expectedTimeValues.clear();
  expectedTimeValues.add(new TimeValue(0, 10));
  for (long i = 1; i <= limit; i++) {
    expectedTimeValues.add(new TimeValue(i, 0));
  }
  expectedTimeValues.add(new TimeValue(limit + 1, 50));
  verifyCountQuery(cube, 0, 21, resolution, "metric1", AggregationFunction.SUM,
                   ImmutableMap.of("dim1", "1", "dim2", "1", "dim3", "1"), new ArrayList<String>(),
                   ImmutableList.of(new TimeSeries("metric1", new HashMap<String, String>(), expectedTimeValues)),
                   new Interpolators.Step(limit));
}
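The small-slope expectations (5, 5, 4, 4, 3) follow from integer arithmetic between the two written points (ts=1, v=5) and (ts=5, v=3). A sketch of the assumed formula; the actual Interpolators.Linear source may differ:

// Linear interpolation with long arithmetic, as the expected values imply;
// a sketch, not the actual Interpolators.Linear implementation.
long interpolateLinear(long t1, long v1, long t2, long v2, long ts) {
  // integer division truncates toward zero, so interpolating between
  // (1, 5) and (5, 3) yields 5, 5, 4, 4, 3 rather than 5, 4.5, 4, 3.5, 3
  return v1 + (ts - t1) * (v2 - v1) / (t2 - t1);
}

The same formula reproduces the big-slope case: between (1, 100) and (5, 500) the slope divides evenly, giving 100, 200, 300, 400, 500.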
Use of io.cdap.cdap.api.dataset.lib.cube.TimeSeries in project cdap by caskdata.
The class DefaultCube, method query.
@Override
public Collection<TimeSeries> query(CubeQuery query) {
  /*
    CubeQuery example: "dataset read ops for app per dataset". Or:

    SELECT count('read.ops')                                        << measure name and type
    FROM aggregation1.1min_resolution                               << aggregation and resolution
    GROUP BY dataset,                                               << groupByDimensions
    WHERE namespace='ns1' AND app='myApp' AND program='myFlow' AND  << dimensionValues
          ts>=1423370200 AND ts<1423398198                          << startTs and endTs
    LIMIT 100                                                       << limit

    Execution:

    1) (optional, if the aggregation to query is not provided) find an aggregation to supply results.
       Here we need an aggregation that has the following dimensions: 'namespace', 'app', 'program',
       'dataset'. Ideally (to reduce the scan range), 'dataset' should be at the end, the other
       dimensions as close to the beginning as possible, and the number of other "unspecified"
       dimensions minimal.
       Let's say we found the aggregation: 'namespace', 'app', 'program', 'instance', 'dataset'.

    2) build a scan in the aggregation. For the scan we set "any" for the dimension values that the
       aggregation has but the query doesn't define a value for:
       'namespace'='ns1', 'app'='myApp', 'program'='myFlow', 'instance'=*, 'dataset'=*
       plus the specified measure:
       'measureName'='read.ops'
       'measureType'='COUNTER'

    3) while scanning, build a table: dimension values -> time -> value. Use measureType as the
       aggregate function for the values if needed.
  */
  incrementMetric("cube.query.request.count", 1);
  if (!resolutionToFactTable.containsKey(query.getResolution())) {
    incrementMetric("cube.query.request.failure.count", 1);
    throw new IllegalArgumentException(
      "There's no data aggregated for the specified resolution to satisfy the query: " + query.toString());
  }
  // 1) find the aggregation to query
  Aggregation agg;
  String aggName;
  if (query.getAggregation() != null) {
    aggName = query.getAggregation();
    agg = aggregations.get(query.getAggregation());
    if (agg == null) {
      incrementMetric("cube.query.request.failure.count", 1);
      throw new IllegalArgumentException(
        String.format("Specified aggregation %s is not found in cube aggregations: %s",
                      query.getAggregation(), aggregations.keySet().toString()));
    }
  } else {
    ImmutablePair<String, Aggregation> aggregation = findAggregation(query);
    if (aggregation == null) {
      incrementMetric("cube.query.request.failure.count", 1);
      throw new IllegalArgumentException(
        "There's no data aggregated for the specified dimensions to satisfy the query: " + query.toString());
    }
    agg = aggregation.getSecond();
    aggName = aggregation.getFirst();
  }
  // record how many queries end up querying specific pre-aggregated views and resolutions
  incrementMetric("cube.query.agg." + aggName + ".count", 1);
  incrementMetric("cube.query.res." + query.getResolution() + ".count", 1);
  // 2) build a scan for the query
  List<DimensionValue> dimensionValues = Lists.newArrayList();
  for (String dimensionName : agg.getDimensionNames()) {
    // if not defined in the query, the value is set to null, which means "any"
    dimensionValues.add(new DimensionValue(dimensionName, query.getDimensionValues().get(dimensionName)));
  }
  FactScan scan = new FactScan(query.getStartTs(), query.getEndTs(),
                               query.getMeasurements().keySet(), dimensionValues);
  // 3) execute the scan query
  FactTable table = resolutionToFactTable.get(query.getResolution());
  FactScanner scanner = table.scan(scan);
  Table<Map<String, String>, String, Map<Long, Long>> resultMap = getTimeSeries(query, scanner);
  incrementMetric("cube.query.request.success.count", 1);
  incrementMetric("cube.query.result.size", resultMap.size());
  Collection<TimeSeries> timeSeries = convertToQueryResult(query, resultMap);
  incrementMetric("cube.query.result.timeseries.count", timeSeries.size());
  return timeSeries;
}
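For orientation, the SQL-like example in the comment maps onto a CubeQuery roughly as follows. This is a sketch using the same constructor the tests above use; the AggregationOption.FALSE default (assumed to mean "no client-side partitioning") and the null interpolator are assumptions, not confirmed API defaults:

// Roughly the query described in the comment above; a sketch, not canonical usage.
CubeQuery query = new CubeQuery(
  "aggregation1",                                        // aggregation name, or null to auto-select one
  1423370200L, 1423398198L,                              // startTs, endTs (seconds)
  60,                                                    // 1-minute resolution, in seconds
  100,                                                   // limit
  ImmutableMap.of("read.ops", AggregationFunction.SUM),  // measurements
  ImmutableMap.of("namespace", "ns1",
                  "app", "myApp",
                  "program", "myFlow"),                  // dimensionValues
  ImmutableList.of("dataset"),                           // groupByDimensions
  AggregationOption.FALSE,                               // assumed plain option, no partitioning
  null);                                                 // no interpolation
Collection<TimeSeries> series = cube.query(query);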
Use of io.cdap.cdap.api.dataset.lib.cube.TimeSeries in project cdap by caskdata.
The class DefaultMetricStore, method query.
@Override
public Collection<MetricTimeSeries> query(MetricDataQuery query) {
  Collection<TimeSeries> cubeResult = cube.get().query(buildCubeQuery(query));
  List<MetricTimeSeries> result = Lists.newArrayList();
  for (TimeSeries timeSeries : cubeResult) {
    result.add(new MetricTimeSeries(timeSeries.getMeasureName(),
                                    timeSeries.getDimensionValues(),
                                    timeSeries.getTimeValues()));
  }
  return result;
}