use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class FactTableTest method testPreSplits.
/**
 * Writes one fact for each of three different dimension sets and checks that the resulting rows
 * fall into three distinct ranges delimited by the split keys from {@code FactTable.getSplits(3)}.
 */
@Test
public void testPreSplits() throws Exception {
  InMemoryTableService.create("presplitEntityTable");
  InMemoryTableService.create("presplitDataTable");
  int resolution = 10;
  int rollTimebaseInterval = 2;
  InMemoryMetricsTable metricsTable = new InMemoryMetricsTable("presplitDataTable");
  FactTable table = new FactTable(metricsTable, new EntityTable(new InMemoryMetricsTable("presplitEntityTable")),
                                  resolution, rollTimebaseInterval);
  byte[][] splits = FactTable.getSplits(3);
  long ts = System.currentTimeMillis() / 1000;
  DimensionValue dimVal1 = new DimensionValue("dim1", "value1");
  DimensionValue dimVal2 = new DimensionValue("dim2", "value2");
  DimensionValue dimVal3 = new DimensionValue("dim3", "value3");
  // first agg view: dim1
  table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1),
                                      new Measurement("metric1", MeasureType.COUNTER, 1))));
  // second agg view: dim1 & dim2
  table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1, dimVal2),
                                      new Measurement("metric1", MeasureType.COUNTER, 1))));
  // third agg view: dim3
  table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal3),
                                      new Measurement("metric1", MeasureType.COUNTER, 1))));
  // Verify all written records are spread across splits
  Set<Integer> splitsWithRows = Sets.newHashSet();
  Scanner scanner = metricsTable.scan(null, null, null);
  try {
    Row row;
    while ((row = scanner.next()) != null) {
      // A row belongs to the first split whose boundary key is greater than the row key;
      // rows at or beyond the last boundary fall into the last split (index splits.length).
      int splitIndex = splits.length;
      for (int i = 0; i < splits.length; i++) {
        if (Bytes.compareTo(row.getRow(), splits[i]) < 0) {
          splitIndex = i;
          break;
        }
      }
      splitsWithRows.add(splitIndex);
    }
  } finally {
    // release the scanner even if an assertion or read fails mid-scan
    scanner.close();
  }
  Assert.assertEquals(3, splitsWithRows.size());
}
use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class FactTableTest method testBasics.
/**
 * End-to-end check of {@code FactTable}: adds counter facts one by one and as a batch, scans them
 * back per metric and across all metrics, deletes by scan, and then exercises the dimension-value
 * and measure-name lookups. Every step depends on the table state left by the previous steps, so
 * statement order matters.
 */
@Test
public void testBasics() throws Exception {
InMemoryTableService.create("EntityTable");
InMemoryTableService.create("DataTable");
int resolution = 10;
int rollTimebaseInterval = 2;
FactTable table = new FactTable(new InMemoryMetricsTable("DataTable"), new EntityTable(new InMemoryMetricsTable("EntityTable")), resolution, rollTimebaseInterval);
// aligned to start of resolution bucket
// "/1000" because time is expected to be in seconds
long ts = ((System.currentTimeMillis() / 1000) / resolution) * resolution;
// testing encoding with multiple dims
List<DimensionValue> dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value2"), new DimensionValue("dim3", "value3"));
// trying adding one by one, in same (first) time resolution bucket
// (5 increments of k per metric -> bucket ts receives 5 * k)
for (int i = 0; i < 5; i++) {
for (int k = 1; k < 4; k++) {
// note: "+i" here and below doesn't affect results, just to confirm
// that data points are rounded to the resolution
table.add(ImmutableList.of(new Fact(ts + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, k))));
}
}
// trying adding one by one, in different time resolution buckets
// (each of the buckets ts, ts + resolution, ts + 2 * resolution receives 2 * k)
for (int i = 0; i < 3; i++) {
for (int k = 1; k < 4; k++) {
table.add(ImmutableList.of(new Fact(ts + resolution * i + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 2 * k))));
}
}
// trying adding as list
// first incs in same (second) time resolution bucket
// (7 increments of 3 * k -> bucket ts + resolution receives 21 * k)
List<Fact> aggs = Lists.newArrayList();
for (int i = 0; i < 7; i++) {
for (int k = 1; k < 4; k++) {
aggs.add(new Fact(ts + resolution, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 3 * k)));
}
}
// then incs in different time resolution buckets
// (each of the three buckets receives 4 * k)
for (int i = 0; i < 3; i++) {
for (int k = 1; k < 4; k++) {
aggs.add(new Fact(ts + resolution * i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 4 * k)));
}
}
table.add(aggs);
// Expected totals per metric k after all adds above:
//   bucket ts:                  5k + 2k + 4k  = 11k
//   bucket ts + resolution:     2k + 21k + 4k = 27k
//   bucket ts + 2 * resolution: 2k + 4k       = 6k
// verify each metric
for (int k = 1; k < 4; k++) {
FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric" + k, dimensionValues);
Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
assertScan(table, expected, scan);
}
// verify each metric within a single timeBase
// (the scan range [ts, ts + resolution - 1] is expected to return only the first bucket)
for (int k = 1; k < 4; k++) {
FactScan scan = new FactScan(ts, ts + resolution - 1, "metric" + k, dimensionValues);
Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k)));
assertScan(table, expected, scan);
}
// verify all metrics with fuzzy metric in scan
Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
for (int k = 1; k < 4; k++) {
expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
}
// metric = null means "all"
// (this FactScan is built without a measure name and is expected to return all three metrics)
FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
assertScan(table, expected, scan);
// delete metric test
expected.clear();
// delete the metrics data in the third bucket (ts + 2 * resolution, i.e. ts + 20), for all metrics
scan = new FactScan(ts + resolution * 2, ts + resolution * 3, dimensionValues);
table.delete(scan);
for (int k = 1; k < 4; k++) {
expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k)));
}
// verify deletion
scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
assertScan(table, expected, scan);
// delete metrics for "metric1" at ts0 and verify deletion
// (only the ts + resolution data point, 27 * 1, is expected to remain for metric1)
scan = new FactScan(ts, ts + 1, "metric1", dimensionValues);
table.delete(scan);
expected.clear();
expected.put("metric1", dimensionValues, ImmutableList.of(new TimeValue(ts + resolution, 27)));
scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric1", dimensionValues);
assertScan(table, expected, scan);
// verify the next dims search
Collection<DimensionValue> nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
// a slice entry with a null value: the assertion below expects the same result as for dim1=value1
// NOTE(review): presumably null acts as "any value" for that dimension - confirm against FactTable
Map<String, String> slice = Maps.newHashMap();
slice.put("dim1", null);
nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2"), ts, ts + 3);
Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim3", "value3")), nextTags);
// add new dim values
// (two facts for measure "metric": one with dim3 set to null, one with dim2 set to null)
dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", null));
table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", null), new DimensionValue("dim3", "value3"));
table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
// the search is now expected to also return dim2=value5, and dim3=value3 for the fact whose dim2 is null
nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", "value3")), nextTags);
// search for metric names given dims list and verify
// ("metric1" was deleted from the ts bucket above, so it is not expected in [ts, ts + 1])
Collection<String> metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2", "dim3", "value3"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim2", "value2"), ts, ts + 1);
Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
}
use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class DefaultDatasetTypeService method createModuleConsumer.
/**
 * Creates a body consumer that receives an uploaded dataset module jar into a local temporary file
 * and, once the upload completes, copies it to the module's archive location under the namespace
 * home and registers the module with the type manager.
 *
 * @param datasetModuleId id of the module being added
 * @param className name of the module class; if {@code null}, the consumer responds with
 *                  {@code BAD_REQUEST} once the upload has finished
 * @param forceUpdate passed through to {@code typeManager.addModule}
 * @return a consumer backed by a temp file in the namespace's local temp directory
 * @throws IOException if the namespace home location cannot be resolved or the local temporary
 *                     directory or file cannot be created
 * @throws NotFoundException if the namespace home location does not exist
 */
private AbstractBodyConsumer createModuleConsumer(final DatasetModuleId datasetModuleId, final String className,
                                                  final boolean forceUpdate)
  throws IOException, NotFoundException {
  final NamespaceId namespaceId = datasetModuleId.getParent();
  final Location namespaceHomeLocation;
  try {
    namespaceHomeLocation = impersonator.doAs(namespaceId, new Callable<Location>() {
      @Override
      public Location call() throws Exception {
        return namespacePathLocator.get(namespaceId);
      }
    });
  } catch (Exception e) {
    // the only checked exception that the callable throws is IOException
    Throwables.propagateIfInstanceOf(e, IOException.class);
    throw Throwables.propagate(e);
  }

  // verify namespace directory exists
  if (!namespaceHomeLocation.exists()) {
    String msg = String.format("Home directory %s for namespace %s not found", namespaceHomeLocation, namespaceId);
    LOG.debug(msg);
    throw new NotFoundException(msg);
  }

  // Store uploaded content to a local temp file
  String namespacesDir = cConf.get(Constants.Namespace.NAMESPACES_DIR);
  File localDataDir = new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR));
  File namespaceBase = new File(localDataDir, namespacesDir);
  File tempDir = new File(new File(namespaceBase, datasetModuleId.getNamespace()),
                          cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile();
  if (!DirUtils.mkdirs(tempDir)) {
    throw new IOException("Could not create temporary directory at: " + tempDir);
  }

  return new AbstractBodyConsumer(File.createTempFile("dataset-", ".jar", tempDir)) {
    @Override
    protected void onFinish(HttpResponder responder, File uploadedFile) throws Exception {
      if (className == null) {
        // We have to delay until body upload is completed due to the fact that not all client is
        // requesting with "Expect: 100-continue" header and the client library we have cannot handle
        // connection close, and yet be able to read response reliably.
        // In longer term we should fix the client, as well as the netty-http server. However, since
        // this handler will be gone in near future, it's ok to have this workaround.
        responder.sendString(HttpResponseStatus.BAD_REQUEST, "Required header 'class-name' is absent.");
        return;
      }

      LOG.debug("Adding module {}, class name: {}", datasetModuleId, className);

      String dataFabricDir = cConf.get(Constants.Dataset.Manager.OUTPUT_DIR);
      String moduleName = datasetModuleId.getModule();
      Location archiveDir = namespaceHomeLocation.append(dataFabricDir).append(moduleName)
        .append(Constants.ARCHIVE_DIR);
      String archiveName = moduleName + ".jar";
      Location archive = archiveDir.append(archiveName);

      // Copy uploaded content to a temporary location
      Location tmpLocation = archive.getTempFile(".tmp");
      try {
        Locations.mkdirsIfNotExists(archiveDir);

        LOG.debug("Copy from {} to {}", uploadedFile, tmpLocation);
        Files.copy(uploadedFile, Locations.newOutputSupplier(tmpLocation));

        // Finally, move archive to final location
        LOG.debug("Storing module {} jar at {}", datasetModuleId, archive);
        if (tmpLocation.renameTo(archive) == null) {
          throw new IOException(String.format("Could not move archive from location: %s, to location: %s",
                                              tmpLocation, archive));
        }

        typeManager.addModule(datasetModuleId, className, archive, forceUpdate);
        // todo: response with DatasetModuleMeta of just added module (and log this info)
        LOG.info("Added module {}", datasetModuleId);
        responder.sendStatus(HttpResponseStatus.OK);
      } catch (Exception e) {
        // Any failure above (copy, rename or module registration): best-effort removal of the temporary copy
        try {
          tmpLocation.delete();
        } catch (IOException ex) {
          // keep the cleanup failure's cause in the log; the original exception is still handled below
          LOG.warn("Failed to cleanup temporary location {}", tmpLocation, ex);
        }
        if (e instanceof DatasetModuleConflictException) {
          responder.sendString(HttpResponseStatus.CONFLICT, e.getMessage());
        } else {
          throw e;
        }
      }
    }
  };
}
use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class DefaultCube method add.
/**
 * Converts each cube fact into one {@link Fact} per accepting aggregation and writes the resulting
 * list to every resolution's fact table through the executor service, waiting for all writes.
 *
 * @param facts the cube facts to add
 * @throws RuntimeException if writing to one or more resolution tables failed; the first failure is
 *                          the cause and any further failures are attached as suppressed exceptions
 */
@Override
public void add(Collection<? extends CubeFact> facts) {
  List<Fact> factsToWrite = Lists.newArrayList();
  int dimensionValueTotal = 0;

  for (CubeFact cubeFact : facts) {
    for (Map.Entry<String, ? extends Aggregation> entry : aggregations.entrySet()) {
      String aggregationName = entry.getKey();
      Aggregation aggregation = entry.getValue();
      // null means this aggregation has no dimension aliasing configured
      AggregationAlias alias = aggregationAliasMap.containsKey(aggregationName)
        ? aggregationAliasMap.get(aggregationName) : null;

      if (!aggregation.accept(cubeFact)) {
        continue;
      }

      List<DimensionValue> values = Lists.newArrayList();
      for (String name : aggregation.getDimensionNames()) {
        // the fact may carry the value under an aliased key, but it is stored under the aggregation's name
        String lookupKey;
        if (alias == null) {
          lookupKey = name;
        } else {
          lookupKey = alias.getAlias(name);
        }
        values.add(new DimensionValue(name, cubeFact.getDimensionValues().get(lookupKey)));
      }
      dimensionValueTotal += values.size();
      factsToWrite.add(new Fact(cubeFact.getTimestamp(), values, cubeFact.getMeasurements()));
    }
  }

  // submit one write per resolution table, keyed by resolution for error reporting
  Map<Integer, Future<?>> pendingWrites = new HashMap<>();
  for (Map.Entry<Integer, FactTable> tableEntry : resolutionToFactTable.entrySet()) {
    Future<?> pending = executorService.submit(() -> tableEntry.getValue().add(factsToWrite));
    pendingWrites.put(tableEntry.getKey(), pending);
  }

  // wait for every write, collecting all failures instead of stopping at the first one
  Exception firstFailure = null;
  StringBuilder message = new StringBuilder("Failed to add metrics to ");
  for (Map.Entry<Integer, Future<?>> write : pendingWrites.entrySet()) {
    try {
      Uninterruptibles.getUninterruptibly(write.getValue());
    } catch (ExecutionException e) {
      if (firstFailure == null) {
        message.append(String.format("the %d resolution table", write.getKey()));
        firstFailure = e;
      } else {
        message.append(String.format(", the %d resolution table", write.getKey()));
        firstFailure.addSuppressed(e);
      }
    }
  }
  if (firstFailure != null) {
    throw new RuntimeException(message.append(".").toString(), firstFailure);
  }

  incrementMetric("cube.cubeFact.add.request.count", 1);
  incrementMetric("cube.cubeFact.added.count", facts.size());
  incrementMetric("cube.tsFact.created.count", factsToWrite.size());
  incrementMetric("cube.tsFact.created.dimValues.count", dimensionValueTotal);
  incrementMetric("cube.tsFact.added.count", factsToWrite.size() * resolutionToFactTable.size());
}
use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.
the class DefaultCube method delete.
/**
 * Deletes data matching the query from the fact table of the query's resolution. For every
 * aggregation whose dimension names pass the query's tag predicate, a scan is built from that
 * aggregation's dimensions (with values taken from the query) and the matching entries are deleted.
 *
 * @param query time range, resolution, measure names, dimension values and tag predicate to delete by
 * @throws IllegalArgumentException if an aggregation matches but no fact table exists for the
 *                                  query's resolution
 */
@Override
public void delete(CubeDeleteQuery query) {
  // this may be very inefficient and it's better to use TTL; this is to only support existing old functionality.
  // use the dimension values of the aggregation to delete entries in the fact table.
  for (Aggregation agg : aggregations.values()) {
    if (!query.getTagPredicate().test(agg.getDimensionNames())) {
      continue;
    }
    // a fresh list per scan, so no scan ever shares (and later sees cleared) dimension values
    List<DimensionValue> dimensionValues = Lists.newArrayList();
    for (String dimensionName : agg.getDimensionNames()) {
      dimensionValues.add(new DimensionValue(dimensionName, query.getDimensionValues().get(dimensionName)));
    }
    FactTable factTable = resolutionToFactTable.get(query.getResolution());
    if (factTable == null) {
      // fail with a descriptive error instead of a bare NullPointerException on the delete call
      throw new IllegalArgumentException("No fact table found for resolution " + query.getResolution());
    }
    FactScan scan = new FactScan(query.getStartTs(), query.getEndTs(), query.getMeasureNames(), dimensionValues);
    factTable.delete(scan);
  }
}
Aggregations