
Example 11 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTableTest, method testPreSplits.

@Test
public void testPreSplits() throws Exception {
    InMemoryTableService.create("presplitEntityTable");
    InMemoryTableService.create("presplitDataTable");
    int resolution = 10;
    int rollTimebaseInterval = 2;
    InMemoryMetricsTable metricsTable = new InMemoryMetricsTable("presplitDataTable");
    FactTable table = new FactTable(metricsTable, new EntityTable(new InMemoryMetricsTable("presplitEntityTable")), resolution, rollTimebaseInterval);
    byte[][] splits = FactTable.getSplits(3);
    long ts = System.currentTimeMillis() / 1000;
    DimensionValue dimVal1 = new DimensionValue("dim1", "value1");
    DimensionValue dimVal2 = new DimensionValue("dim2", "value2");
    DimensionValue dimVal3 = new DimensionValue("dim3", "value3");
    // first agg view: dim1
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // second agg view: dim1 & dim2
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1, dimVal2), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // third agg view: dim3
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal3), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // Verify all written records are spread across splits
    Scanner scanner = metricsTable.scan(null, null, null);
    Row row;
    Set<Integer> splitsWithRows = Sets.newHashSet();
    while ((row = scanner.next()) != null) {
        boolean added = false;
        for (int i = 0; i < splits.length; i++) {
            if (Bytes.compareTo(row.getRow(), splits[i]) < 0) {
                splitsWithRows.add(i);
                added = true;
                break;
            }
        }
        if (!added) {
            // falls into last split
            splitsWithRows.add(splits.length);
        }
    }
    scanner.close();
    Assert.assertEquals(3, splitsWithRows.size());
}
Also used: Measurement (io.cdap.cdap.api.dataset.lib.cube.Measurement) Scanner (io.cdap.cdap.api.dataset.table.Scanner) DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue) InMemoryMetricsTable (io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) Row (io.cdap.cdap.api.dataset.table.Row) Test (org.junit.Test)
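
The split-assignment loop above generalizes into a small helper. Below is a minimal standalone sketch of the same logic in plain Java with no CDAP dependencies; the class name, findSplit, and the sample keys are hypothetical, and it assumes, as the loop above does, that the split boundaries are sorted ascending.

public class SplitLocator {

    /**
     * Returns the index of the split a row key falls into, given sorted split
     * boundaries: index i if key < splits[i], otherwise the last split.
     */
    static int findSplit(byte[][] splits, byte[] rowKey) {
        for (int i = 0; i < splits.length; i++) {
            if (compare(rowKey, splits[i]) < 0) {
                return i;
            }
        }
        // the key is >= every boundary, so it falls into the last split
        return splits.length;
    }

    // unsigned lexicographic comparison, the ordering HBase-style tables use
    static int compare(byte[] a, byte[] b) {
        int len = Math.min(a.length, b.length);
        for (int i = 0; i < len; i++) {
            int diff = (a[i] & 0xff) - (b[i] & 0xff);
            if (diff != 0) {
                return diff;
            }
        }
        return a.length - b.length;
    }

    public static void main(String[] args) {
        byte[][] splits = { { 0x40 }, { (byte) 0x80 }, { (byte) 0xc0 } };
        System.out.println(findSplit(splits, new byte[] { 0x10 }));        // 0
        System.out.println(findSplit(splits, new byte[] { (byte) 0x90 })); // 2
        System.out.println(findSplit(splits, new byte[] { (byte) 0xf0 })); // 3 (last split)
    }
}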

Example 12 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTableTest, method testBasics.

@Test
public void testBasics() throws Exception {
    InMemoryTableService.create("EntityTable");
    InMemoryTableService.create("DataTable");
    int resolution = 10;
    int rollTimebaseInterval = 2;
    FactTable table = new FactTable(new InMemoryMetricsTable("DataTable"), new EntityTable(new InMemoryMetricsTable("EntityTable")), resolution, rollTimebaseInterval);
    // aligned to start of resolution bucket
    // "/1000" because time is expected to be in seconds
    long ts = ((System.currentTimeMillis() / 1000) / resolution) * resolution;
    // testing encoding with multiple dims
    List<DimensionValue> dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value2"), new DimensionValue("dim3", "value3"));
    // trying adding one by one, in same (first) time resolution bucket
    for (int i = 0; i < 5; i++) {
        for (int k = 1; k < 4; k++) {
            // note: the "+ i" here and below doesn't affect the results; it only confirms
            // that data points are rounded down to the resolution
            table.add(ImmutableList.of(new Fact(ts + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, k))));
        }
    }
    // trying adding one by one, in different time resolution buckets
    for (int i = 0; i < 3; i++) {
        for (int k = 1; k < 4; k++) {
            table.add(ImmutableList.of(new Fact(ts + resolution * i + i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 2 * k))));
        }
    }
    // trying adding as list
    // first incs in same (second) time resolution bucket
    List<Fact> aggs = Lists.newArrayList();
    for (int i = 0; i < 7; i++) {
        for (int k = 1; k < 4; k++) {
            aggs.add(new Fact(ts + resolution, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 3 * k)));
        }
    }
    // then incs in different time resolution buckets
    for (int i = 0; i < 3; i++) {
        for (int k = 1; k < 4; k++) {
            aggs.add(new Fact(ts + resolution * i, dimensionValues, new Measurement("metric" + k, MeasureType.COUNTER, 4 * k)));
        }
    }
    table.add(aggs);
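    // At this point, for each metric k, the three resolution buckets hold:
    //   bucket ts                  : 5*k (first loop) + 2*k (second loop, i=0) + 4*k (list adds, i=0) = 11*k
    //   bucket ts + resolution     : 2*k (second loop, i=1) + 7 * 3*k (list adds) + 4*k (list adds, i=1) = 27*k
    //   bucket ts + 2 * resolution : 2*k (second loop, i=2) + 4*k (list adds, i=2) = 6*k
    // which is exactly what the scans below assert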
    // verify each metric
    for (int k = 1; k < 4; k++) {
        FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric" + k, dimensionValues);
        Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
        assertScan(table, expected, scan);
    }
    // verify each metric within a single timeBase
    for (int k = 1; k < 4; k++) {
        FactScan scan = new FactScan(ts, ts + resolution - 1, "metric" + k, dimensionValues);
        Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k)));
        assertScan(table, expected, scan);
    }
    // verify all metrics with fuzzy metric in scan
    Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
    for (int k = 1; k < 4; k++) {
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k), new TimeValue(ts + 2 * resolution, 6 * k)));
    }
    // metric = null means "all"
    FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
    assertScan(table, expected, scan);
    // delete metric test
    expected.clear();
    // delete the metrics data in the (ts + 2 * resolution) bucket, i.e. ts + 20 at resolution 10
    scan = new FactScan(ts + resolution * 2, ts + resolution * 3, dimensionValues);
    table.delete(scan);
    for (int k = 1; k < 4; k++) {
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k)));
    }
    // verify deletion
    scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
    assertScan(table, expected, scan);
    // delete metrics for "metric1" at the first timestamp (ts) and verify deletion
    scan = new FactScan(ts, ts + 1, "metric1", dimensionValues);
    table.delete(scan);
    expected.clear();
    expected.put("metric1", dimensionValues, ImmutableList.of(new TimeValue(ts + resolution, 27)));
    scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric1", dimensionValues);
    assertScan(table, expected, scan);
    // verify searching for the next dimension values, given a slice of known dims
    Collection<DimensionValue> nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
    Map<String, String> slice = Maps.newHashMap();
    slice.put("dim1", null);
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2"), ts, ts + 3);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim3", "value3")), nextTags);
    // add new facts with null-valued dims, so additional next-level dimension values appear
    dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", null));
    table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
    dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"), new DimensionValue("dim2", null), new DimensionValue("dim3", "value3"));
    table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2"), new DimensionValue("dim2", "value5"), new DimensionValue("dim3", "value3")), nextTags);
    // search for metric names given dims list and verify
    Collection<String> metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1", "dim2", "value2", "dim3", "value3"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), ImmutableMap.of("dim2", "value2"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
}
Also used: Measurement (io.cdap.cdap.api.dataset.lib.cube.Measurement) DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue) InMemoryMetricsTable (io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable) ArrayList (java.util.ArrayList) ImmutableList (com.google.common.collect.ImmutableList) List (java.util.List) TimeValue (io.cdap.cdap.api.dataset.lib.cube.TimeValue) Test (org.junit.Test)
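
The expected values above rest on one rule: a fact's timestamp is rounded down to the start of its resolution bucket, and COUNTER measurements that land in the same bucket are summed. A minimal standalone sketch of that rule in plain Java follows; the class and method names are hypothetical and no CDAP types are involved.

import java.util.TreeMap;

public class ResolutionBuckets {

    private final int resolution;
    // bucket start (in seconds) -> accumulated counter value
    private final TreeMap<Long, Long> buckets = new TreeMap<>();

    ResolutionBuckets(int resolution) {
        this.resolution = resolution;
    }

    void add(long tsSeconds, long value) {
        long bucket = (tsSeconds / resolution) * resolution; // round down to the bucket start
        buckets.merge(bucket, value, Long::sum);             // COUNTER semantics: sum within a bucket
    }

    public static void main(String[] args) {
        ResolutionBuckets b = new ResolutionBuckets(10);
        long ts = 1_000_000; // already aligned to the 10-second resolution
        for (int i = 0; i < 5; i++) {
            b.add(ts + i, 1); // all five land in the same bucket, as in the first loop above
        }
        b.add(ts + 10, 3);    // next bucket
        System.out.println(b.buckets); // {1000000=5, 1000010=3}
    }
}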

Example 13 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class DefaultDatasetTypeService, method createModuleConsumer.

private AbstractBodyConsumer createModuleConsumer(final DatasetModuleId datasetModuleId, final String className, final boolean forceUpdate) throws IOException, NotFoundException {
    final NamespaceId namespaceId = datasetModuleId.getParent();
    final Location namespaceHomeLocation;
    try {
        namespaceHomeLocation = impersonator.doAs(namespaceId, new Callable<Location>() {

            @Override
            public Location call() throws Exception {
                return namespacePathLocator.get(namespaceId);
            }
        });
    } catch (Exception e) {
        // the only checked exception that the callable throws is IOException
        Throwables.propagateIfInstanceOf(e, IOException.class);
        throw Throwables.propagate(e);
    }
    // verify namespace directory exists
    if (!namespaceHomeLocation.exists()) {
        String msg = String.format("Home directory %s for namespace %s not found", namespaceHomeLocation, namespaceId);
        LOG.debug(msg);
        throw new NotFoundException(msg);
    }
    // Store uploaded content to a local temp file
    String namespacesDir = cConf.get(Constants.Namespace.NAMESPACES_DIR);
    File localDataDir = new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR));
    File namespaceBase = new File(localDataDir, namespacesDir);
    File tempDir = new File(new File(namespaceBase, datasetModuleId.getNamespace()), cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile();
    if (!DirUtils.mkdirs(tempDir)) {
        throw new IOException("Could not create temporary directory at: " + tempDir);
    }
    return new AbstractBodyConsumer(File.createTempFile("dataset-", ".jar", tempDir)) {

        @Override
        protected void onFinish(HttpResponder responder, File uploadedFile) throws Exception {
            if (className == null) {
                // We have to delay this check until the body upload completes: not every client
                // sends the "Expect: 100-continue" header, and the client library we use cannot
                // handle a connection close while still reading the response reliably.
                // Longer term we should fix the client as well as the netty-http server; however,
                // since this handler will be gone in the near future, this workaround is acceptable.
                responder.sendString(HttpResponseStatus.BAD_REQUEST, "Required header 'class-name' is absent.");
                return;
            }
            LOG.debug("Adding module {}, class name: {}", datasetModuleId, className);
            String dataFabricDir = cConf.get(Constants.Dataset.Manager.OUTPUT_DIR);
            String moduleName = datasetModuleId.getModule();
            Location archiveDir = namespaceHomeLocation.append(dataFabricDir).append(moduleName).append(Constants.ARCHIVE_DIR);
            String archiveName = moduleName + ".jar";
            Location archive = archiveDir.append(archiveName);
            // Copy uploaded content to a temporary location
            Location tmpLocation = archive.getTempFile(".tmp");
            try {
                Locations.mkdirsIfNotExists(archiveDir);
                LOG.debug("Copy from {} to {}", uploadedFile, tmpLocation);
                Files.copy(uploadedFile, Locations.newOutputSupplier(tmpLocation));
                // Finally, move archive to final location
                LOG.debug("Storing module {} jar at {}", datasetModuleId, archive);
                if (tmpLocation.renameTo(archive) == null) {
                    throw new IOException(String.format("Could not move archive from location: %s, to location: %s", tmpLocation, archive));
                }
                typeManager.addModule(datasetModuleId, className, archive, forceUpdate);
                // todo: respond with the DatasetModuleMeta of the just-added module (and log this info)
                // Ideally this should have been done before, but we cannot grant privileges on types until they've been
                // added to the type MDS. First revoke any orphaned privileges for types left behind by past failed revokes
                LOG.info("Added module {}", datasetModuleId);
                responder.sendStatus(HttpResponseStatus.OK);
            } catch (Exception e) {
                // In case copy to temporary file failed, or rename failed
                try {
                    tmpLocation.delete();
                } catch (IOException ex) {
                    LOG.warn("Failed to cleanup temporary location {}", tmpLocation);
                }
                if (e instanceof DatasetModuleConflictException) {
                    responder.sendString(HttpResponseStatus.CONFLICT, e.getMessage());
                } else {
                    throw e;
                }
            }
        }
    };
}
Also used: HttpResponder (io.cdap.http.HttpResponder) DatasetModuleConflictException (io.cdap.cdap.data2.datafabric.dataset.type.DatasetModuleConflictException) AbstractBodyConsumer (io.cdap.cdap.common.http.AbstractBodyConsumer) NotFoundException (io.cdap.cdap.common.NotFoundException) DatasetTypeNotFoundException (io.cdap.cdap.common.DatasetTypeNotFoundException) NamespaceNotFoundException (io.cdap.cdap.common.NamespaceNotFoundException) DatasetModuleNotFoundException (io.cdap.cdap.common.DatasetModuleNotFoundException) NamespaceId (io.cdap.cdap.proto.id.NamespaceId) IOException (java.io.IOException) File (java.io.File) Callable (java.util.concurrent.Callable) DatasetModuleCannotBeDeletedException (io.cdap.cdap.common.DatasetModuleCannotBeDeletedException) ConflictException (io.cdap.cdap.common.ConflictException) Location (org.apache.twill.filesystem.Location)
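
The consumer above follows a common publish pattern: stream the upload to a local temp file, copy it next to its final location, then rename so the archive appears atomically and readers never observe a half-written jar. Below is a minimal standalone sketch of the same pattern using java.nio.file; the class and method names are hypothetical, and the real handler above goes through Apache Twill Location rather than Path.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

public class AtomicPublish {

    /** Copies an uploaded file into place via a sibling temp file and a rename. */
    static void publish(Path uploadedFile, Path archive) throws IOException {
        Files.createDirectories(archive.getParent());
        Path tmp = archive.resolveSibling(archive.getFileName() + ".tmp");
        try {
            Files.copy(uploadedFile, tmp, StandardCopyOption.REPLACE_EXISTING);
            // the rename is the "commit": the archive becomes visible all at once
            Files.move(tmp, archive, StandardCopyOption.ATOMIC_MOVE);
        } catch (IOException e) {
            Files.deleteIfExists(tmp); // best-effort cleanup, mirroring the handler above
            throw e;
        }
    }
}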

Example 14 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class DefaultCube, method add.

@Override
public void add(Collection<? extends CubeFact> facts) {
    List<Fact> toWrite = Lists.newArrayList();
    int dimValuesCount = 0;
    for (CubeFact fact : facts) {
        for (Map.Entry<String, ? extends Aggregation> aggEntry : aggregations.entrySet()) {
            Aggregation agg = aggEntry.getValue();
            AggregationAlias aggregationAlias = null;
            if (aggregationAliasMap.containsKey(aggEntry.getKey())) {
                aggregationAlias = aggregationAliasMap.get(aggEntry.getKey());
            }
            if (agg.accept(fact)) {
                List<DimensionValue> dimensionValues = Lists.newArrayList();
                for (String dimensionName : agg.getDimensionNames()) {
                    String dimensionValueKey = aggregationAlias == null ? dimensionName : aggregationAlias.getAlias(dimensionName);
                    dimensionValues.add(new DimensionValue(dimensionName, fact.getDimensionValues().get(dimensionValueKey)));
                    dimValuesCount++;
                }
                toWrite.add(new Fact(fact.getTimestamp(), dimensionValues, fact.getMeasurements()));
            }
        }
    }
    Map<Integer, Future<?>> futures = new HashMap<>();
    for (Map.Entry<Integer, FactTable> table : resolutionToFactTable.entrySet()) {
        futures.put(table.getKey(), executorService.submit(() -> table.getValue().add(toWrite)));
    }
    boolean failed = false;
    Exception failedException = null;
    StringBuilder failedMessage = new StringBuilder("Failed to add metrics to ");
    for (Map.Entry<Integer, Future<?>> future : futures.entrySet()) {
        try {
            Uninterruptibles.getUninterruptibly(future.getValue());
        } catch (ExecutionException e) {
            if (!failed) {
                failed = true;
                failedMessage.append(String.format("the %d resolution table", future.getKey()));
            } else {
                failedMessage.append(String.format(", the %d resolution table", future.getKey()));
            }
            if (failedException == null) {
                failedException = e;
            } else {
                failedException.addSuppressed(e);
            }
        }
    }
    if (failed) {
        throw new RuntimeException(failedMessage.append(".").toString(), failedException);
    }
    incrementMetric("cube.cubeFact.add.request.count", 1);
    incrementMetric("cube.cubeFact.added.count", facts.size());
    incrementMetric("cube.tsFact.created.count", toWrite.size());
    incrementMetric("cube.tsFact.created.dimValues.count", dimValuesCount);
    incrementMetric("cube.tsFact.added.count", toWrite.size() * resolutionToFactTable.size());
}
Also used: HashMap (java.util.HashMap) LinkedHashMap (java.util.LinkedHashMap) Fact (io.cdap.cdap.data2.dataset2.lib.timeseries.Fact) CubeFact (io.cdap.cdap.api.dataset.lib.cube.CubeFact) IOException (java.io.IOException) ExecutionException (java.util.concurrent.ExecutionException) FactTable (io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable) DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue) Future (java.util.concurrent.Future) Map (java.util.Map)
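
DefaultCube.add fans a single batch out to every resolution table in parallel and reports failure only after every future has been drained, attaching later failures as suppressed exceptions so none are lost. A standalone sketch of that pattern in plain Java follows; writeToTable is a hypothetical stand-in for FactTable.add.

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

public class FanOutWrite {

    static void writeToTable(int resolution) { /* hypothetical stand-in for FactTable.add */ }

    static void writeAll(Iterable<Integer> resolutions, ExecutorService executor) {
        Map<Integer, Future<?>> futures = new HashMap<>();
        for (int resolution : resolutions) {
            futures.put(resolution, executor.submit(() -> writeToTable(resolution)));
        }
        RuntimeException failure = null;
        for (Map.Entry<Integer, Future<?>> entry : futures.entrySet()) {
            try {
                entry.getValue().get(); // wait for every write, even after a failure
            } catch (ExecutionException e) {
                if (failure == null) {
                    failure = new RuntimeException(
                        "Failed to add metrics to the " + entry.getKey() + " resolution table", e.getCause());
                } else {
                    failure.addSuppressed(e.getCause());
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("Interrupted while writing fact tables", e);
            }
        }
        if (failure != null) {
            throw failure; // every table was attempted before reporting
        }
    }
}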

Example 15 with Fact

Use of io.cdap.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class DefaultCube, method delete.

@Override
public void delete(CubeDeleteQuery query) {
    // this may be very inefficient; it's better to use TTL. This exists only to support old functionality.
    List<DimensionValue> dimensionValues = Lists.newArrayList();
    // use the dimension values of the aggregation to delete entries in all the fact-tables.
    for (Aggregation agg : aggregations.values()) {
        if (query.getTagPredicate().test(agg.getDimensionNames())) {
            dimensionValues.clear();
            for (String dimensionName : agg.getDimensionNames()) {
                dimensionValues.add(new DimensionValue(dimensionName, query.getDimensionValues().get(dimensionName)));
            }
            FactTable factTable = resolutionToFactTable.get(query.getResolution());
            FactScan scan = new FactScan(query.getStartTs(), query.getEndTs(), query.getMeasureNames(), dimensionValues);
            factTable.delete(scan);
        }
    }
}
Also used: FactScan (io.cdap.cdap.data2.dataset2.lib.timeseries.FactScan) FactTable (io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable) DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue)
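
delete resolves the query into one FactScan per matching aggregation: the aggregation supplies the ordered dimension names, and the query supplies whatever values it carries, with absent dimensions coming through as null (the same null-valued dims the tests above exercise). Below is a standalone sketch of that projection in plain Java; DimensionValue is modeled as a simplified record here, not the CDAP class.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class DeleteProjection {

    // simplified stand-in for io.cdap.cdap.api.dataset.lib.cube.DimensionValue
    record DimensionValue(String name, String value) {}

    /** Projects query values onto an aggregation's dimension order; absent values stay null. */
    static List<DimensionValue> project(List<String> aggDimensionNames, Map<String, String> queryValues) {
        List<DimensionValue> result = new ArrayList<>();
        for (String name : aggDimensionNames) {
            result.add(new DimensionValue(name, queryValues.get(name)));
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.println(project(List.of("dim1", "dim2", "dim3"), Map.of("dim1", "value1")));
        // [DimensionValue[name=dim1, value=value1], DimensionValue[name=dim2, value=null],
        //  DimensionValue[name=dim3, value=null]]
    }
}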

Aggregations

DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue): 7 uses
DimensionValue (co.cask.cdap.api.dataset.lib.cube.DimensionValue): 5 uses
Test (org.junit.Test): 5 uses
Measurement (io.cdap.cdap.api.dataset.lib.cube.Measurement): 4 uses
ImmutableList (com.google.common.collect.ImmutableList): 3 uses
Row (io.cdap.cdap.api.dataset.table.Row): 3 uses
Scanner (io.cdap.cdap.api.dataset.table.Scanner): 3 uses
FuzzyRowFilter (io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter): 3 uses
InMemoryMetricsTable (io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable): 3 uses
IOException (java.io.IOException): 3 uses
ArrayList (java.util.ArrayList): 3 uses
List (java.util.List): 3 uses
Map (java.util.Map): 3 uses
Measurement (co.cask.cdap.api.dataset.lib.cube.Measurement): 2 uses
Row (co.cask.cdap.api.dataset.table.Row): 2 uses
Scanner (co.cask.cdap.api.dataset.table.Scanner): 2 uses
FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter): 2 uses
InMemoryMetricsTable (co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable): 2 uses
FactTable (co.cask.cdap.data2.dataset2.lib.timeseries.FactTable): 2 uses
FactTable (io.cdap.cdap.data2.dataset2.lib.timeseries.FactTable): 2 uses