
Example 1 with Fact

Use of co.cask.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTable, method getScanner.

private Scanner getScanner(FactScan scan) {
    // sort the measures by their entity ids and, based on that, pick the metric
    // names for the start and end row keys
    List<String> measureNames = getSortedMeasures(scan.getMeasureNames());
    byte[] startRow = codec.createStartRowKey(scan.getDimensionValues(),
                                              measureNames.isEmpty() ? null : measureNames.get(0),
                                              scan.getStartTs(), false);
    byte[] endRow = codec.createEndRowKey(scan.getDimensionValues(),
                                          measureNames.isEmpty() ? null : measureNames.get(measureNames.size() - 1),
                                          scan.getEndTs(), false);
    // note: 'columns' is computed for the single-timebase case but is not passed
    // to the scan call at the end of this method
    byte[][] columns;
    if (Arrays.equals(startRow, endRow)) {
        // If on the same timebase, we only need a subset of columns
        long timeBase = scan.getStartTs() / rollTime * rollTime;
        int startCol = (int) (scan.getStartTs() - timeBase) / resolution;
        int endCol = (int) (scan.getEndTs() - timeBase) / resolution;
        columns = new byte[endCol - startCol + 1][];
        for (int i = 0; i < columns.length; i++) {
            columns[i] = Bytes.toBytes((short) (startCol + i));
        }
    }
    endRow = Bytes.stopKeyForPrefix(endRow);
    FuzzyRowFilter fuzzyRowFilter = measureNames.isEmpty()
        ? createFuzzyRowFilter(scan, startRow)
        : createFuzzyRowFilter(scan, measureNames);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Scanning fact table {} with scan: {}; constructed startRow: {}, endRow: {}, fuzzyRowFilter: {}",
                  timeSeriesTable, scan, toPrettyLog(startRow), toPrettyLog(endRow), fuzzyRowFilter);
    }
    return timeSeriesTable.scan(startRow, endRow, fuzzyRowFilter);
}
Also used: FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter)
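
The single-timebase branch above is the subtle part of getScanner: each row covers rollTime seconds, and each column within the row holds one resolution-sized time slot. Below is a minimal standalone sketch of the same arithmetic (plain Java, no CDAP dependencies; the resolution and rollTime values, and the class name, are assumptions chosen for illustration):

public class TimebaseColumnDemo {
    public static void main(String[] args) {
        int resolution = 10; // seconds per column (assumed)
        long rollTime = 20;  // seconds per row, i.e. rollTimebaseInterval * resolution (assumed)
        long startTs = 1234567885L; // scan start, in seconds
        long endTs = 1234567899L;   // scan end, in seconds
        // Same arithmetic as getScanner: floor the start timestamp to the row boundary...
        long timeBase = startTs / rollTime * rollTime;          // 1234567880
        int startCol = (int) (startTs - timeBase) / resolution; // 5 / 10  = 0
        int endCol = (int) (endTs - timeBase) / resolution;     // 19 / 10 = 1
        // ...then read column indices startCol..endCol within that single row.
        System.out.println("timeBase=" + timeBase + ", columns " + startCol + ".." + endCol);
    }
}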

Example 2 with Fact

Use of co.cask.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTable, method findMeasureNames.

/**
 * Finds all measure names of the facts that match given {@link DimensionValue}s and time range.
 * @param allDimensionNames list of all dimension names to be present in the fact record
 * @param dimensionSlice dimension values to filter by, {@code null} means any non-null value.
 * @param startTs start timestamp, in sec
 * @param endTs end timestamp, in sec
 * @return {@link Set} of measure names
 */
// todo: pass a limit on number of measures returned
public Set<String> findMeasureNames(List<String> allDimensionNames, Map<String, String> dimensionSlice,
                                    long startTs, long endTs) {
    List<DimensionValue> allDimensions = Lists.newArrayList();
    for (String dimensionName : allDimensionNames) {
        allDimensions.add(new DimensionValue(dimensionName, dimensionSlice.get(dimensionName)));
    }
    byte[] startRow = codec.createStartRowKey(allDimensions, null, startTs, false);
    byte[] endRow = codec.createEndRowKey(allDimensions, null, endTs, false);
    endRow = Bytes.stopKeyForPrefix(endRow);
    FuzzyRowFilter fuzzyRowFilter =
        createFuzzyRowFilter(new FactScan(startTs, endTs, ImmutableList.<String>of(), allDimensions), startRow);
    Set<String> measureNames = Sets.newHashSet();
    int scannedRecords = 0;
    try (Scanner scanner = timeSeriesTable.scan(startRow, endRow, fuzzyRowFilter)) {
        Row rowResult;
        while ((rowResult = scanner.next()) != null) {
            scannedRecords++;
            if (scannedRecords > MAX_RECORDS_TO_SCAN_DURING_SEARCH) {
                break;
            }
            byte[] rowKey = rowResult.getRow();
            // filter out columns by time range (scan configuration only filters whole rows)
            if (codec.getTimestamp(rowKey, codec.createColumn(startTs)) < startTs) {
                continue;
            }
            if (codec.getTimestamp(rowKey, codec.createColumn(endTs)) > endTs) {
                // we're done with scanner
                break;
            }
            measureNames.add(codec.getMeasureName(rowResult.getRow()));
        }
    }
    LOG.trace("search for measures completed, scanned records: {}", scannedRecords);
    return measureNames;
}
Also used: Scanner (co.cask.cdap.api.dataset.table.Scanner), DimensionValue (co.cask.cdap.api.dataset.lib.cube.DimensionValue), Row (co.cask.cdap.api.dataset.table.Row), FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter)
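
To make the slice semantics concrete, here is a hypothetical call site (the factTable variable and dimension names are illustrative, not from the source; Guava's Maps.newHashMap is used because ImmutableMap rejects null values):

long nowSec = System.currentTimeMillis() / 1000;
Map<String, String> slice = Maps.newHashMap();
slice.put("dim1", "value1"); // dim1 must equal "value1"
slice.put("dim2", null);     // null means "any non-null value" for dim2
// dim3 is absent: dimensionSlice.get("dim3") also yields null, i.e. any non-null value
Set<String> measures = factTable.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"),
                                                  slice, nowSec - 3600, nowSec);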

Example 3 with Fact

Use of co.cask.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTableTest, method testPreSplits.

@Test
public void testPreSplits() throws Exception {
    InMemoryTableService.create("presplitEntityTable");
    InMemoryTableService.create("presplitDataTable");
    int resolution = 10;
    int rollTimebaseInterval = 2;
    InMemoryMetricsTable metricsTable = new InMemoryMetricsTable("presplitDataTable");
    FactTable table = new FactTable(metricsTable,
                                    new EntityTable(new InMemoryMetricsTable("presplitEntityTable")),
                                    resolution, rollTimebaseInterval);
    byte[][] splits = FactTable.getSplits(3);
    long ts = System.currentTimeMillis() / 1000;
    DimensionValue dimVal1 = new DimensionValue("dim1", "value1");
    DimensionValue dimVal2 = new DimensionValue("dim2", "value2");
    DimensionValue dimVal3 = new DimensionValue("dim3", "value3");
    // first agg view: dim1
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // second agg view: dim1 & dim2
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal1, dimVal2), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // third agg view: dim3
    table.add(ImmutableList.of(new Fact(ts, ImmutableList.of(dimVal3), new Measurement("metric1", MeasureType.COUNTER, 1))));
    // Verify all written records are spread across splits
    Scanner scanner = metricsTable.scan(null, null, null);
    Row row;
    Set<Integer> splitsWithRows = Sets.newHashSet();
    while ((row = scanner.next()) != null) {
        boolean added = false;
        for (int i = 0; i < splits.length; i++) {
            if (Bytes.compareTo(row.getRow(), splits[i]) < 0) {
                splitsWithRows.add(i);
                added = true;
                break;
            }
        }
        if (!added) {
            // falls into last split
            splitsWithRows.add(splits.length);
        }
    }
    Assert.assertEquals(3, splitsWithRows.size());
}
Also used: Measurement (co.cask.cdap.api.dataset.lib.cube.Measurement), Scanner (co.cask.cdap.api.dataset.table.Scanner), DimensionValue (co.cask.cdap.api.dataset.lib.cube.DimensionValue), InMemoryMetricsTable (co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable), Row (co.cask.cdap.api.dataset.table.Row), Test (org.junit.Test)
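
The while loop in this test assigns each row to a bucket by comparing its key against the ordered split boundaries. The same logic as a standalone helper (a sketch only; Arrays.compareUnsigned, available since Java 9, stands in for the unsigned byte ordering of Bytes.compareTo):

import java.util.Arrays;

final class SplitBuckets {
    // Returns the index of the split a row key falls into: the first split whose
    // boundary is greater than the key, or splits.length for keys at or past the
    // last boundary (the "last split" in the test above).
    static int splitIndex(byte[] rowKey, byte[][] splits) {
        for (int i = 0; i < splits.length; i++) {
            if (Arrays.compareUnsigned(rowKey, splits[i]) < 0) {
                return i;
            }
        }
        return splits.length;
    }
}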

Example 4 with Fact

Use of co.cask.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class FactTableTest, method testBasics.

@Test
public void testBasics() throws Exception {
    InMemoryTableService.create("EntityTable");
    InMemoryTableService.create("DataTable");
    int resolution = 10;
    int rollTimebaseInterval = 2;
    FactTable table = new FactTable(new InMemoryMetricsTable("DataTable"),
                                    new EntityTable(new InMemoryMetricsTable("EntityTable")),
                                    resolution, rollTimebaseInterval);
    // aligned to start of resolution bucket
    // "/1000" because time is expected to be in seconds
    long ts = ((System.currentTimeMillis() / 1000) / resolution) * resolution;
    // testing encoding with multiple dims
    List<DimensionValue> dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"),
                                                            new DimensionValue("dim2", "value2"),
                                                            new DimensionValue("dim3", "value3"));
    // try adding facts one by one, in the same (first) time resolution bucket
    for (int i = 0; i < 5; i++) {
        for (int k = 1; k < 4; k++) {
            // note: the "+ i" here and below doesn't affect results; it just confirms
            // that data points are rounded to the resolution
            table.add(ImmutableList.of(new Fact(ts + i, dimensionValues,
                                                new Measurement("metric" + k, MeasureType.COUNTER, k))));
        }
    }
    // try adding one by one, in different time resolution buckets
    for (int i = 0; i < 3; i++) {
        for (int k = 1; k < 4; k++) {
            table.add(ImmutableList.of(new Fact(ts + resolution * i + i, dimensionValues,
                                                new Measurement("metric" + k, MeasureType.COUNTER, 2 * k))));
        }
    }
    // try adding as a list:
    // first, increments in the same (second) time resolution bucket
    List<Fact> aggs = Lists.newArrayList();
    for (int i = 0; i < 7; i++) {
        for (int k = 1; k < 4; k++) {
            aggs.add(new Fact(ts + resolution, dimensionValues,
                              new Measurement("metric" + k, MeasureType.COUNTER, 3 * k)));
        }
    }
    // then increments in different time resolution buckets
    for (int i = 0; i < 3; i++) {
        for (int k = 1; k < 4; k++) {
            aggs.add(new Fact(ts + resolution * i, dimensionValues,
                              new Measurement("metric" + k, MeasureType.COUNTER, 4 * k)));
        }
    }
    table.add(aggs);
    // verify each metric
    for (int k = 1; k < 4; k++) {
        FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric" + k, dimensionValues);
        Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
        expected.put("metric" + k, dimensionValues,
                     ImmutableList.of(new TimeValue(ts, 11 * k),
                                      new TimeValue(ts + resolution, 27 * k),
                                      new TimeValue(ts + 2 * resolution, 6 * k)));
        assertScan(table, expected, scan);
    }
    // verify each metric within a single timeBase
    for (int k = 1; k < 4; k++) {
        FactScan scan = new FactScan(ts, ts + resolution - 1, "metric" + k, dimensionValues);
        Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k)));
        assertScan(table, expected, scan);
    }
    // verify all metrics with fuzzy metric in scan
    Table<String, List<DimensionValue>, List<TimeValue>> expected = HashBasedTable.create();
    for (int k = 1; k < 4; k++) {
        expected.put("metric" + k, dimensionValues,
                     ImmutableList.of(new TimeValue(ts, 11 * k),
                                      new TimeValue(ts + resolution, 27 * k),
                                      new TimeValue(ts + 2 * resolution, 6 * k)));
    }
    // metric = null means "all"
    FactScan scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
    assertScan(table, expected, scan);
    // metric deletion test
    expected.clear();
    // delete the metrics data in the (ts + 2 * resolution) bucket
    scan = new FactScan(ts + resolution * 2, ts + resolution * 3, dimensionValues);
    table.delete(scan);
    for (int k = 1; k < 4; k++) {
        expected.put("metric" + k, dimensionValues, ImmutableList.of(new TimeValue(ts, 11 * k), new TimeValue(ts + resolution, 27 * k)));
    }
    // verify deletion
    scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, dimensionValues);
    assertScan(table, expected, scan);
    // delete data for "metric1" at the first timestamp (ts) and verify the deletion
    scan = new FactScan(ts, ts + 1, "metric1", dimensionValues);
    table.delete(scan);
    expected.clear();
    expected.put("metric1", dimensionValues, ImmutableList.of(new TimeValue(ts + resolution, 27)));
    scan = new FactScan(ts - 2 * resolution, ts + 3 * resolution, "metric1", dimensionValues);
    assertScan(table, expected, scan);
    // verify the next-dimension search
    Collection<DimensionValue> nextTags =
        table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"),
                                       ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
    Map<String, String> slice = Maps.newHashMap();
    slice.put("dim1", null);
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2")), nextTags);
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"),
                                              ImmutableMap.of("dim1", "value1", "dim2", "value2"), ts, ts + 3);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim3", "value3")), nextTags);
    // add new dim values
    dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"),
                                       new DimensionValue("dim2", "value5"),
                                       new DimensionValue("dim3", null));
    table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
    dimensionValues = ImmutableList.of(new DimensionValue("dim1", "value1"),
                                       new DimensionValue("dim2", null),
                                       new DimensionValue("dim3", "value3"));
    table.add(ImmutableList.of(new Fact(ts, dimensionValues, new Measurement("metric", MeasureType.COUNTER, 10))));
    nextTags = table.findSingleDimensionValue(ImmutableList.of("dim1", "dim2", "dim3"),
                                              ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of(new DimensionValue("dim2", "value2"),
                                        new DimensionValue("dim2", "value5"),
                                        new DimensionValue("dim3", "value3")), nextTags);
    // search for metric names given dims list and verify
    Collection<String> metricNames =
        table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"),
                               ImmutableMap.of("dim1", "value1", "dim2", "value2", "dim3", "value3"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"),
                                         ImmutableMap.of("dim1", "value1"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"),
                                         ImmutableMap.of("dim2", "value2"), ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric2", "metric3"), metricNames);
    metricNames = table.findMeasureNames(ImmutableList.of("dim1", "dim2", "dim3"), slice, ts, ts + 1);
    Assert.assertEquals(ImmutableSet.of("metric", "metric2", "metric3"), metricNames);
}
Also used: Measurement (co.cask.cdap.api.dataset.lib.cube.Measurement), DimensionValue (co.cask.cdap.api.dataset.lib.cube.DimensionValue), InMemoryMetricsTable (co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), TimeValue (co.cask.cdap.api.dataset.lib.cube.TimeValue), Test (org.junit.Test)
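
The expected values in this test follow from summing the four write loops per resolution bucket; a quick tally (plain arithmetic mirroring the loops above) shows where 11 * k, 27 * k, and 6 * k come from:

int k = 1; // the same tally holds for each metric, k = 1..3
int bucket0 = 5 * k + 2 * k + 4 * k;     // ts:                  five single adds of k, one add of 2k (i = 0), one batched add of 4k (i = 0) -> 11k
int bucket1 = 2 * k + 7 * 3 * k + 4 * k; // ts + resolution:     one add of 2k (i = 1), seven batched adds of 3k, one batched add of 4k (i = 1) -> 27k
int bucket2 = 2 * k + 4 * k;             // ts + 2 * resolution: one add of 2k (i = 2), one batched add of 4k (i = 2) -> 6k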

Example 5 with Fact

Use of co.cask.cdap.data2.dataset2.lib.timeseries.Fact in project cdap by caskdata.

The class DatasetTypeService, method createModuleConsumer.

private AbstractBodyConsumer createModuleConsumer(final DatasetModuleId datasetModuleId, final String className,
                                                  final boolean forceUpdate, final Principal principal)
    throws IOException, NotFoundException {
    final NamespaceId namespaceId = datasetModuleId.getParent();
    final Location namespaceHomeLocation;
    try {
        namespaceHomeLocation = impersonator.doAs(namespaceId, new Callable<Location>() {

            @Override
            public Location call() throws Exception {
                return namespacedLocationFactory.get(namespaceId);
            }
        });
    } catch (Exception e) {
        // the only checked exception that the callable throws is IOException
        Throwables.propagateIfInstanceOf(e, IOException.class);
        throw Throwables.propagate(e);
    }
    // verify namespace directory exists
    if (!namespaceHomeLocation.exists()) {
        String msg = String.format("Home directory %s for namespace %s not found", namespaceHomeLocation, namespaceId);
        LOG.debug(msg);
        throw new NotFoundException(msg);
    }
    // Store uploaded content to a local temp file
    String namespacesDir = cConf.get(Constants.Namespace.NAMESPACES_DIR);
    File localDataDir = new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR));
    File namespaceBase = new File(localDataDir, namespacesDir);
    File tempDir = new File(new File(namespaceBase, datasetModuleId.getNamespace()),
                            cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile();
    if (!DirUtils.mkdirs(tempDir)) {
        throw new IOException("Could not create temporary directory at: " + tempDir);
    }
    return new AbstractBodyConsumer(File.createTempFile("dataset-", ".jar", tempDir)) {

        @Override
        protected void onFinish(HttpResponder responder, File uploadedFile) throws Exception {
            if (className == null) {
                // We have to delay this check until the body upload completes, because not all clients
                // send the "Expect: 100-continue" header, and the client library we use cannot handle a
                // connection close while still reading the response reliably.
                // Longer term we should fix the client as well as the netty-http server; however, since
                // this handler will be gone in the near future, this workaround is acceptable.
                responder.sendString(HttpResponseStatus.BAD_REQUEST, "Required header 'class-name' is absent.");
                return;
            }
            LOG.debug("Adding module {}, class name: {}", datasetModuleId, className);
            String dataFabricDir = cConf.get(Constants.Dataset.Manager.OUTPUT_DIR);
            String moduleName = datasetModuleId.getModule();
            Location archiveDir = namespaceHomeLocation.append(dataFabricDir).append(moduleName).append(Constants.ARCHIVE_DIR);
            String archiveName = moduleName + ".jar";
            Location archive = archiveDir.append(archiveName);
            // Copy uploaded content to a temporary location
            Location tmpLocation = archive.getTempFile(".tmp");
            try {
                Locations.mkdirsIfNotExists(archiveDir);
                LOG.debug("Copy from {} to {}", uploadedFile, tmpLocation);
                Files.copy(uploadedFile, Locations.newOutputSupplier(tmpLocation));
                // Finally, move archive to final location
                LOG.debug("Storing module {} jar at {}", datasetModuleId, archive);
                if (tmpLocation.renameTo(archive) == null) {
                    throw new IOException(String.format("Could not move archive from location: %s, to location: %s", tmpLocation, archive));
                }
                typeManager.addModule(datasetModuleId, className, archive, forceUpdate);
                // todo: response with DatasetModuleMeta of just added module (and log this info)
                // Ideally this should have been done before, but we cannot grant privileges on types until they've been
                // added to the type MDS. First revoke any orphaned privileges for types left behind by past failed revokes
                revokeAllPrivilegesOnModule(datasetModuleId);
                grantAllPrivilegesOnModule(datasetModuleId, principal);
                LOG.info("Added module {}", datasetModuleId);
                responder.sendStatus(HttpResponseStatus.OK);
            } catch (Exception e) {
                // There was a problem in deploying the dataset module. so revoke the privileges.
                revokeAllPrivilegesOnModule(datasetModuleId);
                // In case copy to temporary file failed, or rename failed
                try {
                    tmpLocation.delete();
                } catch (IOException ex) {
                    LOG.warn("Failed to cleanup temporary location {}", tmpLocation);
                }
                if (e instanceof DatasetModuleConflictException) {
                    responder.sendString(HttpResponseStatus.CONFLICT, e.getMessage());
                } else {
                    throw e;
                }
            }
        }
    };
}
Also used: HttpResponder (co.cask.http.HttpResponder), DatasetModuleConflictException (co.cask.cdap.data2.datafabric.dataset.type.DatasetModuleConflictException), AbstractBodyConsumer (co.cask.cdap.common.http.AbstractBodyConsumer), NamespaceNotFoundException (co.cask.cdap.common.NamespaceNotFoundException), DatasetTypeNotFoundException (co.cask.cdap.common.DatasetTypeNotFoundException), DatasetModuleNotFoundException (co.cask.cdap.common.DatasetModuleNotFoundException), NotFoundException (co.cask.cdap.common.NotFoundException), NamespaceId (co.cask.cdap.proto.id.NamespaceId), IOException (java.io.IOException), File (java.io.File), Callable (java.util.concurrent.Callable), ConflictException (co.cask.cdap.common.ConflictException), UnauthorizedException (co.cask.cdap.security.spi.authorization.UnauthorizedException), DatasetModuleCannotBeDeletedException (co.cask.cdap.common.DatasetModuleCannotBeDeletedException), Location (org.apache.twill.filesystem.Location)
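
The onFinish body above is a standard "copy to a temp location, then rename into place" publish. A minimal sketch of the same pattern using only java.nio.file (class and path names are illustrative; the real handler uses Twill's Location API, whose renameTo does not request atomicity the way ATOMIC_MOVE does here):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

public final class AtomicPublish {
    public static void publish(Path uploadedFile, Path archiveDir, String archiveName) throws IOException {
        Files.createDirectories(archiveDir);
        Path tmp = archiveDir.resolve(archiveName + ".tmp");
        Path archive = archiveDir.resolve(archiveName);
        // Stage the full copy first; readers never observe a half-written jar.
        Files.copy(uploadedFile, tmp, StandardCopyOption.REPLACE_EXISTING);
        try {
            Files.move(tmp, archive, StandardCopyOption.ATOMIC_MOVE);
        } catch (IOException e) {
            Files.deleteIfExists(tmp); // mirror the cleanup in the handler above
            throw e;
        }
    }
}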

Aggregations

DimensionValue (co.cask.cdap.api.dataset.lib.cube.DimensionValue): 5 usages
Measurement (co.cask.cdap.api.dataset.lib.cube.Measurement): 2 usages
Row (co.cask.cdap.api.dataset.table.Row): 2 usages
Scanner (co.cask.cdap.api.dataset.table.Scanner): 2 usages
ConflictException (co.cask.cdap.common.ConflictException): 2 usages
DatasetModuleCannotBeDeletedException (co.cask.cdap.common.DatasetModuleCannotBeDeletedException): 2 usages
DatasetModuleNotFoundException (co.cask.cdap.common.DatasetModuleNotFoundException): 2 usages
DatasetTypeNotFoundException (co.cask.cdap.common.DatasetTypeNotFoundException): 2 usages
NamespaceNotFoundException (co.cask.cdap.common.NamespaceNotFoundException): 2 usages
NotFoundException (co.cask.cdap.common.NotFoundException): 2 usages
AbstractBodyConsumer (co.cask.cdap.common.http.AbstractBodyConsumer): 2 usages
DatasetModuleConflictException (co.cask.cdap.data2.datafabric.dataset.type.DatasetModuleConflictException): 2 usages
FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter): 2 usages
InMemoryMetricsTable (co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryMetricsTable): 2 usages
FactTable (co.cask.cdap.data2.dataset2.lib.timeseries.FactTable): 2 usages
NamespaceId (co.cask.cdap.proto.id.NamespaceId): 2 usages
HttpResponder (co.cask.http.HttpResponder): 2 usages
File (java.io.File): 2 usages
IOException (java.io.IOException): 2 usages
Callable (java.util.concurrent.Callable): 2 usages