Search in sources :

Example 31 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class TableTest method testBatchWritableKeyIsIgnored.

/**
 * Checks that the key argument of {@code write(key, put)} has no influence on where the data lands:
 * three puts addressed to row "a" are written under different keys, and all of them must end up in row "a".
 */
@Test
public void testBatchWritableKeyIsIgnored() throws Exception {
    final String tableName = "batchWritableTable";
    getTableAdmin(CONTEXT1, tableName).create();
    try (Table table = getTable(CONTEXT1, tableName)) {
        // first transaction: write three times with key = null, q, a; the Put always targets row "a"
        Transaction writeTx = txClient.startShort();
        TransactionAware txAware = (TransactionAware) table;
        txAware.startTx(writeTx);
        table.write(null, new Put("a").add("x", "x"));
        table.write(new byte[] { 'q' }, new Put("a").add("y", "y"));
        table.write(new byte[] { 'a' }, new Put("a").add("z", "z"));
        txClient.canCommitOrThrow(writeTx, txAware.getTxChanges());
        txAware.commitTx();
        txClient.commitOrThrow(writeTx);

        // second transaction: row "q" must be untouched and row "a" must hold all three columns
        Transaction readTx = txClient.startShort();
        txAware.startTx(readTx);
        Assert.assertTrue(table.get(new Get("q")).isEmpty());
        Row rowA = table.get(new Get("a"));
        Assert.assertEquals(3, rowA.getColumns().size());
        Assert.assertEquals("x", rowA.getString("x"));
        Assert.assertEquals("y", rowA.getString("y"));
        Assert.assertEquals("z", rowA.getString("z"));
        txAware.commitTx();
        txClient.abort(readTx);
    } finally {
        // always remove the table so that the test leaves nothing behind
        getTableAdmin(CONTEXT1, tableName).drop();
    }
}
Also used : Table(io.cdap.cdap.api.dataset.table.Table) HBaseTable(io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Get(io.cdap.cdap.api.dataset.table.Get) Row(io.cdap.cdap.api.dataset.table.Row) Put(io.cdap.cdap.api.dataset.table.Put) Test(org.junit.Test)

Example 32 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class BufferingTableTest method verify.

/**
 * Asserts that the given table holds exactly one column {@code col} with value {@code val} in row
 * {@code row}, checking every read path: single-column get, column-set get, whole-row get and scan.
 *
 * @param table table to read from
 * @param row row key expected to exist
 * @param col the only column expected in the row
 * @param val value expected for the column
 * @throws Exception if any of the table operations fails
 */
private void verify(BufferingTable table, byte[] row, byte[] col, byte[] val) throws Exception {
    // get column
    Assert.assertArrayEquals(val, table.get(row, col));
    // get set of columns
    Row getColSetRow = table.get(row, new byte[][] { col });
    Map<byte[], byte[]> getColSetResult = getColSetRow.getColumns();
    Assert.assertEquals(1, getColSetResult.size());
    Assert.assertArrayEquals(val, getColSetResult.get(col));
    // get row
    Row getRow = table.get(row);
    Map<byte[], byte[]> getRowResult = getRow.getColumns();
    Assert.assertEquals(1, getRowResult.size());
    Assert.assertArrayEquals(val, getRowResult.get(col));
    // scan; try-with-resources releases the scanner even when one of the assertions below fails
    try (Scanner scan = table.scan(row, null)) {
        Row next = scan.next();
        Assert.assertNotNull(next);
        Assert.assertArrayEquals(row, next.getRow());
        Assert.assertArrayEquals(val, next.get(col));
        // the scan must yield exactly one row
        Assert.assertNull(scan.next());
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) Row(io.cdap.cdap.api.dataset.table.Row)

Example 33 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class FactTable method add.

/**
 * Writes the given facts to the underlying time series table.
 * <p>
 * Non-counter measurements are written with a put. Counter measurements are written with an increment,
 * except when the fact counter cache is enabled and holds, for the same dimension values and measure name,
 * a timestamp older than the current resolution-aligned timestamp; those are written with a put as well.
 *
 * @param facts facts to write
 */
public void add(List<Fact> facts) {
    // values that go to the table as puts
    NavigableMap<byte[], NavigableMap<byte[], Long>> gaugesTable = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    // values that go to the table as increments
    NavigableMap<byte[], NavigableMap<byte[], Long>> incrementsTable = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    // COUNTER measurements that can be handled like a GAUGE, because their row key is guaranteed to be new
    // in the underlying table
    NavigableMap<byte[], NavigableMap<byte[], Long>> incGaugeTable = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
    // newest resolution-aligned timestamp per cache key, applied to the cache after the loop
    Map<FactCacheKey, Long> cacheUpdates = new HashMap<>();

    for (Fact fact : facts) {
        for (Measurement measurement : fact.getMeasurements()) {
            byte[] rowKey = codec.createRowKey(fact.getDimensionValues(), measurement.getName(), fact.getTimestamp());
            byte[] column = codec.createColumn(fact.getTimestamp());

            if (MeasureType.COUNTER != measurement.getType()) {
                // anything that is not a counter simply overwrites the cell
                NavigableMap<byte[], Long> gaugeColumns =
                    gaugesTable.computeIfAbsent(rowKey, k -> Maps.newTreeMap(Bytes.BYTES_COMPARATOR));
                gaugeColumns.put(column, measurement.getValue());
                continue;
            }

            if (factCounterCache == null) {
                // without the cache every counter has to be an increment
                inc(incrementsTable, rowKey, column, measurement.getValue());
                continue;
            }

            // round to the resolution timestamp
            long tsToResolution = fact.getTimestamp() / resolution * resolution;
            FactCacheKey cacheKey = new FactCacheKey(fact.getDimensionValues(), measurement.getName());
            Long existingTs = factCounterCache.getIfPresent(cacheKey);

            // Only a cached timestamp that is strictly older than the current one lets the counter be treated
            // as a newly seen metric (gauge); an unknown key or a same/newer timestamp requires an increment.
            boolean seenInEarlierWindow = existingTs != null && existingTs < tsToResolution;
            inc(seenInEarlierWindow ? incGaugeTable : incrementsTable, rowKey, column, measurement.getValue());

            // the cache needs the new timestamp when the key is unknown or its timestamp is older
            if (existingTs == null || seenInEarlierWindow) {
                cacheUpdates.merge(cacheKey, tsToResolution, Long::max);
            }
        }
    }

    if (factCounterCache != null) {
        gaugesTable.putAll(incGaugeTable);
        factCounterCache.putAll(cacheUpdates);
    }

    // todo: replace with single call, to be able to optimize rpcs in underlying table
    timeSeriesTable.put(gaugesTable);
    timeSeriesTable.increment(incrementsTable);

    if (metrics != null) {
        metrics.increment(putCountMetric, gaugesTable.size());
        metrics.increment(incrementCountMetric, incrementsTable.size());
    }
}
Also used : Measurement(io.cdap.cdap.api.dataset.lib.cube.Measurement) Arrays(java.util.Arrays) ImmutablePair(io.cdap.cdap.common.utils.ImmutablePair) LoggerFactory(org.slf4j.LoggerFactory) Bytes(io.cdap.cdap.api.common.Bytes) FuzzyRowFilter(io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter) HashMap(java.util.HashMap) MetricsTable(io.cdap.cdap.data2.dataset2.lib.table.MetricsTable) ArrayList(java.util.ArrayList) Row(io.cdap.cdap.api.dataset.table.Row) MetricsCollector(io.cdap.cdap.api.metrics.MetricsCollector) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) Scanner(io.cdap.cdap.api.dataset.table.Scanner) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) Collection(java.util.Collection) Set(java.util.Set) IOException(java.io.IOException) NavigableMap(java.util.NavigableMap) Maps(com.google.common.collect.Maps) Sets(com.google.common.collect.Sets) Objects(java.util.Objects) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) TreeMap(java.util.TreeMap) Measurement(io.cdap.cdap.api.dataset.lib.cube.Measurement) Closeable(java.io.Closeable) Preconditions(com.google.common.base.Preconditions) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) MeasureType(io.cdap.cdap.api.dataset.lib.cube.MeasureType) VisibleForTesting(com.google.common.annotations.VisibleForTesting) CacheBuilder(com.google.common.cache.CacheBuilder) Cache(com.google.common.cache.Cache) Comparator(java.util.Comparator) Collections(java.util.Collections) NavigableMap(java.util.NavigableMap) HashMap(java.util.HashMap)

Example 34 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class FactTable method findMeasureNames.

/**
 * Collects the measure names of all facts that match the given dimension values and time range.
 * The search stops once more than {@code MAX_RECORDS_TO_SCAN_DURING_SEARCH} rows have been read.
 *
 * @param allDimensionNames list of all dimension names to be present in the fact record
 * @param dimensionSlice dimension values to filter by, {@code null} means any non-null value.
 * @param startTs start timestamp, in sec
 * @param endTs end timestamp, in sec
 * @return {@link Set} of measure names
 */
// todo: pass a limit on number of measures returned
public Set<String> findMeasureNames(List<String> allDimensionNames, Map<String, String> dimensionSlice, long startTs, long endTs) {
    // pair every dimension name with the value requested for it (null when the slice has none)
    List<DimensionValue> allDimensions = Lists.newArrayList();
    for (String name : allDimensionNames) {
        String value = dimensionSlice.get(name);
        allDimensions.add(new DimensionValue(name, value));
    }

    byte[] startRow = codec.createStartRowKey(allDimensions, null, startTs, false);
    byte[] endRow = Bytes.stopKeyForPrefix(codec.createEndRowKey(allDimensions, null, endTs, false));
    FactScan factScan = new FactScan(startTs, endTs, Collections.emptyList(), allDimensions);
    FuzzyRowFilter fuzzyRowFilter = createFuzzyRowFilter(factScan, startRow);

    Set<String> measureNames = Sets.newHashSet();
    int scannedRecords = 0;
    try (Scanner scanner = timeSeriesTable.scan(startRow, endRow, fuzzyRowFilter)) {
        for (Row current = scanner.next(); current != null; current = scanner.next()) {
            // give up once the scan budget is used up
            if (++scannedRecords > MAX_RECORDS_TO_SCAN_DURING_SEARCH) {
                break;
            }
            byte[] rowKey = current.getRow();

            // filter out columns by time range (scan configuration only filters whole rows)
            long tsAtStartColumn = codec.getTimestamp(rowKey, codec.createColumn(startTs));
            if (tsAtStartColumn < startTs) {
                continue;
            }
            long tsAtEndColumn = codec.getTimestamp(rowKey, codec.createColumn(endTs));
            if (tsAtEndColumn > endTs) {
                // we're done with scanner
                break;
            }

            measureNames.add(codec.getMeasureName(rowKey));
        }
    }
    LOG.trace("search for measures completed, scanned records: {}", scannedRecords);
    return measureNames;
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) DimensionValue(io.cdap.cdap.api.dataset.lib.cube.DimensionValue) Row(io.cdap.cdap.api.dataset.table.Row) FuzzyRowFilter(io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter)

Example 35 with Row

use of io.cdap.cdap.api.dataset.table.Row in project cdap by caskdata.

the class FactTable method delete.

/**
 * Delete entries in fact table.
 * <p>
 * For every scanned row, only the columns whose timestamp lies within
 * {@code [scan.getStartTs(), scan.getEndTs()]} are deleted. Scanning stops at the first column whose
 * timestamp is past the end of the range.
 *
 * @param scan specifies deletion criteria
 */
public void delete(FactScan scan) {
    try (Scanner scanner = getScanner(scan)) {
        Row row;
        while ((row = scanner.next()) != null) {
            List<byte[]> columns = Lists.newArrayList();
            boolean exhausted = false;
            for (byte[] column : row.getColumns().keySet()) {
                long ts = codec.getTimestamp(row.getRow(), column);
                if (ts < scan.getStartTs()) {
                    // before the requested range: keep this column
                    continue;
                }
                if (ts > scan.getEndTs()) {
                    // past the requested range: nothing further to delete
                    exhausted = true;
                    break;
                }
                columns.add(column);
            }
            // Skip the call when no column of this row is in range: an empty delete is at best a wasted
            // round trip. NOTE(review): some table backends may interpret an empty column set as
            // "delete the whole row", which would remove data outside the range - confirm per implementation.
            if (!columns.isEmpty()) {
                // todo: do deletes efficiently, in batches, not one-by-one
                timeSeriesTable.delete(row.getRow(), columns.toArray(new byte[columns.size()][]));
            }
            if (exhausted) {
                break;
            }
        }
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) Row(io.cdap.cdap.api.dataset.table.Row)

Aggregations

Row (io.cdap.cdap.api.dataset.table.Row)166 Scanner (io.cdap.cdap.api.dataset.table.Scanner)81 Test (org.junit.Test)50 Table (io.cdap.cdap.api.dataset.table.Table)34 Put (io.cdap.cdap.api.dataset.table.Put)29 ArrayList (java.util.ArrayList)26 TransactionExecutor (org.apache.tephra.TransactionExecutor)26 Get (io.cdap.cdap.api.dataset.table.Get)24 Schema (io.cdap.cdap.api.data.schema.Schema)21 HashMap (java.util.HashMap)19 MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey)16 Transaction (org.apache.tephra.Transaction)16 TransactionAware (org.apache.tephra.TransactionAware)16 IOException (java.io.IOException)14 Map (java.util.Map)14 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)13 DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin)12 WriteOnly (io.cdap.cdap.api.annotation.WriteOnly)10 DimensionValue (io.cdap.cdap.api.dataset.lib.cube.DimensionValue)10 HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable)10