Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
The class TableTest, method testMetrics.
private void testMetrics(boolean readless) throws Exception {
  final String tableName = "survive";
  DatasetProperties props = TableProperties.builder().setReadlessIncrementSupport(readless).build();
  DatasetAdmin admin = getTableAdmin(CONTEXT1, tableName, props);
  admin.create();
  try (Table table = getTable(CONTEXT1, tableName, props)) {
    // collect every metric the dataset emits into a plain map keyed by metric name
    final Map<String, Long> metrics = Maps.newHashMap();
    ((MeteredDataset) table).setMetricsCollector(new MetricsCollector() {
      @Override
      public void increment(String metricName, long value) {
        Long old = metrics.get(metricName);
        metrics.put(metricName, old == null ? value : old + value);
      }

      @Override
      public void gauge(String metricName, long value) {
        metrics.put(metricName, value);
      }
    });

    // Note that we don't need to finish the tx for metrics to be reported
    Transaction tx0 = txClient.startShort();
    ((TransactionAware) table).startTx(tx0);

    int writes = 0;
    int reads = 0;

    table.put(new Put(R1, C1, V1));
    verifyDatasetMetrics(metrics, ++writes, reads);

    // compareAndSwap reads the current value before writing the new one
    table.compareAndSwap(R1, C1, V1, V2);
    verifyDatasetMetrics(metrics, ++writes, ++reads);

    // note: will not write anything, as the expected value will not match
    table.compareAndSwap(R1, C1, V1, V2);
    verifyDatasetMetrics(metrics, writes, ++reads);

    // a readless increment is write-only; a regular increment reads the old value first
    table.increment(new Increment(R2, C2, 1L));
    if (readless) {
      verifyDatasetMetrics(metrics, ++writes, reads);
    } else {
      verifyDatasetMetrics(metrics, ++writes, ++reads);
    }

    // incrementAndGet must return the new value, so it counts a read either way
    table.incrementAndGet(new Increment(R2, C2, 1L));
    verifyDatasetMetrics(metrics, ++writes, ++reads);

    table.get(new Get(R1, C1, V1));
    verifyDatasetMetrics(metrics, writes, ++reads);

    // every row fetched from the scanner counts as one read
    try (Scanner scanner = table.scan(new Scan(null, null))) {
      while (scanner.next() != null) {
        verifyDatasetMetrics(metrics, writes, ++reads);
      }
    }

    table.delete(new Delete(R1, C1, V1));
    verifyDatasetMetrics(metrics, ++writes, reads);
  } finally {
    // drop the table
    admin.drop();
  }
}
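The helper verifyDatasetMetrics belongs to the surrounding test class and is not shown above. A minimal sketch of what such an assertion helper could look like, assuming the dataset reports under metric names like "dataset.store.writes" and "dataset.store.reads" (both names are assumptions, not taken from this snippet):

// Hypothetical sketch: the real helper lives in the surrounding test class,
// and the metric names below are assumptions, not confirmed by this snippet.
private void verifyDatasetMetrics(Map<String, Long> metrics, long expectedWrites, long expectedReads) {
  Assert.assertEquals(expectedWrites, valueOrZero(metrics, "dataset.store.writes"));
  Assert.assertEquals(expectedReads, valueOrZero(metrics, "dataset.store.reads"));
}

private long valueOrZero(Map<String, Long> metrics, String name) {
  Long value = metrics.get(name);
  return value == null ? 0L : value;
}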
Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
The class BufferingTableTest, method verify.
private void verify(BufferingTable table, byte[] row, byte[] col, byte[] val) throws Exception {
  // get a single column
  Assert.assertArrayEquals(val, table.get(row, col));

  // get a set of columns
  Row getColSetRow = table.get(row, new byte[][] { col });
  Map<byte[], byte[]> getColSetResult = getColSetRow.getColumns();
  Assert.assertEquals(1, getColSetResult.size());
  Assert.assertArrayEquals(val, getColSetResult.get(col));

  // get the whole row
  Row getRow = table.get(row);
  Map<byte[], byte[]> getRowResult = getRow.getColumns();
  Assert.assertEquals(1, getRowResult.size());
  Assert.assertArrayEquals(val, getRowResult.get(col));

  // scan: expect exactly one row, then exhaustion; close the scanner when done
  try (Scanner scan = table.scan(row, null)) {
    Row next = scan.next();
    Assert.assertNotNull(next);
    Assert.assertArrayEquals(row, next.getRow());
    Assert.assertArrayEquals(val, next.get(col));
    Assert.assertNull(scan.next());
  }
}
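A hypothetical call site for this helper, assuming a BufferingTable named table inside a started transaction; the row, column, and value literals are made up for illustration:

// Hypothetical usage: write one cell, then assert that get, get-columns,
// get-row, and scan all observe it consistently.
byte[] row = Bytes.toBytes("r1");
byte[] col = Bytes.toBytes("c1");
byte[] val = Bytes.toBytes("v1");
table.put(row, col, val);
verify(table, row, col, val);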
Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
The class FactTable, method findMeasureNames.
/**
 * Finds all measure names of the facts that match the given {@link DimensionValue}s and time range.
 *
 * @param allDimensionNames list of all dimension names to be present in the fact record
 * @param dimensionSlice dimension values to filter by; {@code null} means any non-null value
 * @param startTs start timestamp, in seconds
 * @param endTs end timestamp, in seconds
 * @return {@link Set} of measure names
 */
// todo: pass a limit on number of measures returned
public Set<String> findMeasureNames(List<String> allDimensionNames, Map<String, String> dimensionSlice,
                                    long startTs, long endTs) {
  List<DimensionValue> allDimensions = Lists.newArrayList();
  for (String dimensionName : allDimensionNames) {
    allDimensions.add(new DimensionValue(dimensionName, dimensionSlice.get(dimensionName)));
  }
  byte[] startRow = codec.createStartRowKey(allDimensions, null, startTs, false);
  byte[] endRow = codec.createEndRowKey(allDimensions, null, endTs, false);
  endRow = Bytes.stopKeyForPrefix(endRow);
  FuzzyRowFilter fuzzyRowFilter =
    createFuzzyRowFilter(new FactScan(startTs, endTs, Collections.emptyList(), allDimensions), startRow);
  Set<String> measureNames = Sets.newHashSet();
  int scannedRecords = 0;
  try (Scanner scanner = timeSeriesTable.scan(startRow, endRow, fuzzyRowFilter)) {
    Row rowResult;
    while ((rowResult = scanner.next()) != null) {
      scannedRecords++;
      if (scannedRecords > MAX_RECORDS_TO_SCAN_DURING_SEARCH) {
        break;
      }
      byte[] rowKey = rowResult.getRow();
      // filter rows by the exact time range (the scan configuration only filters at row granularity)
      if (codec.getTimestamp(rowKey, codec.createColumn(startTs)) < startTs) {
        continue;
      }
      if (codec.getTimestamp(rowKey, codec.createColumn(endTs)) > endTs) {
        // we're done with the scanner
        break;
      }
      measureNames.add(codec.getMeasureName(rowResult.getRow()));
    }
  }
  LOG.trace("search for measures completed, scanned records: {}", scannedRecords);
  return measureNames;
}
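A hypothetical call site, reusing only the shapes visible above; the dimension names, values, and time window are made up for illustration:

// Hypothetical usage: find measures for a fixed namespace/app slice over the last hour.
// Leaving "dataset" out of the slice map means any non-null value matches (see the javadoc).
List<String> dimensionNames = Lists.newArrayList("namespace", "app", "dataset");
Map<String, String> slice = Maps.newHashMap();
slice.put("namespace", "default");
slice.put("app", "PurchaseApp");
long nowSec = System.currentTimeMillis() / 1000;
Set<String> measures = factTable.findMeasureNames(dimensionNames, slice, nowSec - 3600, nowSec);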
Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
The class FactTable, method delete.
/**
 * Deletes entries from the fact table that match the given scan.
 *
 * @param scan specifies the deletion criteria
 */
public void delete(FactScan scan) {
  try (Scanner scanner = getScanner(scan)) {
    Row row;
    while ((row = scanner.next()) != null) {
      // collect the columns of this row whose timestamps fall inside the scan's time range
      List<byte[]> columns = Lists.newArrayList();
      boolean exhausted = false;
      for (byte[] column : row.getColumns().keySet()) {
        long ts = codec.getTimestamp(row.getRow(), column);
        if (ts < scan.getStartTs()) {
          continue;
        }
        if (ts > scan.getEndTs()) {
          exhausted = true;
          break;
        }
        columns.add(column);
      }
      // todo: do deletes efficiently, in batches, not one-by-one
      timeSeriesTable.delete(row.getRow(), columns.toArray(new byte[columns.size()][]));
      if (exhausted) {
        break;
      }
    }
  }
}
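A hypothetical call site, reusing the FactScan constructor shape seen in findMeasureNames above; the dimension values and time window are made up:

// Hypothetical usage: delete all facts for one dimension combination over the last hour.
// The empty measure-name list mirrors the FactScan usage in findMeasureNames above.
long nowSec = System.currentTimeMillis() / 1000;
List<DimensionValue> dims = Lists.newArrayList(
  new DimensionValue("namespace", "default"),
  new DimensionValue("app", "PurchaseApp"));
factTable.delete(new FactScan(nowSec - 3600, nowSec, Collections.emptyList(), dims));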
Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
The class MockRuntimeDatasetSink, method readOutput.
/**
 * Used to read the records written by this sink.
 *
 * @param tableManager dataset manager used to get the sink dataset to read from
 * @return the {@link StructuredRecord}s stored in the sink's backing table
 */
public static List<StructuredRecord> readOutput(DataSetManager<Table> tableManager) throws Exception {
  Table table = tableManager.get();
  try (Scanner scanner = table.scan(null, null)) {
    List<StructuredRecord> records = new ArrayList<>();
    Row row;
    while ((row = scanner.next()) != null) {
      // each row stores the record's schema and its JSON form in separate columns
      Schema schema = Schema.parseJson(row.getString(SCHEMA_COL));
      String recordStr = row.getString(RECORD_COL);
      records.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
    }
    return records;
  }
}
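A typical call site in a pipeline test might look like the following; the getDataset helper, the "outputSink" dataset name, and the asserted field are all assumptions supplied by the surrounding test harness, not by this snippet:

// Hypothetical test usage: getDataset(...) and "outputSink" come from the
// surrounding test base class; the record field name is made up.
DataSetManager<Table> sinkManager = getDataset("outputSink");
List<StructuredRecord> written = MockRuntimeDatasetSink.readOutput(sinkManager);
Assert.assertEquals(1, written.size());
Assert.assertEquals("alice", written.get(0).get("name"));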