Examples with Scanner - io.cdap.cdap.api.dataset.table.Scanner

Example 6 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in the cdap project by caskdata.

From the class IndexedTableTest, method testIndexedRangeLookups.

/**
 * Verifies range lookups on the index column of an indexed table: first over the bounded
 * range [idx2, idx5), then over a range with an open (null) start, [null, idx3).
 * The dataset instance created for the test is deleted in all cases.
 */
@Test
public void testIndexedRangeLookups() throws Exception {
    DatasetId indexRangedLookupDs = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("rangeLookup");
    dsFrameworkUtil.createInstance("indexedTable", indexRangedLookupDs, DatasetProperties.builder().add(IndexedTable.INDEX_COLUMNS_CONF_KEY, idxColString).build());
    final IndexedTable iTable = dsFrameworkUtil.getInstance(indexRangedLookupDs);
    TransactionExecutor txnl = dsFrameworkUtil.newTransactionExecutor(iTable);
    try {
        // start a new transaction
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // perform 5 puts, using idx values 1,2,3,4,5 (deliberately not in index order)
                iTable.put(new Put(keyE).add(idxCol, idx4).add(valCol, valE));
                iTable.put(new Put(keyC).add(idxCol, idx1).add(valCol, valC));
                iTable.put(new Put(keyD).add(idxCol, idx5).add(valCol, valA));
                iTable.put(new Put(keyB).add(idxCol, idx2).add(valCol, valB));
                iTable.put(new Put(keyA).add(idxCol, idx3).add(valCol, valD));
            }
        });
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // do a scan using idx value range [idx2, idx5). Assert that we retrieve idx2, idx3, idx4.
                // try-with-resources guarantees the scanner is released even when an assertion fails.
                // NOTE(review): if assertEmpty also closes the scanner, close() runs twice — confirm that is harmless.
                try (Scanner scanner = iTable.scanByIndex(idxCol, idx2, idx5)) {
                    Row next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyB, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valB, next.get(valCol)));
                    next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyA, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valD, next.get(valCol)));
                    next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyE, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valE, next.get(valCol)));
                    assertEmpty(scanner);
                }
            }
        });
        txnl.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // do a scan using idx value range [null (first row), idx3). Assert that we retrieve the values corresponding
                // to idx1, idx2.
                try (Scanner scanner = iTable.scanByIndex(idxCol, null, idx3)) {
                    Row next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyC, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valC, next.get(valCol)));
                    next = scanner.next();
                    Assert.assertNotNull(next);
                    Assert.assertTrue(Bytes.equals(keyB, next.getRow()));
                    Assert.assertTrue(Bytes.equals(valB, next.get(valCol)));
                    assertEmpty(scanner);
                }
            }
        });
    } finally {
        dsFrameworkUtil.deleteInstance(indexRangedLookupDs);
    }
}

Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) TransactionExecutor(org.apache.tephra.TransactionExecutor) Row(io.cdap.cdap.api.dataset.table.Row) Put(io.cdap.cdap.api.dataset.table.Put) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 7 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in the cdap project by caskdata.

From the class HBaseMetricsTable, method initializeVars.

/**
 * Sets up the scan executor and the row key distributor of this table.
 *
 * @param cConf configuration from which the maximum number of scan threads is read
 * @param spec dataset specification from which the number of table splits is read
 *             (16 is passed as the default value)
 */
private void initializeVars(CConfiguration cConf, DatasetSpecification spec) {
    // Clear both fields before they are assigned further down.
    this.scanExecutor = null;
    this.rowKeyDistributor = null;

    // Rejection handler: log, then run the rejected task on the submitting thread
    // unless the pool has already been shut down.
    RejectedExecutionHandler runInCallerThread = (task, pool) -> {
        REJECTION_LOG.info("No more threads in the HBase scan thread pool. Consider increase {}. Performing scan in caller thread {}", Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS, Thread.currentThread().getName());
        if (pool.isShutdown()) {
            return;
        }
        task.run();
    };

    int threadLimit = cConf.getInt(Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS);
    // Core size 0 with a 60 second keep-alive: the pool shrinks to no threads when left idle.
    // The thread factory creates daemon threads, hence no need to worry about shutdown.
    // The synchronous queue holds no tasks, so when all threads are busy the handler above runs.
    this.scanExecutor = new ThreadPoolExecutor(
        0,
        threadLimit,
        60L,
        TimeUnit.SECONDS,
        new SynchronousQueue<Runnable>(),
        Threads.createDaemonThreadFactory("metrics-hbase-scanner-%d"),
        runInCallerThread);

    int tableSplits = spec.getIntProperty(Constants.Metrics.METRICS_HBASE_TABLE_SPLITS, 16);
    this.rowKeyDistributor = new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(tableSplits));
}

Also used : Arrays(java.util.Arrays) ImmutablePair(io.cdap.cdap.common.utils.ImmutablePair) TableProperties(io.cdap.cdap.api.dataset.table.TableProperties) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) Result(org.apache.hadoop.hbase.client.Result) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) Increment(org.apache.hadoop.hbase.client.Increment) LoggerFactory(org.slf4j.LoggerFactory) TableId(io.cdap.cdap.data2.util.TableId) Bytes(io.cdap.cdap.api.common.Bytes) Loggers(io.cdap.cdap.common.logging.Loggers) FuzzyRowFilter(io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) MetricsTable(io.cdap.cdap.data2.dataset2.lib.table.MetricsTable) AbstractRowKeyDistributor(io.cdap.cdap.hbase.wd.AbstractRowKeyDistributor) DataSetException(io.cdap.cdap.api.dataset.DataSetException) PutBuilder(io.cdap.cdap.data2.util.hbase.PutBuilder) Lists(com.google.common.collect.Lists) Delete(org.apache.hadoop.hbase.client.Delete) RejectedExecutionHandler(java.util.concurrent.RejectedExecutionHandler) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Scanner(io.cdap.cdap.api.dataset.table.Scanner) BufferedMutator(org.apache.hadoop.hbase.client.BufferedMutator) DistributedScanner(io.cdap.cdap.hbase.wd.DistributedScanner) HBaseTableUtil(io.cdap.cdap.data2.util.hbase.HBaseTableUtil) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) Pair(org.apache.hadoop.hbase.util.Pair) Threads(org.apache.twill.common.Threads) Logger(org.slf4j.Logger) ScanBuilder(io.cdap.cdap.data2.util.hbase.ScanBuilder) SynchronousQueue(java.util.concurrent.SynchronousQueue) Put(org.apache.hadoop.hbase.client.Put) Get(org.apache.hadoop.hbase.client.Get) IOException(java.io.IOException) NavigableMap(java.util.NavigableMap) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) 
DeleteBuilder(io.cdap.cdap.data2.util.hbase.DeleteBuilder) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Table(org.apache.hadoop.hbase.client.Table) Constants(io.cdap.cdap.common.conf.Constants) LogSamplers(io.cdap.cdap.common.logging.LogSamplers) DatasetContext(io.cdap.cdap.api.dataset.DatasetContext) RowKeyDistributorByHashPrefix(io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix) SortedMap(java.util.SortedMap) RowKeyDistributorByHashPrefix(io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix) RejectedExecutionHandler(java.util.concurrent.RejectedExecutionHandler) SynchronousQueue(java.util.concurrent.SynchronousQueue) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor)

Example 8 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in the cdap project by caskdata.

From the class LevelDBTableCore, method deleteRange.

/**
 * Scans the rows starting at {@code startRow} and deletes every cell the scan returns,
 * writing the deletes to the database in batches.
 *
 * @param startRow row at which both the scan and the delete iterator are positioned
 * @param stopRow row used to derive the scan's end key; {@code null} means no end key is given to the scanner
 * @param filter optional row filter handed to the scanner
 * @param columns optional columns handed to the scanner; an empty array makes this method a no-op,
 *                and the caller's array is never modified (a sorted copy is used)
 * @throws IOException if reading from or writing to the underlying database fails
 */
public void deleteRange(byte[] startRow, byte[] stopRow, @Nullable FuzzyRowFilter filter, @Nullable byte[][] columns) throws IOException {
    if (columns != null) {
        if (columns.length == 0) {
            return;
        }
        // sort a private copy so that the caller's array is left untouched
        columns = Arrays.copyOf(columns, columns.length);
        Arrays.sort(columns, Bytes.BYTES_COMPARATOR);
    }
    DB db = getDB();
    // computed before any iterator is opened, so a failure here cannot leak one
    byte[] endKey = stopRow == null ? null : createStartKey(stopRow);
    // todo make configurable
    final int deletesPerRound = 1024;
    // The delete iterator is opened as a resource and positioned inside the try block,
    // so it is closed even if positioning it fails.
    try (DBIterator deleteIterator = db.iterator()) {
        seekToStart(deleteIterator, startRow);
        // NOTE(review): the scan iterator is handed to LevelDBScanner, which presumably closes it — confirm.
        DBIterator iterator = db.iterator();
        seekToStart(iterator, startRow);
        try (Scanner scanner = new LevelDBScanner(iterator, endKey, filter, columns, null)) {
            Row rowValues;
            WriteBatch batch = db.createWriteBatch();
            int deletesInBatch = 0;
            // go through all matching cells and delete them in batches.
            while ((rowValues = scanner.next()) != null) {
                byte[] row = rowValues.getRow();
                for (byte[] column : rowValues.getColumns().keySet()) {
                    addToDeleteBatch(batch, deleteIterator, row, column);
                    deletesInBatch++;
                    // perform the deletes when we have built up a batch.
                    if (deletesInBatch >= deletesPerRound) {
                        // delete all the entries that were found
                        db.write(batch, getWriteOptions());
                        batch = db.createWriteBatch();
                        deletesInBatch = 0;
                    }
                }
            }
            // perform any outstanding deletes
            if (deletesInBatch > 0) {
                db.write(batch, getWriteOptions());
            }
        }
    }
}

Also used : DBIterator(org.iq80.leveldb.DBIterator) Scanner(io.cdap.cdap.api.dataset.table.Scanner) Row(io.cdap.cdap.api.dataset.table.Row) WriteBatch(org.iq80.leveldb.WriteBatch) DB(org.iq80.leveldb.DB)

Example 9 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in the cdap project by caskdata.

From the class MetadataDataset, method removeMetadata.

/**
 * Removes every metadata key accepted by the given predicate from the specified {@link MetadataEntity},
 * deletes the indexes of the removed keys and writes the resulting metadata to history.
 *
 * @param metadataEntity the {@link MetadataEntity} whose keys are to be removed
 * @param filter the {@link Predicate} a key must satisfy in order to be removed
 * @return a {@link Change} built from the metadata found before the removal and the metadata left after it
 */
private Change removeMetadata(MetadataEntity metadataEntity, Predicate<String> filter) {
    byte[] startKey = MetadataKey.createValueRowKey(metadataEntity, null).getKey();
    byte[] endKey = Bytes.stopKeyForPrefix(startKey);

    Map<String, String> before = new HashMap<>();
    Map<String, String> removed = new HashMap<>();
    try (Scanner rows = indexedTable.scan(startKey, endKey)) {
        for (Row row = rows.next(); row != null; row = rows.next()) {
            String value = row.getString(VALUE_COLUMN);
            // rows without a value are skipped entirely
            if (value != null) {
                String key = MetadataKey.extractMetadataKey(row.getRow());
                // everything found for this entity counts as existing metadata
                before.put(key, value);
                if (filter.test(key)) {
                    // matching keys are deleted right away; their indexes are cleaned up after the scan
                    indexedTable.delete(new Delete(row.getRow()));
                    removed.put(key, value);
                }
            }
        }
    }

    // what remains is the existing metadata minus the removed keys
    Map<String, String> after = new HashMap<>(before);
    for (String removedKey : removed.keySet()) {
        deleteIndexes(metadataEntity, removedKey);
        after.remove(removedKey);
    }

    Record afterRecord = getMetadata(metadataEntity, after);
    writeHistory(afterRecord);
    Record beforeRecord = getMetadata(metadataEntity, before);
    return new Change(beforeRecord, afterRecord);
}

Also used : Delete(io.cdap.cdap.api.dataset.table.Delete) Scanner(io.cdap.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) MDSKey(io.cdap.cdap.data2.dataset2.lib.table.MDSKey) Row(io.cdap.cdap.api.dataset.table.Row)

Example 10 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in the cdap project by caskdata.

From the class MetadataDataset, method searchByDefaultIndex.

/**
 * Looks up every search term of the request in the default index column and collects the
 * rows that parse into a {@link MetadataEntry}. Prefix terms are resolved with an index range
 * scan; all other terms with an exact index read.
 *
 * @param request the search request supplying the terms, the entity types and the namespace/hidden flags
 * @return the collected entries together with an empty cursor list
 */
private SearchResults searchByDefaultIndex(SearchRequest request) {
    List<MetadataEntry> results = new LinkedList<>();
    String column = request.isNamespaced() ? DEFAULT_INDEX_COLUMN.getColumn() : DEFAULT_INDEX_COLUMN.getCrossNamespaceColumn();
    for (SearchTerm searchTerm : getSearchTerms(request)) {
        // the term bytes serve as the start key of a prefix scan or as the exact value to read
        byte[] termBytes = Bytes.toBytes(searchTerm.getTerm());
        // try-with-resources closes the scanner on every path and keeps a failure in close()
        // from masking an exception thrown while reading rows
        try (Scanner scanner = searchTerm.isPrefix()
            // if prefixed search, scan from the term up to the stop key for that prefix
            ? indexedTable.scanByIndex(Bytes.toBytes(column), termBytes, Bytes.stopKeyForPrefix(termBytes))
            : indexedTable.readByIndex(Bytes.toBytes(column), termBytes)) {
            Row next;
            while ((next = scanner.next()) != null) {
                Optional<MetadataEntry> metadataEntry = parseRow(next, column, request.getTypes(), request.shouldShowHidden());
                metadataEntry.ifPresent(results::add);
            }
        }
    }
    // cursors are currently not supported for default indexes
    return new SearchResults(results, Collections.emptyList());
}

Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) Row(io.cdap.cdap.api.dataset.table.Row) LinkedList(java.util.LinkedList)

Aggregations

Scanner (io.cdap.cdap.api.dataset.table.Scanner)104 Row (io.cdap.cdap.api.dataset.table.Row)77 Test (org.junit.Test)26 Table (io.cdap.cdap.api.dataset.table.Table)14 ArrayList (java.util.ArrayList)14 Scan (io.cdap.cdap.api.dataset.table.Scan)12 MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey)12 HashMap (java.util.HashMap)11 FuzzyRowFilter (io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter)10 DatasetId (io.cdap.cdap.proto.id.DatasetId)10 TransactionExecutor (org.apache.tephra.TransactionExecutor)10 Schema (io.cdap.cdap.api.data.schema.Schema)9 DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties)8 TableId (io.cdap.cdap.data2.util.TableId)8 IOException (java.io.IOException)8 List (java.util.List)8 Transaction (org.apache.tephra.Transaction)8 ReadOnly (io.cdap.cdap.api.annotation.ReadOnly)6 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)6 Delete (io.cdap.cdap.api.dataset.table.Delete)6