Examples with Scanner - co.cask.cdap.api.dataset.table.Scanner

Example 56 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class MetadataDataset method searchByCustomIndex.

/**
 * Scans the index column selected by {@code sortInfo} and collects matching metadata entries
 * along with cursors for subsequent pages.
 *
 * <p>At most {@code offset + (numCursors + 1) * limit} entries (capped at
 * {@link Integer#MAX_VALUE}) are collected. The first {@code offset} entries are included in the
 * returned results; NOTE(review): presumably the caller trims them - confirm against the caller.
 *
 * @param namespaceId the namespace passed to {@code getSearchTerms}
 * @param types the entity types passed to {@code parseRow} for filtering
 * @param sortInfo supplies the sort field and order used to pick the index column
 * @param offset number of leading entries that precede the first requested chunk
 * @param limit the chunk (page) size
 * @param numCursors number of additional chunks to pre-fetch in order to produce cursors
 * @param cursor if non-empty, the scan starts at this cursor instead of the prefix start
 * @param showHidden passed through to {@code parseRow}
 * @param entityScope the scopes passed to {@code getSearchTerms}
 * @return the collected entries and the cursors found while scanning
 */
private SearchResults searchByCustomIndex(String namespaceId, Set<EntityTypeSimpleName> types, SortInfo sortInfo, int offset, int limit, int numCursors, @Nullable String cursor, boolean showHidden, Set<EntityScope> entityScope) {
    List<MetadataEntry> results = new LinkedList<>();
    String indexColumn = getIndexColumn(sortInfo.getSortBy(), sortInfo.getSortOrder());
    // we want to return the first chunk of 'limit' elements after offset
    // in addition, we want to pre-fetch 'numCursors' chunks of size 'limit'.
    // The whole computation is done in long arithmetic and then capped at Integer.MAX_VALUE
    // so that it cannot overflow.
    int fetchSize = (int) Math.min(offset + ((numCursors + 1L) * limit), Integer.MAX_VALUE);
    List<String> cursors = new ArrayList<>(numCursors);
    // A cursor is the first element of a chunk of ordered results. Since it is always the first element,
    // we want to add a key as a cursor, if upon dividing the current number of results by the chunk size,
    // the remainder is 1. However, this is not true, when the chunk size is 1, since in that case, the
    // remainder on division can never be 1, it is always 0.
    int mod = (limit == 1) ? 0 : 1;
    for (String searchTerm : getSearchTerms(namespaceId, "*", entityScope)) {
        // Once enough results have been collected there is no point in opening further scanners.
        if (results.size() >= fetchSize) {
            break;
        }
        byte[] startKey = Bytes.toBytes(searchTerm.substring(0, searchTerm.lastIndexOf("*")));
        byte[] stopKey = Bytes.stopKeyForPrefix(startKey);
        // if a cursor is provided, then start at the cursor
        if (!Strings.isNullOrEmpty(cursor)) {
            String namespaceInStartKey = searchTerm.substring(0, searchTerm.indexOf(KEYVALUE_SEPARATOR));
            startKey = Bytes.toBytes(namespaceInStartKey + KEYVALUE_SEPARATOR + cursor);
        }
        try (Scanner scanner = indexedTable.scanByIndex(Bytes.toBytes(indexColumn), startKey, stopKey)) {
            Row next;
            // Check the budget BEFORE pulling the next row so that no row is read just to be discarded.
            while (results.size() < fetchSize && (next = scanner.next()) != null) {
                Optional<MetadataEntry> metadataEntry = parseRow(next, indexColumn, types, showHidden);
                if (!metadataEntry.isPresent()) {
                    continue;
                }
                results.add(metadataEntry.get());
                if (results.size() > limit + offset && (results.size() - offset) % limit == mod) {
                    // add the cursor, with the namespace removed.
                    String cursorWithNamespace = Bytes.toString(next.get(indexColumn));
                    cursors.add(cursorWithNamespace.substring(cursorWithNamespace.indexOf(KEYVALUE_SEPARATOR) + 1));
                }
            }
        }
    }
    return new SearchResults(results, cursors);
}

Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) ArrayList(java.util.ArrayList) Row(co.cask.cdap.api.dataset.table.Row) LinkedList(java.util.LinkedList)

Example 57 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class MetadataDataset method rebuildIndexes.

/**
 * Rebuilds the indexes of the {@link MetadataDataset} one batch at a time.
 *
 * <p>Rows whose metadata entry cannot be found are logged and skipped; they do not count
 * towards the batch size.
 *
 * @param startRowKey the row key at which the scan for this batch starts; {@code null} means
 *                    start from the value row prefix
 * @param limit the maximum number of rows to re-index in this batch
 * @return the key of the row following the last row consumed by this batch, or {@code null}
 *         if the scanner has no further rows
 */
@Nullable
public byte[] rebuildIndexes(@Nullable byte[] startRowKey, int limit) {
    byte[] valuePrefix = MdsKey.getValueRowPrefix();
    // Fall back to the very first value row when the caller supplies no start key.
    byte[] scanStart = (startRowKey != null) ? startRowKey : valuePrefix;
    // The scan always ends at the last row key carrying the value row prefix.
    byte[] scanStop = Bytes.stopKeyForPrefix(valuePrefix);
    try (Scanner scanner = indexedTable.scan(scanStart, scanStop)) {
        int remaining = limit;
        while (remaining > 0) {
            Row current = scanner.next();
            if (current == null) {
                break;
            }
            byte[] key = current.getRow();
            String targetType = MdsKey.getTargetType(key);
            NamespacedEntityId entityId = MdsKey.getNamespacedIdFromKey(targetType, key);
            String metadataKey = MdsKey.getMetadataKey(targetType, key);
            Set<Indexer> indexers = getIndexersForKey(metadataKey);
            MetadataEntry entry = getMetadata(entityId, metadataKey);
            if (entry == null) {
                LOG.warn("Found null metadata entry for a known metadata key {} for entity {} which has an index stored. " + "Ignoring.", metadataKey, entityId);
                continue;
            }
            // storeIndexes deletes old indexes
            storeIndexes(entityId, metadataKey, indexers, entry);
            remaining--;
        }
        // Pull one more row: its key is where the next batch has to start.
        Row nextBatchStart = scanner.next();
        return (nextBatchStart == null) ? null : nextBatchStart.getRow();
    }
}

Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ValueOnlyIndexer(co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer) DefaultValueIndexer(co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) SchemaIndexer(co.cask.cdap.data2.metadata.indexer.SchemaIndexer) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedTimeIndexer(co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer) Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Example 58 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class MetadataDataset method getSnapshotBeforeTime.

/**
 * Returns the metadata stored in the first row of the history scan for {@code targetId}
 * starting at {@code timeMillis}, or an empty {@link Metadata} for the target if the scan
 * yields no row.
 *
 * <p>NOTE(review): presumably history keys are ordered so that the first scanned row is the
 * latest snapshot at or before {@code timeMillis} - confirm against {@code MdsHistoryKey}.
 *
 * @param targetId the entity whose snapshot is requested
 * @param timeMillis the time used to build the scan start key
 * @return the deserialized snapshot, or {@code new Metadata(targetId)} when none exists
 */
private Metadata getSnapshotBeforeTime(NamespacedEntityId targetId, long timeMillis) {
    byte[] firstKey = MdsHistoryKey.getMdsScanStartKey(targetId, timeMillis).getKey();
    byte[] lastKey = MdsHistoryKey.getMdsScanEndKey(targetId).getKey();
    // TODO: add limit to scan, we need only one row
    try (Scanner historyScanner = indexedTable.scan(firstKey, lastKey)) {
        Row snapshotRow = historyScanner.next();
        if (snapshotRow == null) {
            // No history row in range: hand back an empty snapshot for the target.
            return new Metadata(targetId);
        }
        String snapshotJson = snapshotRow.getString(HISTORY_COLUMN);
        return GSON.fromJson(snapshotJson, Metadata.class);
    }
}

Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) Row(co.cask.cdap.api.dataset.table.Row)

Example 59 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class MetadataDataset method deleteAllIndexes.

/**
 * Deletes index rows from the metadata dataset, up to a maximum number per call.
 *
 * <p>Scans the rows under the index row prefix and passes each one to {@code deleteIndexRow}
 * until either {@code limit} rows have been reported as deleted or the scan is exhausted.
 *
 * @param limit the maximum number of index rows to delete in this call
 * @return the number of rows for which {@code deleteIndexRow} returned {@code true}
 */
public int deleteAllIndexes(int limit) {
    byte[] scanStart = MdsKey.getIndexRowPrefix();
    byte[] scanStop = Bytes.stopKeyForPrefix(scanStart);
    int deleted = 0;
    try (Scanner scanner = indexedTable.scan(scanStart, scanStop)) {
        while (deleted < limit) {
            Row indexRow = scanner.next();
            if (indexRow == null) {
                // Scan exhausted before the limit was reached.
                break;
            }
            if (deleteIndexRow(indexRow)) {
                deleted++;
            }
        }
    }
    return deleted;
}

Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) Row(co.cask.cdap.api.dataset.table.Row)

Example 60 with Scanner

use of co.cask.cdap.api.dataset.table.Scanner in project cdap by caskdata.

the class MetadataDataset method searchByDefaultIndex.

/**
 * Searches the default index column for every search term derived from {@code searchQuery}
 * and collects all rows that {@code parseRow} accepts.
 *
 * @param namespaceId the namespace passed to {@code getSearchTerms}
 * @param searchQuery the query from which the search terms are derived
 * @param types the entity types passed to {@code parseRow} for filtering
 * @param showHidden passed through to {@code parseRow}
 * @param entityScope the scopes passed to {@code getSearchTerms}
 * @return the matching entries with an empty cursor list (cursors are not supported here)
 */
private SearchResults searchByDefaultIndex(String namespaceId, String searchQuery, Set<EntityTypeSimpleName> types, boolean showHidden, Set<EntityScope> entityScope) {
    List<MetadataEntry> results = new LinkedList<>();
    for (String searchTerm : getSearchTerms(namespaceId, searchQuery, entityScope)) {
        // try-with-resources closes the scanner on every path and, unlike a manual finally,
        // does not let a failure in close() mask an exception thrown while scanning.
        try (Scanner scanner = openDefaultIndexScanner(searchTerm)) {
            Row next;
            while ((next = scanner.next()) != null) {
                Optional<MetadataEntry> metadataEntry = parseRow(next, DEFAULT_INDEX_COLUMN, types, showHidden);
                if (metadataEntry.isPresent()) {
                    results.add(metadataEntry.get());
                }
            }
        }
    }
    // cursors are currently not supported for default indexes
    return new SearchResults(results, Collections.<String>emptyList());
}

/**
 * Opens a scanner over the default index column for one search term: a prefix scan when the
 * term ends with {@code *}, an exact-value read otherwise.
 */
private Scanner openDefaultIndexScanner(String searchTerm) {
    byte[] indexColumn = Bytes.toBytes(DEFAULT_INDEX_COLUMN);
    if (searchTerm.endsWith("*")) {
        // if prefixed search get start and stop key
        byte[] startKey = Bytes.toBytes(searchTerm.substring(0, searchTerm.lastIndexOf("*")));
        byte[] stopKey = Bytes.stopKeyForPrefix(startKey);
        return indexedTable.scanByIndex(indexColumn, startKey, stopKey);
    }
    return indexedTable.readByIndex(indexColumn, Bytes.toBytes(searchTerm));
}

Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) Row(co.cask.cdap.api.dataset.table.Row) LinkedList(java.util.LinkedList)

Aggregations

Scanner (co.cask.cdap.api.dataset.table.Scanner)78 Row (co.cask.cdap.api.dataset.table.Row)67 Scan (co.cask.cdap.api.dataset.table.Scan)14 ArrayList (java.util.ArrayList)14 Test (org.junit.Test)13 Table (co.cask.cdap.api.dataset.table.Table)12 Map (java.util.Map)11 DatasetId (co.cask.cdap.proto.id.DatasetId)8 TransactionExecutor (org.apache.tephra.TransactionExecutor)8 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)6 QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow)6 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 Put (co.cask.cdap.api.dataset.table.Put)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 SortedMap (java.util.SortedMap)5 DatasetProperties (co.cask.cdap.api.dataset.DatasetProperties)4 Get (co.cask.cdap.api.dataset.table.Get)4 FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter)4 ProgramSchedule (co.cask.cdap.internal.app.runtime.schedule.ProgramSchedule)4