Search in sources :

Example 76 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class MetadataDataset method getMetadata.

/**
   * Fetches the metadata of several entities using a single fuzzy-filtered scan.
   *
   * @param targetIds entities for which metadata is required
   * @return one {@link Metadata} object per entity for which at least one row was converted into a
   *         {@link MetadataEntry}; an empty set if {@code targetIds} is empty
   */
public Set<Metadata> getMetadata(Set<? extends NamespacedEntityId> targetIds) {
    if (targetIds.isEmpty()) {
        return Collections.emptySet();
    }
    // Build one fuzzy key per requested entity and sort them, so that the first and the last key
    // can be used to bound the scan range.
    List<ImmutablePair<byte[], byte[]>> sortedKeys = new ArrayList<>(targetIds.size());
    for (NamespacedEntityId id : targetIds) {
        sortedKeys.add(getFuzzyKeyFor(id));
    }
    Collections.sort(sortedKeys, FUZZY_KEY_COMPARATOR);
    byte[] scanStart = sortedKeys.get(0).getFirst();
    byte[] scanStop = Bytes.stopKeyForPrefix(sortedKeys.get(sortedKeys.size() - 1).getFirst());
    Scan fuzzyScan = new Scan(scanStart, scanStop, new FuzzyRowFilter(sortedKeys));

    // The scan yields individual rows; group the converted entries by the entity they belong to.
    Multimap<NamespacedEntityId, MetadataEntry> entriesByEntity = HashMultimap.create();
    try (Scanner scanner = indexedTable.scan(fuzzyScan)) {
        for (Row row = scanner.next(); row != null; row = scanner.next()) {
            MetadataEntry entry = convertRow(row);
            if (entry == null) {
                // Rows that do not convert to an entry are skipped.
                continue;
            }
            entriesByEntity.put(entry.getTargetId(), entry);
        }
    }

    // Turn each group into a Metadata object: the entry stored under TAGS_KEY supplies the tags,
    // every other entry becomes a property.
    Set<Metadata> result = new HashSet<>();
    for (Map.Entry<NamespacedEntityId, Collection<MetadataEntry>> grouped : entriesByEntity.asMap().entrySet()) {
        Set<String> tags = Collections.emptySet();
        Map<String, String> properties = new HashMap<>();
        for (MetadataEntry entry : grouped.getValue()) {
            if (!TAGS_KEY.equals(entry.getKey())) {
                properties.put(entry.getKey(), entry.getValue());
                continue;
            }
            tags = splitTags(entry.getValue());
        }
        result.add(new Metadata(grouped.getKey(), properties, tags));
    }
    return result;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FuzzyRowFilter(co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ImmutablePair(co.cask.cdap.common.utils.ImmutablePair) Collection(java.util.Collection) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) HashSet(java.util.HashSet)

Example 77 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class MetadataDataset method getMetadata.

/**
   * Looks up a single metadata entry by target id and metadata key.
   *
   * @param targetId The id of the target
   * @param key The metadata key to get
   * @return the {@link MetadataEntry} for the target and key, or {@code null} if the row is empty or
   *         carries no value in the value column
   */
@Nullable
private MetadataEntry getMetadata(NamespacedEntityId targetId, String key) {
    Row row = indexedTable.get(MdsKey.getMDSValueKey(targetId, key).getKey());
    // A row can still exist while its value is null, e.g. once all tags have been removed one by one;
    // that case is treated the same as an empty row.
    byte[] value = row.isEmpty() ? null : row.get(VALUE_COLUMN);
    return value == null ? null : new MetadataEntry(targetId, key, Bytes.toString(value));
}
Also used : MDSKey(co.cask.cdap.data2.dataset2.lib.table.MDSKey) Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Example 78 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class MetadataDataset method rebuildIndexes.

/**
   * Rebuilds the indexes in the {@link MetadataDataset} one batch at a time.
   *
   * <p>Only rows for which a metadata entry is found count against {@code limit}; rows whose entry is
   * {@code null} are logged and skipped.
   *
   * @param startRowKey the key of the row to start the scan for the current batch with; {@code null} starts at
   *                    the value row prefix
   * @param limit the batch size
   * @return the key of the row that follows the current batch (the start row for the next batch),
   *         {@code null} if there are no more rows to scan.
   */
@Nullable
public byte[] rebuildIndexes(@Nullable byte[] startRowKey, int limit) {
    byte[] valueRowPrefix = MdsKey.getValueRowPrefix();
    // Begin at the caller-supplied row when there is one, otherwise at the first value row.
    byte[] scanStart = (startRowKey != null) ? startRowKey : valueRowPrefix;
    // The scan always ends at the last row key carrying the value row prefix.
    byte[] scanStop = Bytes.stopKeyForPrefix(valueRowPrefix);
    int remaining = limit;
    try (Scanner scanner = indexedTable.scan(scanStart, scanStop)) {
        while (remaining > 0) {
            Row current = scanner.next();
            if (current == null) {
                break;
            }
            byte[] rowKey = current.getRow();
            String targetType = MdsKey.getTargetType(rowKey);
            NamespacedEntityId entityId = MdsKey.getNamespacedIdFromKey(targetType, rowKey);
            String metadataKey = MdsKey.getMetadataKey(targetType, rowKey);
            Set<Indexer> indexers = getIndexersForKey(metadataKey);
            MetadataEntry entry = getMetadata(entityId, metadataKey);
            if (entry == null) {
                LOG.warn("Found null metadata entry for a known metadata key {} for entity {} which has an index stored. " + "Ignoring.", metadataKey, entityId);
                continue;
            }
            // storeIndexes deletes old indexes
            storeIndexes(entityId, metadataKey, indexers, entry);
            remaining--;
        }
        // Whatever row comes next (if any) is where the following batch has to resume.
        Row nextBatchStart = scanner.next();
        return (nextBatchStart == null) ? null : nextBatchStart.getRow();
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ValueOnlyIndexer(co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer) DefaultValueIndexer(co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) SchemaIndexer(co.cask.cdap.data2.metadata.indexer.SchemaIndexer) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedTimeIndexer(co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer) Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Example 79 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class MetadataDataset method getMetadata.

/**
   * Collects every metadata key/value pair stored for the given {@link NamespacedEntityId}.
   *
   * @param targetId the specified {@link NamespacedEntityId}
   * @return a Map from metadata key to value for the specified {@link NamespacedEntityId}; rows whose key
   *         or value is {@code null} are left out
   */
private Map<String, String> getMetadata(NamespacedEntityId targetId) {
    String targetType = EntityIdKeyHelper.getTargetType(targetId);
    // A value key built with a null metadata key serves as the scan prefix for this entity.
    byte[] prefix = MdsKey.getMDSValueKey(targetId, null).getKey();
    Map<String, String> result = new HashMap<>();
    try (Scanner scanner = indexedTable.scan(prefix, Bytes.stopKeyForPrefix(prefix))) {
        for (Row row = scanner.next(); row != null; row = scanner.next()) {
            String metadataKey = MdsKey.getMetadataKey(targetType, row.getRow());
            byte[] rawValue = row.get(VALUE_COLUMN);
            if (metadataKey != null && rawValue != null) {
                result.put(metadataKey, Bytes.toString(rawValue));
            }
        }
    }
    return result;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) MDSKey(co.cask.cdap.data2.dataset2.lib.table.MDSKey) Row(co.cask.cdap.api.dataset.table.Row)

Example 80 with Row

use of co.cask.cdap.api.dataset.table.Row in project cdap by caskdata.

the class MetadataDataset method searchByDefaultIndex.

/**
 * Searches the default index column for every search term derived from the query and collects
 * the matching metadata entries.
 *
 * @param namespaceId passed through to {@code getSearchTerms}
 * @param searchQuery the query from which the search terms are derived
 * @param types passed through to {@code parseRow} together with each scanned row
 * @param showHidden passed through to {@code parseRow} together with each scanned row
 * @param entityScope passed through to {@code getSearchTerms}
 * @return the entries found, with an empty cursor list (cursors are not supported for default indexes)
 */
private SearchResults searchByDefaultIndex(String namespaceId, String searchQuery, Set<EntityTypeSimpleName> types, boolean showHidden, Set<EntityScope> entityScope) {
    List<MetadataEntry> results = new LinkedList<>();
    for (String searchTerm : getSearchTerms(namespaceId, searchQuery, entityScope)) {
        // try-with-resources closes the scanner on every path and, unlike a manual finally block,
        // does not let a failure in close() mask an exception thrown while reading rows.
        try (Scanner scanner = openDefaultIndexScanner(searchTerm)) {
            Row next;
            while ((next = scanner.next()) != null) {
                Optional<MetadataEntry> metadataEntry = parseRow(next, DEFAULT_INDEX_COLUMN, types, showHidden);
                if (metadataEntry.isPresent()) {
                    results.add(metadataEntry.get());
                }
            }
        }
    }
    // cursors are currently not supported for default indexes
    return new SearchResults(results, Collections.<String>emptyList());
}

/**
 * Opens a scanner over the default index for one search term: a range scan over the prefix when the
 * term ends with {@code *}, an exact-value read otherwise.
 */
private Scanner openDefaultIndexScanner(String searchTerm) {
    byte[] indexColumn = Bytes.toBytes(DEFAULT_INDEX_COLUMN);
    if (searchTerm.endsWith("*")) {
        // if prefixed search get start and stop key
        byte[] startKey = Bytes.toBytes(searchTerm.substring(0, searchTerm.lastIndexOf("*")));
        byte[] stopKey = Bytes.stopKeyForPrefix(startKey);
        return indexedTable.scanByIndex(indexColumn, startKey, stopKey);
    }
    return indexedTable.readByIndex(indexColumn, Bytes.toBytes(searchTerm));
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) Row(co.cask.cdap.api.dataset.table.Row) LinkedList(java.util.LinkedList)

Aggregations

Row (co.cask.cdap.api.dataset.table.Row)111 Scanner (co.cask.cdap.api.dataset.table.Scanner)60 Test (org.junit.Test)23 Table (co.cask.cdap.api.dataset.table.Table)20 Get (co.cask.cdap.api.dataset.table.Get)16 ArrayList (java.util.ArrayList)16 TransactionExecutor (org.apache.tephra.TransactionExecutor)16 Map (java.util.Map)15 Put (co.cask.cdap.api.dataset.table.Put)14 HashMap (java.util.HashMap)10 Scan (co.cask.cdap.api.dataset.table.Scan)9 TransactionAware (org.apache.tephra.TransactionAware)9 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)8 QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow)8 DatasetId (co.cask.cdap.proto.id.DatasetId)8 IOException (java.io.IOException)8 ImmutableMap (com.google.common.collect.ImmutableMap)7 Transaction (org.apache.tephra.Transaction)7 WriteOnly (co.cask.cdap.api.annotation.WriteOnly)6 Schema (co.cask.cdap.api.data.schema.Schema)6