Search in sources :

Example 26 with Metadata

use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

the class MetadataDataset method rebuildIndexes.

/**
 * Rebuilds all the indexes in the {@link MetadataDataset} in batches.
 *
 * @param startRowKey the key of the row to start the scan for the current batch with
 * @param limit the batch size
 * @return the row key of the last row scanned in the current batch, {@code null} if there are no more rows to scan.
 */
@Nullable
public byte[] rebuildIndexes(@Nullable byte[] startRowKey, int limit) {
    // Now rebuild indexes for all values in the metadata dataset
    byte[] valueRowPrefix = MdsKey.getValueRowPrefix();
    // If startRow is null, start at the beginning, else start at the provided start row
    startRowKey = startRowKey == null ? valueRowPrefix : startRowKey;
    // stopRowKey will always be the last row key with the valueRowPrefix
    byte[] stopRowKey = Bytes.stopKeyForPrefix(valueRowPrefix);
    Row row;
    try (Scanner scanner = indexedTable.scan(startRowKey, stopRowKey)) {
        while ((limit > 0) && (row = scanner.next()) != null) {
            byte[] rowKey = row.getRow();
            String targetType = MdsKey.getTargetType(rowKey);
            NamespacedEntityId namespacedEntityId = MdsKey.getNamespacedIdFromKey(targetType, rowKey);
            String metadataKey = MdsKey.getMetadataKey(targetType, rowKey);
            Set<Indexer> indexers = getIndexersForKey(metadataKey);
            MetadataEntry metadataEntry = getMetadata(namespacedEntityId, metadataKey);
            if (metadataEntry == null) {
                LOG.warn("Found null metadata entry for a known metadata key {} for entity {} which has an index stored. " + "Ignoring.", metadataKey, namespacedEntityId);
                continue;
            }
            // storeIndexes deletes old indexes
            storeIndexes(namespacedEntityId, metadataKey, indexers, metadataEntry);
            limit--;
        }
        Row startRowForNextBatch = scanner.next();
        if (startRowForNextBatch == null) {
            return null;
        }
        return startRowForNextBatch.getRow();
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ValueOnlyIndexer(co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer) DefaultValueIndexer(co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) SchemaIndexer(co.cask.cdap.data2.metadata.indexer.SchemaIndexer) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedTimeIndexer(co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer) Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Example 27 with Metadata

use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

the class MetadataDataset method getMetadata.

/**
 * Return metadata based on target id, and key.
 *
 * @param targetId The id of the target
 * @param key The metadata key to get
 * @return instance of {@link MetadataEntry} for the target type, id, and key
 */
@Nullable
private MetadataEntry getMetadata(NamespacedEntityId targetId, String key) {
    MDSKey mdsKey = MdsKey.getMDSValueKey(targetId, key);
    Row row = indexedTable.get(mdsKey.getKey());
    if (row.isEmpty()) {
        return null;
    }
    byte[] value = row.get(VALUE_COLUMN);
    if (value == null) {
        // This can happen when all tags are moved one by one. The row still exists, but the value is null.
        return null;
    }
    return new MetadataEntry(targetId, key, Bytes.toString(value));
}
Also used : MDSKey(co.cask.cdap.data2.dataset2.lib.table.MDSKey) Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Example 28 with Metadata

use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

the class MetadataDataset method getMetadata.

/**
 * Retrieves the metadata for the specified {@link NamespacedEntityId}.
 *
 * @param targetId the specified {@link NamespacedEntityId}
 * @return a Map representing the metadata for the specified {@link NamespacedEntityId}
 */
private Map<String, String> getMetadata(NamespacedEntityId targetId) {
    String targetType = EntityIdKeyHelper.getTargetType(targetId);
    MDSKey mdsKey = MdsKey.getMDSValueKey(targetId, null);
    byte[] startKey = mdsKey.getKey();
    byte[] stopKey = Bytes.stopKeyForPrefix(startKey);
    Map<String, String> metadata = new HashMap<>();
    try (Scanner scan = indexedTable.scan(startKey, stopKey)) {
        Row next;
        while ((next = scan.next()) != null) {
            String key = MdsKey.getMetadataKey(targetType, next.getRow());
            byte[] value = next.get(VALUE_COLUMN);
            if (key == null || value == null) {
                continue;
            }
            metadata.put(key, Bytes.toString(value));
        }
        return metadata;
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) MDSKey(co.cask.cdap.data2.dataset2.lib.table.MDSKey) Row(co.cask.cdap.api.dataset.table.Row)

Example 29 with Metadata

use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

the class MetadataDataset method removeMetadata.

/**
 * Removes all keys that satisfy a given predicate from the metadata of the specified {@link NamespacedEntityId}.
 *
 * @param targetId the {@link NamespacedEntityId} for which keys are to be removed
 * @param filter the {@link Predicate} that should be satisfied to remove a key
 */
private void removeMetadata(NamespacedEntityId targetId, Predicate<String> filter) {
    String targetType = EntityIdKeyHelper.getTargetType(targetId);
    MDSKey mdsKey = MdsKey.getMDSValueKey(targetId, null);
    byte[] prefix = mdsKey.getKey();
    byte[] stopKey = Bytes.stopKeyForPrefix(prefix);
    List<String> deletedMetadataKeys = new LinkedList<>();
    try (Scanner scan = indexedTable.scan(prefix, stopKey)) {
        Row next;
        while ((next = scan.next()) != null) {
            String value = next.getString(VALUE_COLUMN);
            if (value == null) {
                continue;
            }
            String metadataKey = MdsKey.getMetadataKey(targetType, next.getRow());
            if (filter.apply(metadataKey)) {
                indexedTable.delete(new Delete(next.getRow()));
                // store the key to delete its indexes later
                deletedMetadataKeys.add(metadataKey);
            }
        }
    }
    // delete all the indexes for all deleted metadata key
    for (String deletedMetadataKey : deletedMetadataKeys) {
        deleteIndexes(targetId, deletedMetadataKey);
    }
    writeHistory(targetId);
}
Also used : Delete(co.cask.cdap.api.dataset.table.Delete) Scanner(co.cask.cdap.api.dataset.table.Scanner) MDSKey(co.cask.cdap.data2.dataset2.lib.table.MDSKey) Row(co.cask.cdap.api.dataset.table.Row) LinkedList(java.util.LinkedList)

Example 30 with Metadata

use of co.cask.cdap.data2.metadata.dataset.Metadata in project cdap by caskdata.

the class DefaultMetadataStore method fetchMetadata.

private Map<NamespacedEntityId, Metadata> fetchMetadata(final Set<NamespacedEntityId> namespacedEntityIds, MetadataScope scope) {
    Set<Metadata> metadataSet = execute(new TransactionExecutor.Function<MetadataDataset, Set<Metadata>>() {

        @Override
        public Set<Metadata> apply(MetadataDataset input) throws Exception {
            return input.getMetadata(namespacedEntityIds);
        }
    }, scope);
    Map<NamespacedEntityId, Metadata> metadataMap = new HashMap<>();
    for (Metadata m : metadataSet) {
        metadataMap.put(m.getEntityId(), m);
    }
    return metadataMap;
}
Also used : NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) MetadataDataset(co.cask.cdap.data2.metadata.dataset.MetadataDataset) EnumSet(java.util.EnumSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) HashMap(java.util.HashMap) Metadata(co.cask.cdap.data2.metadata.dataset.Metadata) TransactionExecutor(org.apache.tephra.TransactionExecutor) BadRequestException(co.cask.cdap.common.BadRequestException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException)

Aggregations

IOException (java.io.IOException)9 BadRequestException (co.cask.cdap.common.BadRequestException)8 MetadataRecord (co.cask.cdap.common.metadata.MetadataRecord)7 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)7 Row (co.cask.cdap.api.dataset.table.Row)6 Scanner (co.cask.cdap.api.dataset.table.Scanner)5 DatasetId (co.cask.cdap.proto.id.DatasetId)5 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)5 Test (org.junit.Test)5 DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException)4 NotFoundException (co.cask.cdap.common.NotFoundException)4 MetadataDataset (co.cask.cdap.data2.metadata.dataset.MetadataDataset)4 Lineage (co.cask.cdap.data2.metadata.lineage.Lineage)4 Relation (co.cask.cdap.data2.metadata.lineage.Relation)4 NamespaceId (co.cask.cdap.proto.id.NamespaceId)4 HashMap (java.util.HashMap)4 ConfigModule (co.cask.cdap.common.guice.ConfigModule)3 LocationRuntimeModule (co.cask.cdap.common.guice.LocationRuntimeModule)3 DataSetsModules (co.cask.cdap.data.runtime.DataSetsModules)3 SystemDatasetRuntimeModule (co.cask.cdap.data.runtime.SystemDatasetRuntimeModule)3