Search in sources :

Example 1 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

From the class DefaultMetadataStore, method removeProperties:

/**
 * Removes the specified properties of the {@link NamespacedEntityId} and publishes an audit record
 * describing the change.
 *
 * <p>Inside the procedure handed to {@code execute}, the entity's current properties and tags are
 * captured as the "previous" record, the stored value of every requested key is collected, and the
 * keys are then removed. Keys that have no stored property are skipped when collecting values.
 *
 * <p>The removed values are gathered in a mutable, insertion-ordered map rather than an
 * {@code ImmutableMap.Builder}: the builder rejects duplicate keys when built, so passing the same
 * key twice would otherwise make this method fail before the audit record is published.
 *
 * @param scope the {@link MetadataScope} passed to {@code execute} and recorded in the audit records
 * @param namespacedEntityId the entity whose properties are removed
 * @param keys the property keys to remove; repeated keys are tolerated
 */
@Override
public void removeProperties(final MetadataScope scope, final NamespacedEntityId namespacedEntityId, final String... keys) {
    final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>();
    // Insertion-ordered so the audit payload keeps the order in which the keys were requested.
    final Map<String, String> deletes = new java.util.LinkedHashMap<>();
    execute(new TransactionExecutor.Procedure<MetadataDataset>() {

        @Override
        public void apply(MetadataDataset input) throws Exception {
            // Start clean in case this procedure is invoked more than once
            // (NOTE(review): whether execute() retries is not visible here — confirm).
            deletes.clear();
            previousRef.set(new MetadataRecord(namespacedEntityId, scope, input.getProperties(namespacedEntityId), input.getTags(namespacedEntityId)));
            for (String key : keys) {
                MetadataEntry record = input.getProperty(namespacedEntityId, key);
                if (record == null) {
                    // Nothing stored under this key, so there is no removed value to report.
                    continue;
                }
                deletes.put(record.getKey(), record.getValue());
            }
            input.removeProperties(namespacedEntityId, keys);
        }
    }, scope);
    // Audit arguments: the previous state, an empty record for this entity/scope, and the removed properties.
    publishAudit(previousRef.get(), new MetadataRecord(namespacedEntityId, scope), new MetadataRecord(namespacedEntityId, scope, ImmutableMap.copyOf(deletes), EMPTY_TAGS));
}
Also used : MetadataDataset(co.cask.cdap.data2.metadata.dataset.MetadataDataset) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) AtomicReference(java.util.concurrent.atomic.AtomicReference) TransactionExecutor(org.apache.tephra.TransactionExecutor) MetadataRecord(co.cask.cdap.common.metadata.MetadataRecord) ImmutableMap(com.google.common.collect.ImmutableMap) BadRequestException(co.cask.cdap.common.BadRequestException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException)

Example 2 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

From the class DefaultMetadataStore, method search:

/**
 * Runs the search in every requested scope, merges the hits, orders them, and returns the requested
 * page of entities together with their system and user metadata.
 *
 * @throws IllegalArgumentException if {@code offset} or {@code limit} is negative
 */
private MetadataSearchResponse search(Set<MetadataScope> scopes, String namespaceId, String searchQuery, Set<EntityTypeSimpleName> types, SortInfo sortInfo, int offset, int limit, int numCursors, String cursor, boolean showHidden, Set<EntityScope> entityScope) throws BadRequestException {
    if (offset < 0) {
        throw new IllegalArgumentException("offset must not be negative");
    }
    if (limit < 0) {
        throw new IllegalArgumentException("limit must not be negative");
    }
    // Collect hits and cursors across all scopes.
    List<MetadataEntry> allHits = new LinkedList<>();
    List<String> allCursors = new LinkedList<>();
    for (MetadataScope scope : scopes) {
        SearchResults scoped = getSearchResults(scope, namespaceId, searchQuery, types, sortInfo, offset, limit, numCursors, cursor, showHidden, entityScope);
        allHits.addAll(scoped.getResults());
        allCursors.addAll(scoped.getCursors());
    }
    // Order the entities (sorting only happens if the sort info requires it).
    Set<NamespacedEntityId> ordered = getSortedEntities(allHits, sortInfo);
    int total = ordered.size();
    // Paging is done here instead of at the dataset level, because:
    // 1. scoring is needed for DEFAULT sort info, so it is performed here for now;
    // 2. even with custom sorting, the elements before the offset and the cursors at the end
    //    still have to be trimmed.
    // TODO: Figure out how all of this can be done server (HBase) side
    int from = Math.min(offset, total);
    // Add in long arithmetic so that offset + limit cannot overflow; the end index is exclusive.
    int to = (int) Math.min((long) offset + limit, (long) total);
    Set<NamespacedEntityId> page = new LinkedHashSet<>(ImmutableList.copyOf(ordered).subList(from, to));
    // Look up metadata for the entities on this page.
    // Note: the lookup runs in a different transaction, so metadata for some entities may have been
    // removed in the meantime. Missing metadata for some results is acceptable in that case.
    Map<NamespacedEntityId, Metadata> systemMetadata = fetchMetadata(page, MetadataScope.SYSTEM);
    Map<NamespacedEntityId, Metadata> userMetadata = fetchMetadata(page, MetadataScope.USER);
    String sortDescription = sortInfo.getSortBy() + " " + sortInfo.getSortOrder();
    return new MetadataSearchResponse(sortDescription, offset, limit, numCursors, total, addMetadataToEntities(page, systemMetadata, userMetadata), allCursors, showHidden, entityScope);
}
Also used : Metadata(co.cask.cdap.data2.metadata.dataset.Metadata) MetadataSearchResponse(co.cask.cdap.proto.metadata.MetadataSearchResponse) SearchResults(co.cask.cdap.data2.metadata.dataset.SearchResults) LinkedList(java.util.LinkedList) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) MetadataScope(co.cask.cdap.api.metadata.MetadataScope)

Example 3 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

From the class DefaultMetadataStore, method getSortedEntities:

/**
 * Reduces the raw search hits to an ordered set of distinct entity ids.
 *
 * <p>For any sort order other than {@code WEIGHTED}, the ids keep the order in which they first
 * appear in {@code results}. For {@code WEIGHTED}, each entity is scored by the number of hits that
 * target it, and the ids are ordered with {@code SEARCH_RESULT_DESC_SCORE_COMPARATOR}.
 *
 * @param results the raw search hits
 * @param sortInfo determines whether weighted scoring is applied
 * @return the distinct entity ids in their final order
 */
private Set<NamespacedEntityId> getSortedEntities(List<MetadataEntry> results, SortInfo sortInfo) {
    Set<NamespacedEntityId> ordered;
    if (SortInfo.SortOrder.WEIGHTED == sortInfo.getSortOrder()) {
        // Score every entity by how many hits refer to it.
        Map<NamespacedEntityId, Integer> hitCounts = new HashMap<>();
        for (MetadataEntry hit : results) {
            NamespacedEntityId target = hit.getTargetId();
            Integer soFar = hitCounts.get(target);
            hitCounts.put(target, (soFar == null) ? 1 : soFar + 1);
        }
        // Rank the scored entities, then keep only the ids in ranked order.
        List<Map.Entry<NamespacedEntityId, Integer>> ranked = new ArrayList<>(hitCounts.entrySet());
        Collections.sort(ranked, SEARCH_RESULT_DESC_SCORE_COMPARATOR);
        ordered = new LinkedHashSet<>(ranked.size());
        for (Map.Entry<NamespacedEntityId, Integer> scored : ranked) {
            ordered.add(scored.getKey());
        }
    } else {
        // The hits are taken in the order given; the backing storage is expected to have
        // returned them already sorted.
        ordered = new LinkedHashSet<>(results.size());
        for (MetadataEntry hit : results) {
            ordered.add(hit.getTargetId());
        }
    }
    return ordered;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 4 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

From the class InvertedValueIndexerTest, method testSimple:

/**
 * Checks that sorting the inverted indexes in ascending order matches sorting the original values
 * in descending order, and that inverting an inverted index yields the original value again.
 */
@Test
public void testSimple() {
    List<String> inputs = ImmutableList.of("134342", "435ert5", "trdfrw", "_bfcfd", "r34_r3", "cgsdfgs)dfd", "gfsgfd2345245234", "dfsgs");
    // The expectation is the input in descending natural order.
    List<String> expected = new ArrayList<>(inputs);
    Collections.sort(expected);
    Collections.reverse(expected);
    // Invert every input value, then sort the inverted values in ascending order.
    List<String> inverted = new ArrayList<>();
    for (String value : inputs) {
        MetadataEntry entry = new MetadataEntry(ns, "dontcare", value);
        inverted.add(Iterables.getOnlyElement(indexer.getIndexes(entry)));
    }
    Collections.sort(inverted);
    // Inverting each sorted value once more must reproduce the expected descending sequence.
    int position = 0;
    for (String invertedValue : inverted) {
        MetadataEntry entry = new MetadataEntry(ns, "dontcare", invertedValue);
        String restored = Iterables.getOnlyElement(indexer.getIndexes(entry));
        Assert.assertEquals(expected.get(position), restored);
        position++;
    }
}
Also used : ArrayList(java.util.ArrayList) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) Test(org.junit.Test)

Example 5 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

From the class SchemaIndexerTest, method testComplexRecord:

/**
 * Indexes a union of a deeply nested record schema, an array schema and null, and checks the
 * indexes produced for it against the expected set of field names and {@code name:TYPE} pairs.
 */
@Test
public void testComplexRecord() throws Exception {
    // Simple building blocks.
    Schema stringSchema = Schema.of(Schema.Type.STRING);
    Schema bytesSchema = Schema.of(Schema.Type.BYTES);
    Schema intSchema = Schema.of(Schema.Type.INT);

    // record21 { String x; String[] y; Map<byte[], double> z }
    Schema record21 = Schema.recordOf(
        "record21",
        Schema.Field.of("x", stringSchema),
        Schema.Field.of("y", Schema.arrayOf(stringSchema)),
        Schema.Field.of("z", Schema.mapOf(bytesSchema, Schema.of(Schema.Type.DOUBLE))));

    // record22 { Map<array<byte[]>, Map<boolean, byte[]>> a }
    Schema fieldA = Schema.mapOf(
        Schema.arrayOf(bytesSchema),
        Schema.mapOf(Schema.of(Schema.Type.BOOLEAN), bytesSchema));
    Schema record22 = Schema.recordOf("record22", Schema.Field.of("a", fieldA));

    // record1 { Map<record21, record22[]> map1; nullable int i; union<int, long, null> j }
    Schema complexSchema = Schema.recordOf(
        "record1",
        Schema.Field.of("map1", Schema.mapOf(record21, Schema.arrayOf(record22))),
        Schema.Field.of("i", Schema.nullableOf(intSchema)),
        Schema.Field.of("j", Schema.unionOf(intSchema, Schema.of(Schema.Type.LONG), Schema.of(Schema.Type.NULL))));

    Schema anotherComplexSchema = Schema.arrayOf(stringSchema);
    Schema superComplexSchema = Schema.unionOf(complexSchema, anotherComplexSchema, Schema.of(Schema.Type.NULL));

    Set<String> expected = ImmutableSet.of("map1", "map1:MAP", "record21", "record21:RECORD", "x", "x:STRING", "y", "y:ARRAY", "z", "z:MAP", "record22", "record22:RECORD", "a", "a:MAP", "i", "i:INT", "j", "j:UNION", "record1", "record1:RECORD");

    DatasetId datasetInstance = new DatasetId("ns1", "ds1");
    MetadataEntry schemaEntry = new MetadataEntry(datasetInstance, KEY, superComplexSchema.toString());
    Set<String> actual = new SchemaIndexer().getIndexes(schemaEntry);
    Assert.assertEquals(addKeyPrefix(expected), actual);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)16 MetadataEntry (io.cdap.cdap.data2.metadata.dataset.MetadataEntry)13 MetadataEntry (co.cask.cdap.data2.metadata.dataset.MetadataEntry)7 HashSet (java.util.HashSet)7 ArrayList (java.util.ArrayList)5 ImmutableMap (com.google.common.collect.ImmutableMap)4 DatasetId (io.cdap.cdap.proto.id.DatasetId)4 Schema (co.cask.cdap.api.data.schema.Schema)3 DatasetId (co.cask.cdap.proto.id.DatasetId)3 Schema (io.cdap.cdap.api.data.schema.Schema)3 MetadataEntity (io.cdap.cdap.api.metadata.MetadataEntity)3 MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey)3 HashMap (java.util.HashMap)3 LinkedHashSet (java.util.LinkedHashSet)3 Map (java.util.Map)3 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)2 Put (io.cdap.cdap.api.dataset.table.Put)2 Row (io.cdap.cdap.api.dataset.table.Row)2 Indexer (io.cdap.cdap.data2.metadata.indexer.Indexer)2 InvertedValueIndexer (io.cdap.cdap.data2.metadata.indexer.InvertedValueIndexer)2