Search in sources :

Example 6 with MetadataEntry

use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class InvertedValueIndexerTest method testSimple.

/**
 * Checks the ordering produced by the inverted value indexer: once the inverted indexes of all
 * inputs are sorted ascending, mapping each one back through the indexer is expected to yield the
 * inputs in descending (reverse-sorted) order.
 */
@Test
public void testSimple() {
    List<String> inputs = ImmutableList.of("134342", "435ert5", "trdfrw", "_bfcfd", "r34_r3", "cgsdfgs)dfd", "gfsgfd2345245234", "dfsgs");

    // Compute the single inverted index of every input value, then order them ascending.
    List<String> ascendingInverted = new ArrayList<>();
    for (String value : inputs) {
        MetadataEntry entry = new MetadataEntry(ns, "dontcare", value);
        ascendingInverted.add(Iterables.getOnlyElement(indexer.getIndexes(entry)));
    }
    Collections.sort(ascendingInverted);

    // The expectation is the input list in descending natural order.
    List<String> descendingInputs = new ArrayList<>(inputs);
    Collections.sort(descendingInputs);
    Collections.reverse(descendingInputs);

    // Running an inverted index through the indexer again is asserted to give back the original
    // value found at the same position of the descending input list.
    int position = 0;
    for (String inverted : ascendingInverted) {
        MetadataEntry invertedEntry = new MetadataEntry(ns, "dontcare", inverted);
        String restored = Iterables.getOnlyElement(indexer.getIndexes(invertedEntry));
        Assert.assertEquals(descendingInputs.get(position), restored);
        position++;
    }
}
Also used : ArrayList(java.util.ArrayList) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) Test(org.junit.Test)

Example 7 with MetadataEntry

use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class SchemaIndexerTest method testSimpleSchema.

/**
 * A schema consisting of a bare INT type is expected to produce no indexes at all.
 */
@Test
public void testSimpleSchema() throws Exception {
    DatasetId datasetInstance = new DatasetId("ns1", "ds1");
    String schemaText = Schema.of(Schema.Type.INT).toString();
    MetadataEntry schemaEntry = new MetadataEntry(datasetInstance, KEY, schemaText);

    Set<String> indexes = new SchemaIndexer().getIndexes(schemaEntry);

    Set<String> noIndexes = Collections.emptySet();
    Assert.assertEquals(noIndexes, indexes);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 8 with MetadataEntry

use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class SchemaIndexerTest method testComplexRecord.

/**
 * Indexes a union of a deeply nested record schema, an array-of-string schema and NULL, and
 * checks that the indexer emits (with the key prefix applied by {@code addKeyPrefix}) every
 * record/field name both on its own and in {@code name:TYPE} form.
 */
@Test
public void testComplexRecord() throws Exception {
    // record21 { String x; String[] y; Map<byte[], double> z }
    Schema record21 = Schema.recordOf(
        "record21",
        Schema.Field.of("x", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("y", Schema.arrayOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("z", Schema.mapOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.DOUBLE))));

    // record22 { Map<array<byte[]>, Map<boolean, byte[]>> a }
    Schema record22 = Schema.recordOf(
        "record22",
        Schema.Field.of(
            "a",
            Schema.mapOf(
                Schema.arrayOf(Schema.of(Schema.Type.BYTES)),
                Schema.mapOf(Schema.of(Schema.Type.BOOLEAN), Schema.of(Schema.Type.BYTES)))));

    // record1 { Map<record21, record22[]> map1; nullable int i; union(int, long, null) j }
    Schema complexSchema = Schema.recordOf(
        "record1",
        Schema.Field.of("map1", Schema.mapOf(record21, Schema.arrayOf(record22))),
        Schema.Field.of("i", Schema.nullableOf(Schema.of(Schema.Type.INT))),
        Schema.Field.of(
            "j",
            Schema.unionOf(Schema.of(Schema.Type.INT), Schema.of(Schema.Type.LONG), Schema.of(Schema.Type.NULL))));

    Schema anotherComplexSchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));
    Schema superComplexSchema = Schema.unionOf(complexSchema, anotherComplexSchema, Schema.of(Schema.Type.NULL));

    DatasetId datasetInstance = new DatasetId("ns1", "ds1");
    MetadataEntry schemaEntry = new MetadataEntry(datasetInstance, KEY, superComplexSchema.toString());
    Set<String> actual = new SchemaIndexer().getIndexes(schemaEntry);

    Set<String> expected = ImmutableSet.of("map1", "map1:MAP", "record21", "record21:RECORD", "x", "x:STRING", "y", "y:ARRAY", "z", "z:MAP", "record22", "record22:RECORD", "a", "a:MAP", "i", "i:INT", "j", "j:UNION", "record1", "record1:RECORD");
    Assert.assertEquals(addKeyPrefix(expected), actual);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 9 with MetadataEntry

use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class MetadataDataset method getMetadata.

/**
   * Looks up the metadata of several entities using one fuzzy-filtered table scan.
   *
   * <p>The scan yields one row per property; rows are grouped by the entity they belong to and
   * each group is folded into a single {@link Metadata} object. The entry stored under
   * {@code TAGS_KEY} supplies the tags, every other entry becomes a property.
   *
   * @param targetIds entities for which metadata is required
   * @return one {@link Metadata} per entity for which the scan produced at least one convertible
   *         row; an empty set if {@code targetIds} is empty
   */
public Set<Metadata> getMetadata(Set<? extends NamespacedEntityId> targetIds) {
    if (targetIds.isEmpty()) {
        return Collections.emptySet();
    }

    // Build one fuzzy key per requested entity and put the keys in FUZZY_KEY_COMPARATOR order.
    List<ImmutablePair<byte[], byte[]>> sortedFuzzyKeys = new ArrayList<>(targetIds.size());
    for (NamespacedEntityId id : targetIds) {
        sortedFuzzyKeys.add(getFuzzyKeyFor(id));
    }
    Collections.sort(sortedFuzzyKeys, FUZZY_KEY_COMPARATOR);

    // The scan range runs from the first sorted key up to the stop key derived from the last one.
    ImmutablePair<byte[], byte[]> firstKey = sortedFuzzyKeys.get(0);
    ImmutablePair<byte[], byte[]> lastKey = sortedFuzzyKeys.get(sortedFuzzyKeys.size() - 1);
    byte[] scanStart = firstKey.getFirst();
    byte[] scanStop = Bytes.stopKeyForPrefix(lastKey.getFirst());

    // Collect the scanned rows, grouped by the entity each row refers to.
    Multimap<NamespacedEntityId, MetadataEntry> entriesByEntity = HashMultimap.create();
    Scan fuzzyScan = new Scan(scanStart, scanStop, new FuzzyRowFilter(sortedFuzzyKeys));
    try (Scanner scanner = indexedTable.scan(fuzzyScan)) {
        for (Row row = scanner.next(); row != null; row = scanner.next()) {
            MetadataEntry converted = convertRow(row);
            if (converted == null) {
                // Rows that convertRow cannot turn into an entry are skipped.
                continue;
            }
            entriesByEntity.put(converted.getTargetId(), converted);
        }
    }

    // Fold each entity's entries into one Metadata object.
    Set<Metadata> result = new HashSet<>();
    for (Map.Entry<NamespacedEntityId, Collection<MetadataEntry>> grouped : entriesByEntity.asMap().entrySet()) {
        Set<String> tags = Collections.emptySet();
        Map<String, String> properties = new HashMap<>();
        for (MetadataEntry groupedEntry : grouped.getValue()) {
            String entryKey = groupedEntry.getKey();
            if (!TAGS_KEY.equals(entryKey)) {
                properties.put(entryKey, groupedEntry.getValue());
                continue;
            }
            tags = splitTags(groupedEntry.getValue());
        }
        result.add(new Metadata(grouped.getKey(), properties, tags));
    }
    return result;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FuzzyRowFilter(co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ImmutablePair(co.cask.cdap.common.utils.ImmutablePair) Collection(java.util.Collection) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) HashSet(java.util.HashSet)

Example 10 with MetadataEntry

use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class MetadataDataset method getMetadata.

/**
   * Reads the single metadata value stored for a target entity under the given key.
   *
   * @param targetId The id of the target
   * @param key The metadata key to get
   * @return a {@link MetadataEntry} holding the stored value, or {@code null} if the row is empty
   *         or has no value in {@code VALUE_COLUMN}
   */
@Nullable
private MetadataEntry getMetadata(NamespacedEntityId targetId, String key) {
    byte[] rowKey = MdsKey.getMDSValueKey(targetId, key).getKey();
    Row storedRow = indexedTable.get(rowKey);
    if (storedRow.isEmpty()) {
        return null;
    }

    // A row can exist without a value; the original note attributes this to tags having been
    // taken away one by one (presumably "removed" - confirm), leaving the row behind.
    byte[] rawValue = storedRow.get(VALUE_COLUMN);
    return rawValue == null ? null : new MetadataEntry(targetId, key, Bytes.toString(rawValue));
}
Also used : MDSKey(co.cask.cdap.data2.dataset2.lib.table.MDSKey) Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Aggregations

MetadataEntry (co.cask.cdap.data2.metadata.dataset.MetadataEntry): 7; Test (org.junit.Test): 6; Indexer (co.cask.cdap.data2.metadata.indexer.Indexer): 4; InvertedValueIndexer (co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer): 4; NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId): 4; Schema (co.cask.cdap.api.data.schema.Schema): 3; Row (co.cask.cdap.api.dataset.table.Row): 3; BadRequestException (co.cask.cdap.common.BadRequestException): 3; DatasetId (co.cask.cdap.proto.id.DatasetId): 3; ImmutableMap (com.google.common.collect.ImmutableMap): 3; ArrayList (java.util.ArrayList): 3; TransactionExecutor (org.apache.tephra.TransactionExecutor): 3; Scanner (co.cask.cdap.api.dataset.table.Scanner): 2; MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey): 2; DefaultValueIndexer (co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer): 2; InvertedTimeIndexer (co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer): 2; SchemaIndexer (co.cask.cdap.data2.metadata.indexer.SchemaIndexer): 2; ValueOnlyIndexer (co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer): 2; HashMap (java.util.HashMap): 2; Map (java.util.Map): 2