use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.
the class InvertedValueIndexerTest method testSimple.
/**
 * Checks that ordering the inverted indexes ascending matches ordering the original values
 * descending: after sorting the indexes, indexing each one again must yield the original value
 * found at the same position of the reverse-sorted input.
 */
@Test
public void testSimple() {
  List<String> inputs = ImmutableList.of("134342", "435ert5", "trdfrw", "_bfcfd", "r34_r3", "cgsdfgs)dfd", "gfsgfd2345245234", "dfsgs");

  // The original values, in descending order.
  List<String> descendingInputs = new ArrayList<>(inputs);
  Collections.sort(descendingInputs, Collections.<String>reverseOrder());

  // The single index produced for each value, in ascending order.
  List<String> ascendingInverted = new ArrayList<>();
  for (String value : inputs) {
    MetadataEntry entry = new MetadataEntry(ns, "dontcare", value);
    ascendingInverted.add(Iterables.getOnlyElement(indexer.getIndexes(entry)));
  }
  Collections.sort(ascendingInverted);

  // Index every inverted value once more and compare with the descending originals, rank by rank.
  int rank = 0;
  for (String inverted : ascendingInverted) {
    MetadataEntry invertedEntry = new MetadataEntry(ns, "dontcare", inverted);
    String restored = Iterables.getOnlyElement(indexer.getIndexes(invertedEntry));
    Assert.assertEquals(descendingInputs.get(rank), restored);
    rank++;
  }
}
use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.
the class SchemaIndexerTest method testSimpleSchema.
/**
 * Checks that indexing a schema that is just the INT type produces no indexes at all.
 */
@Test
public void testSimpleSchema() throws Exception {
  String schemaText = Schema.of(Schema.Type.INT).toString();
  SchemaIndexer schemaIndexer = new SchemaIndexer();
  DatasetId datasetId = new DatasetId("ns1", "ds1");
  MetadataEntry entry = new MetadataEntry(datasetId, KEY, schemaText);

  Set<String> indexes = schemaIndexer.getIndexes(entry);

  Set<String> none = Collections.emptySet();
  Assert.assertEquals(none, indexes);
}
use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.
the class SchemaIndexerTest method testComplexRecord.
/**
 * Indexes a union of a nested record schema, an array-of-string schema and null, and expects
 * (after {@code addKeyPrefix}) one index per record/field name plus one {@code name:TYPE}
 * index for each of them.
 */
@Test
public void testComplexRecord() throws Exception {
  // record21 { x: string, y: array<string>, z: map<bytes, double> }
  Schema record21 = Schema.recordOf(
    "record21",
    Schema.Field.of("x", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("y", Schema.arrayOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("z", Schema.mapOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.DOUBLE))));

  // record22 { a: map<array<bytes>, map<boolean, bytes>> }
  Schema record22 = Schema.recordOf(
    "record22",
    Schema.Field.of("a", Schema.mapOf(Schema.arrayOf(Schema.of(Schema.Type.BYTES)),
                                      Schema.mapOf(Schema.of(Schema.Type.BOOLEAN), Schema.of(Schema.Type.BYTES)))));

  // record1 { map1: map<record21, array<record22>>, i: nullable int, j: union<int, long, null> }
  Schema complexSchema = Schema.recordOf(
    "record1",
    Schema.Field.of("map1", Schema.mapOf(record21, Schema.arrayOf(record22))),
    Schema.Field.of("i", Schema.nullableOf(Schema.of(Schema.Type.INT))),
    Schema.Field.of("j", Schema.unionOf(Schema.of(Schema.Type.INT), Schema.of(Schema.Type.LONG), Schema.of(Schema.Type.NULL))));

  Schema anotherComplexSchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));
  Schema superComplexSchema = Schema.unionOf(complexSchema, anotherComplexSchema, Schema.of(Schema.Type.NULL));

  MetadataEntry entry = new MetadataEntry(new DatasetId("ns1", "ds1"), KEY, superComplexSchema.toString());
  Set<String> actual = new SchemaIndexer().getIndexes(entry);

  Set<String> expected = ImmutableSet.of("map1", "map1:MAP", "record21", "record21:RECORD", "x", "x:STRING", "y", "y:ARRAY", "z", "z:MAP", "record22", "record22:RECORD", "a", "a:MAP", "i", "i:INT", "j", "j:UNION", "record1", "record1:RECORD");
  Assert.assertEquals(addKeyPrefix(expected), actual);
}
use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.
the class MetadataDataset method getMetadata.
/**
 * Returns the metadata of the given entities.
 *
 * <p>The result is built only from rows found by the scan, so an entity for which no row is
 * converted into a {@link MetadataEntry} does not appear in the returned set.
 *
 * @param targetIds entities for which metadata is required
 * @return a set with one {@link Metadata} (properties and tags) per entity that has stored
 *         entries; an empty set if {@code targetIds} is empty
 */
public Set<Metadata> getMetadata(Set<? extends NamespacedEntityId> targetIds) {
  if (targetIds.isEmpty()) {
    return Collections.emptySet();
  }

  // One fuzzy key per requested entity, sorted so the first and last keys bound the scan range.
  List<ImmutablePair<byte[], byte[]>> fuzzyKeys = new ArrayList<>(targetIds.size());
  for (NamespacedEntityId id : targetIds) {
    fuzzyKeys.add(getFuzzyKeyFor(id));
  }
  Collections.sort(fuzzyKeys, FUZZY_KEY_COMPARATOR);

  ImmutablePair<byte[], byte[]> lowestKey = fuzzyKeys.get(0);
  ImmutablePair<byte[], byte[]> highestKey = fuzzyKeys.get(fuzzyKeys.size() - 1);
  byte[] startRow = lowestKey.getFirst();
  byte[] stopRow = Bytes.stopKeyForPrefix(highestKey.getFirst());
  Scan fuzzyScan = new Scan(startRow, stopRow, new FuzzyRowFilter(fuzzyKeys));

  // The scan returns one row per property; collect the rows under the entity they belong to.
  Multimap<NamespacedEntityId, MetadataEntry> entriesByEntity = HashMultimap.create();
  try (Scanner scanner = indexedTable.scan(fuzzyScan)) {
    for (Row row = scanner.next(); row != null; row = scanner.next()) {
      MetadataEntry entry = convertRow(row);
      if (entry == null) {
        continue;
      }
      entriesByEntity.put(entry.getTargetId(), entry);
    }
  }

  // Turn each entity's entries into a Metadata object: the tags entry is split into a tag set,
  // every other entry becomes a property.
  Set<Metadata> result = new HashSet<>();
  for (NamespacedEntityId entityId : entriesByEntity.keySet()) {
    Set<String> tags = Collections.emptySet();
    Map<String, String> properties = new HashMap<>();
    for (MetadataEntry entry : entriesByEntity.get(entityId)) {
      String entryKey = entry.getKey();
      if (!TAGS_KEY.equals(entryKey)) {
        properties.put(entryKey, entry.getValue());
      } else {
        tags = splitTags(entry.getValue());
      }
    }
    result.add(new Metadata(entityId, properties, tags));
  }
  return result;
}
use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.
the class MetadataDataset method getMetadata.
/**
 * Looks up a single metadata entry by target id and key.
 *
 * @param targetId The id of the target
 * @param key The metadata key to get
 * @return the {@link MetadataEntry} for the target and key, or {@code null} if the row is empty
 *         or holds no value
 */
@Nullable
private MetadataEntry getMetadata(NamespacedEntityId targetId, String key) {
  Row row = indexedTable.get(MdsKey.getMDSValueKey(targetId, key).getKey());
  if (row.isEmpty()) {
    return null;
  }
  // The value may be null even though the row exists, e.g. after all tags have been removed
  // one by one.
  byte[] rawValue = row.get(VALUE_COLUMN);
  return rawValue == null ? null : new MetadataEntry(targetId, key, Bytes.toString(rawValue));
}
Aggregations