Example usage of co.cask.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata: class SchemaIndexerTest, method testSimpleSchema.
@Test
public void testSimpleSchema() throws Exception {
  // A primitive (non-record) schema has no named fields, so the indexer
  // should produce no index entries at all.
  Schema intSchema = Schema.of(Schema.Type.INT);
  SchemaIndexer schemaIndexer = new SchemaIndexer();
  DatasetId dataset = new DatasetId("ns1", "ds1");
  MetadataEntry entry = new MetadataEntry(dataset, KEY, intSchema.toString());
  Set<String> indexes = schemaIndexer.getIndexes(entry);
  Assert.assertEquals(Collections.emptySet(), indexes);
}
Example usage of co.cask.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata: class SchemaIndexerTest, method testComplexRecord.
@Test
public void testComplexRecord() throws Exception {
  // record21: { x: String, y: String[], z: Map<byte[], double> }
  Schema record21 = Schema.recordOf("record21",
      Schema.Field.of("x", Schema.of(Schema.Type.STRING)),
      Schema.Field.of("y", Schema.arrayOf(Schema.of(Schema.Type.STRING))),
      Schema.Field.of("z", Schema.mapOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.DOUBLE))));
  // record22: { a: Map<byte[][], Map<boolean, byte[]>> }
  Schema record22 = Schema.recordOf("record22",
      Schema.Field.of("a", Schema.mapOf(
          Schema.arrayOf(Schema.of(Schema.Type.BYTES)),
          Schema.mapOf(Schema.of(Schema.Type.BOOLEAN), Schema.of(Schema.Type.BYTES)))));
  // record1: { map1: Map<record21, record22[]>, i: int (nullable), j: int|long|null }
  Schema complexSchema = Schema.recordOf("record1",
      Schema.Field.of("map1", Schema.mapOf(record21, Schema.arrayOf(record22))),
      Schema.Field.of("i", Schema.nullableOf(Schema.of(Schema.Type.INT))),
      Schema.Field.of("j", Schema.unionOf(
          Schema.of(Schema.Type.INT), Schema.of(Schema.Type.LONG), Schema.of(Schema.Type.NULL))));
  Schema anotherComplexSchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));
  // Wrap everything in a top-level union to exercise union traversal as well.
  Schema superComplexSchema = Schema.unionOf(complexSchema, anotherComplexSchema, Schema.of(Schema.Type.NULL));
  // Every named field/record is expected to be indexed both bare and as name:TYPE.
  Set<String> expected = ImmutableSet.of(
      "map1", "map1:MAP",
      "record21", "record21:RECORD",
      "x", "x:STRING",
      "y", "y:ARRAY",
      "z", "z:MAP",
      "record22", "record22:RECORD",
      "a", "a:MAP",
      "i", "i:INT",
      "j", "j:UNION",
      "record1", "record1:RECORD");
  SchemaIndexer indexer = new SchemaIndexer();
  DatasetId dataset = new DatasetId("ns1", "ds1");
  Set<String> actual = indexer.getIndexes(new MetadataEntry(dataset, KEY, superComplexSchema.toString()));
  Assert.assertEquals(addKeyPrefix(expected), actual);
}
Example usage of co.cask.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata: class MetadataDataset, method write.
/**
 * Persists a {@link MetadataEntry} value and refreshes its index rows.
 *
 * @param targetId the entity the metadata belongs to
 * @param entry the metadata key/value to persist
 * @param indexers the indexers used to (re)build index rows for the entry
 */
private void write(NamespacedEntityId targetId, MetadataEntry entry, Set<Indexer> indexers) {
  String metadataKey = entry.getKey();
  // Store the raw metadata value under its value row key.
  MDSKey valueRowKey = MdsKey.getMDSValueKey(targetId, metadataKey);
  Put valuePut = new Put(valueRowKey.getKey());
  valuePut.add(Bytes.toBytes(VALUE_COLUMN), Bytes.toBytes(entry.getValue()));
  indexedTable.put(valuePut);
  // Rebuild the index rows for this entry, then record the mutation in history.
  storeIndexes(targetId, metadataKey, indexers, entry);
  writeHistory(targetId);
}
Example usage of co.cask.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata: class MetadataDataset, method storeIndexes.
/**
 * Stores indexes for a {@link MetadataEntry}. Existing indexes for the
 * targetId-key pair are removed first, so this is a full replace.
 *
 * @param targetId the {@link NamespacedEntityId} for which the metadata indexes are to be stored
 * @param metadataKey the metadata key for which the indexes are to be stored
 * @param indexers the {@link Set} of {@link Indexer indexers} to apply to this {@link MetadataEntry}
 * @param metadataEntry {@link MetadataEntry} for which indexes are to be stored
 */
private void storeIndexes(NamespacedEntityId targetId, String metadataKey, Set<Indexer> indexers, MetadataEntry metadataEntry) {
// Delete existing indexes for targetId-key
deleteIndexes(targetId, metadataKey);
for (Indexer indexer : indexers) {
Set<String> indexes = indexer.getIndexes(metadataEntry);
// Column choice encodes the indexer's sort order for this key.
String indexColumn = getIndexColumn(metadataKey, indexer.getSortOrder());
for (String index : indexes) {
// store just the index value
indexedTable.put(getIndexPut(targetId, metadataKey, index, indexColumn));
}
}
}
Example usage of co.cask.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata: class MetadataDataset, method rebuildIndexes.
/**
 * Rebuilds all the indexes in the {@link MetadataDataset} in batches.
 *
 * @param startRowKey the key of the row to start the scan for the current batch with,
 *                    or {@code null} to start at the beginning of the value rows
 * @param limit the batch size (maximum number of value rows to reindex in this call)
 * @return the row key of the last row scanned in the current batch, {@code null} if there are no more rows to scan.
 */
@Nullable
public byte[] rebuildIndexes(@Nullable byte[] startRowKey, int limit) {
// Now rebuild indexes for all values in the metadata dataset
byte[] valueRowPrefix = MdsKey.getValueRowPrefix();
// If startRow is null, start at the beginning, else start at the provided start row
startRowKey = startRowKey == null ? valueRowPrefix : startRowKey;
// stopRowKey will always be the last row key with the valueRowPrefix
byte[] stopRowKey = Bytes.stopKeyForPrefix(valueRowPrefix);
Row row;
try (Scanner scanner = indexedTable.scan(startRowKey, stopRowKey)) {
while ((limit > 0) && (row = scanner.next()) != null) {
// Decode the entity and metadata key out of the value row key.
byte[] rowKey = row.getRow();
String targetType = MdsKey.getTargetType(rowKey);
NamespacedEntityId namespacedEntityId = MdsKey.getNamespacedIdFromKey(targetType, rowKey);
String metadataKey = MdsKey.getMetadataKey(targetType, rowKey);
Set<Indexer> indexers = getIndexersForKey(metadataKey);
MetadataEntry metadataEntry = getMetadata(namespacedEntityId, metadataKey);
if (metadataEntry == null) {
// Index row exists but the value row is gone; skip rather than fail the batch.
LOG.warn("Found null metadata entry for a known metadata key {} for entity {} which has an index stored. " + "Ignoring.", metadataKey, namespacedEntityId);
continue;
}
// storeIndexes deletes old indexes
storeIndexes(namespacedEntityId, metadataKey, indexers, metadataEntry);
limit--;
}
// Peek one row past the batch: its key becomes the next batch's (inclusive)
// start row, so that row is re-scanned and reindexed by the next call.
Row startRowForNextBatch = scanner.next();
if (startRowForNextBatch == null) {
// Scanner exhausted — no more value rows to rebuild.
return null;
}
return startRowForNextBatch.getRow();
}
}
Aggregations