Use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.
From the class MetadataDatasetTest, method testIndexRebuilding.
/**
 * Checks that {@code rebuildIndexes} re-indexes metadata one batch at a time.
 *
 * <p>Two entries are written with a {@link ReversingIndexer} instead of the default indexers, so searches
 * against the default index find nothing. Indexes are then rebuilt in two batches of size one; after the
 * first batch exactly one entry is searchable, after the second both are.
 */
@Test
public void testIndexRebuilding() throws Exception {
  final MetadataDataset dataset = getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testIndexRebuilding"));
  TransactionExecutor executor = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) dataset);

  // Write both entries using only the reversing indexer.
  executor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      Indexer reversingIndexer = new ReversingIndexer();
      Set<Indexer> indexers = Collections.singleton(reversingIndexer);
      dataset.setMetadata(new MetadataEntry(flow1, "flowKey", "flowValue"), indexers);
      dataset.setMetadata(new MetadataEntry(dataset1, "datasetKey", "datasetValue"), indexers);
    }
  });

  final String namespaceId = flow1.getNamespace();
  final Set<EntityTypeSimpleName> targetTypes = Collections.singleton(EntityTypeSimpleName.ALL);

  // Before any rebuild, none of the default-index searches may return anything.
  executor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      Assert.assertTrue(searchByDefaultIndex(dataset, namespaceId, "flowValue", targetTypes).isEmpty());
      Assert.assertTrue(searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes).isEmpty());
      Assert.assertTrue(searchByDefaultIndex(dataset, namespaceId, "datasetValue", targetTypes).isEmpty());
      Assert.assertTrue(searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes).isEmpty());
    }
  });

  final AtomicReference<byte[]> nextBatchStart = new AtomicReference<>();

  // First batch: re-build indexes for one row. Now the default indexer should be used for that row,
  // and a start key for the following batch must be returned.
  executor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      nextBatchStart.set(dataset.rebuildIndexes(null, 1));
      Assert.assertNotNull(nextBatchStart.get());
    }
  });

  // Exactly one of the two entries is searchable now; the test accepts either one.
  executor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      List<MetadataEntry> flowHits = searchByDefaultIndex(dataset, namespaceId, "flowValue", targetTypes);
      List<MetadataEntry> datasetHits = searchByDefaultIndex(dataset, namespaceId, "datasetValue", targetTypes);
      if (flowHits.isEmpty()) {
        // The dataset entry was re-indexed first.
        flowHits = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
        Assert.assertTrue(flowHits.isEmpty());
        Assert.assertEquals(1, datasetHits.size());
        datasetHits = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
        Assert.assertEquals(1, datasetHits.size());
      } else {
        // The flow entry was re-indexed first.
        Assert.assertEquals(1, flowHits.size());
        flowHits = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
        Assert.assertEquals(1, flowHits.size());
        Assert.assertTrue(datasetHits.isEmpty());
        datasetHits = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
        Assert.assertTrue(datasetHits.isEmpty());
      }
    }
  });

  // Second batch: continues from the returned key and must report that nothing is left to scan.
  executor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      nextBatchStart.set(dataset.rebuildIndexes(nextBatchStart.get(), 1));
      Assert.assertNull(nextBatchStart.get());
    }
  });

  // Both entries must now be found through the default index.
  executor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      Assert.assertEquals(1, searchByDefaultIndex(dataset, namespaceId, "flowValue", targetTypes).size());
      Assert.assertEquals(1, searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes).size());
      Assert.assertEquals(1, searchByDefaultIndex(dataset, namespaceId, "datasetValue", targetTypes).size());
      Assert.assertEquals(1, searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes).size());
    }
  });
}
Use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.
From the class MetadataDatasetTest, method testMultipleIndexes.
/**
 * Checks which index columns get populated for different kinds of system-scope properties: a plain
 * property, a schema, an entity name and a creation time.
 */
@Test
public void testMultipleIndexes() throws Exception {
  final MetadataDataset dataset =
    getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testMultipleIndexes"), MetadataScope.SYSTEM);
  TransactionExecutor executor = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) dataset);

  final String plainValue = "value";
  final String fieldName = "body";
  final String schemaText =
    Schema.recordOf("schema", Schema.Field.of(fieldName, Schema.of(Schema.Type.BYTES))).toString();
  final String entityName = "dataset1";
  final long createdAt = System.currentTimeMillis();
  final String createdAtText = String.valueOf(createdAt);

  // Two properties on the flow, two on the dataset.
  executor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      dataset.setProperty(flow1, "key", plainValue);
      dataset.setProperty(flow1, AbstractSystemMetadataWriter.SCHEMA_KEY, schemaText);
      dataset.setProperty(dataset1, AbstractSystemMetadataWriter.ENTITY_NAME_KEY, entityName);
      dataset.setProperty(dataset1, AbstractSystemMetadataWriter.CREATION_TIME_KEY, createdAtText);
    }
  });

  final String namespaceId = flow1.getNamespace();
  executor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // Plain property: only the default index column is expected.
      assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, plainValue);
      assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, plainValue);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, plainValue);
      assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, plainValue);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, plainValue);

      // Schema property: its field name is searchable through the default index column only.
      assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, fieldName);
      assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, fieldName);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, fieldName);
      assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, fieldName);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, fieldName);

      // Entity name: default and entity-name columns hold the name itself; the inverted entity-name
      // column holds what InvertedValueIndexer produces for it (lower-cased), not the plain name.
      assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, entityName);
      assertSingleIndex(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, entityName);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, entityName);
      String invertedName = Iterables.getOnlyElement(
        new InvertedValueIndexer().getIndexes(new MetadataEntry(dataset1, "key", entityName)));
      assertSingleIndex(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId,
                        invertedName.toLowerCase());
      assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, entityName);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, entityName);

      // Creation time: default and creation-time columns hold the timestamp; the inverted
      // creation-time column holds Long.MAX_VALUE minus the timestamp.
      assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, createdAtText);
      assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, createdAtText);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, createdAtText);
      assertSingleIndex(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, createdAtText);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, createdAtText);
      assertSingleIndex(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId,
                        String.valueOf(Long.MAX_VALUE - createdAt));
    }
  });
}
Use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.
From the class MetadataDataset, method rebuildIndexes.
/**
 * Rebuilds the indexes stored in the {@link MetadataDataset}, one batch of value rows at a time.
 *
 * <p>Rows for which no metadata entry can be found are logged and skipped; they do not count
 * toward {@code limit}.
 *
 * @param startRowKey the row key at which this batch's scan starts; if {@code null}, the scan starts at
 *   the value row prefix
 * @param limit the maximum number of rows to re-index in this batch
 * @return the key of the first row read after this batch, to be passed as {@code startRowKey} for the
 *   next batch, or {@code null} if there are no more rows to scan
 */
@Nullable
public byte[] rebuildIndexes(@Nullable byte[] startRowKey, int limit) {
  // Only value rows are scanned: from the given start (or the prefix itself) up to the end of the prefix range.
  byte[] valueRowPrefix = MdsKey.getValueRowPrefix();
  byte[] scanStartKey = (startRowKey != null) ? startRowKey : valueRowPrefix;
  byte[] scanStopKey = Bytes.stopKeyForPrefix(valueRowPrefix);

  try (Scanner scanner = indexedTable.scan(scanStartKey, scanStopKey)) {
    int remaining = limit;
    while (remaining > 0) {
      Row current = scanner.next();
      if (current == null) {
        break;
      }
      byte[] currentKey = current.getRow();
      String targetType = MdsKey.getTargetType(currentKey);
      NamespacedEntityId entityId = MdsKey.getNamespacedIdFromKey(targetType, currentKey);
      String metadataKey = MdsKey.getMetadataKey(targetType, currentKey);
      Set<Indexer> indexers = getIndexersForKey(metadataKey);
      MetadataEntry entry = getMetadata(entityId, metadataKey);
      if (entry == null) {
        LOG.warn("Found null metadata entry for a known metadata key {} for entity {} which has an index stored. "
                   + "Ignoring.", metadataKey, entityId);
        continue;
      }
      // storeIndexes deletes old indexes before writing the new ones
      storeIndexes(entityId, metadataKey, indexers, entry);
      remaining--;
    }

    // Read one row past the batch: its key is where the next batch has to resume.
    Row nextBatchFirstRow = scanner.next();
    return (nextBatchFirstRow == null) ? null : nextBatchFirstRow.getRow();
  }
}
Use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.
From the class SchemaIndexerTest, method testSimpleSchema.
/**
 * A bare {@code INT} schema is expected to produce exactly one index: {@code "properties:schema"}.
 */
@Test
public void testSimpleSchema() {
  MetadataEntry schemaEntry =
    new MetadataEntry(new DatasetId("ns1", "ds1"), KEY, Schema.of(Schema.Type.INT).toString());
  Set<String> indexes = new SchemaIndexer().getIndexes(schemaEntry);
  Assert.assertEquals(Collections.singleton("properties:schema"), indexes);
}
Use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.
From the class MetadataDataset, method storeIndexes.
/**
 * Replaces the stored indexes of a {@link MetadataEntry} with the ones produced by the given indexers.
 *
 * <p>Indexes already stored for the entry's metadata entity and key are deleted first. Every non-empty
 * index is lower-cased and written as a single row with two columns: the index on its own for
 * cross-namespace search, and the index prefixed with the entity's namespace for search within it.
 *
 * @param metadataEntry {@link MetadataEntry} for which indexes are to be stored
 * @param indexers the {@link Indexer indexers} that generate the indexes for this {@link MetadataEntry}
 */
private void storeIndexes(MetadataEntry metadataEntry, Set<Indexer> indexers) {
  MetadataEntity entity = metadataEntry.getMetadataEntity();
  String metadataKey = metadataEntry.getKey();

  // Start from a clean slate for this entity/key pair.
  deleteIndexes(entity, metadataKey);

  String namespacePrefix = entity.getValue(MetadataEntity.NAMESPACE) + MetadataConstants.KEYVALUE_SEPARATOR;
  for (Indexer indexer : indexers) {
    Set<String> generatedIndexes = indexer.getIndexes(metadataEntry);
    IndexColumn targetColumn = getIndexColumn(metadataKey, indexer.getSortOrder());
    for (String generatedIndex : generatedIndexes) {
      if (!generatedIndex.isEmpty()) {
        String lowercaseIndex = generatedIndex.toLowerCase();
        Put indexRow = new Put(MetadataKey.createIndexRowKey(entity, metadataKey, lowercaseIndex).getKey());
        // one value for cross namespace search ...
        indexRow.add(Bytes.toBytes(targetColumn.getCrossNamespaceColumn()), Bytes.toBytes(lowercaseIndex));
        // ... and one for search within the namespace
        indexRow.add(Bytes.toBytes(targetColumn.getColumn()), Bytes.toBytes(namespacePrefix + lowercaseIndex));
        indexedTable.put(indexRow);
      }
    }
  }
}
Aggregations