Example use of co.cask.cdap.data2.metadata.indexer.Indexer in the cdap project by caskdata.
From the class MetadataDatasetTest, method testIndexRebuilding.
/**
 * Checks that {@code rebuildIndexes} makes metadata searchable through the default index
 * after it was originally written with a {@link ReversingIndexer}.
 *
 * <p>The test walks through these steps, each in its own transaction:
 * <ol>
 *   <li>store one flow entry and one dataset entry using only the reversing indexer;</li>
 *   <li>confirm that none of the default-index searches find anything;</li>
 *   <li>rebuild indexes starting from {@code null} with a second argument of 1 and expect a
 *       non-null start row key for the next batch;</li>
 *   <li>confirm that exactly one of the two entries is now searchable;</li>
 *   <li>rebuild again from the returned row key and expect {@code null} back;</li>
 *   <li>confirm that both entries are searchable by value and by key prefix.</li>
 * </ol>
 */
@Test
public void testIndexRebuilding() throws Exception {
  final MetadataDataset dataset = getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testIndexRebuilding"));
  TransactionExecutor txExecutor = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) dataset);

  // Step 1: write both entries with the reversing indexer only.
  txExecutor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      Indexer reversingIndexer = new ReversingIndexer();
      MetadataEntry flowEntry = new MetadataEntry(flow1, "flowKey", "flowValue");
      dataset.setMetadata(flowEntry, Collections.singleton(reversingIndexer));
      MetadataEntry datasetEntry = new MetadataEntry(dataset1, "datasetKey", "datasetValue");
      dataset.setMetadata(datasetEntry, Collections.singleton(reversingIndexer));
    }
  });

  final String namespaceId = flow1.getNamespace();
  final Set<EntityTypeSimpleName> targetTypes = Collections.singleton(EntityTypeSimpleName.ALL);
  // The value and key-prefix queries for both entries, in the order they are checked.
  final String[] allQueries = {"flowValue", "flowKey:flow*", "datasetValue", "datasetKey:dataset*"};

  // Step 2: nothing is reachable through the default index yet.
  txExecutor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (String query : allQueries) {
        List<MetadataEntry> hits = searchByDefaultIndex(dataset, namespaceId, query, targetTypes);
        Assert.assertTrue(hits.isEmpty());
      }
    }
  });

  final AtomicReference<byte[]> nextBatchStartKey = new AtomicReference<>();

  // Step 3: first rebuild pass. Now the default indexer should be used.
  txExecutor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      byte[] returnedKey = dataset.rebuildIndexes(null, 1);
      nextBatchStartKey.set(returnedKey);
      Assert.assertNotNull(nextBatchStartKey.get());
    }
  });

  // Step 4: exactly one of the two entries has been re-indexed; either one is acceptable.
  txExecutor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      List<MetadataEntry> flowByValue = searchByDefaultIndex(dataset, namespaceId, "flowValue", targetTypes);
      List<MetadataEntry> datasetByValue = searchByDefaultIndex(dataset, namespaceId, "datasetValue", targetTypes);
      if (flowByValue.isEmpty()) {
        // The dataset entry was rebuilt first, so the flow entry must still be invisible.
        List<MetadataEntry> flowByKey = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
        Assert.assertTrue(flowByKey.isEmpty());
        Assert.assertEquals(1, datasetByValue.size());
        List<MetadataEntry> datasetByKey =
          searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
        Assert.assertEquals(1, datasetByKey.size());
      } else {
        // The flow entry was rebuilt first, so the dataset entry must still be invisible.
        Assert.assertEquals(1, flowByValue.size());
        List<MetadataEntry> flowByKey = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
        Assert.assertEquals(1, flowByKey.size());
        Assert.assertTrue(datasetByValue.isEmpty());
        List<MetadataEntry> datasetByKey =
          searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
        Assert.assertTrue(datasetByKey.isEmpty());
      }
    }
  });

  // Step 5: second rebuild pass resumes from the returned key and reports no further batch.
  txExecutor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      byte[] resumeFrom = nextBatchStartKey.get();
      byte[] returnedKey = dataset.rebuildIndexes(resumeFrom, 1);
      nextBatchStartKey.set(returnedKey);
      Assert.assertNull(nextBatchStartKey.get());
    }
  });

  // Step 6: every query now matches exactly one entry.
  txExecutor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (String query : allQueries) {
        List<MetadataEntry> hits = searchByDefaultIndex(dataset, namespaceId, query, targetTypes);
        Assert.assertEquals(1, hits.size());
      }
    }
  });
}
Example use of co.cask.cdap.data2.metadata.indexer.Indexer in the cdap project by caskdata.
From the class MetadataDatasetTest, method testMultipleIndexes.
/**
 * Checks which index columns are populated for different kinds of properties written to a
 * {@link MetadataScope#SYSTEM} metadata dataset.
 *
 * <p>Four properties are stored: a plain key/value and a schema on {@code flow1}, and an
 * entity name and a creation time on {@code dataset1}. The assertions then verify, per
 * property, which of the default, entity-name, inverted-entity-name, creation-time and
 * inverted-creation-time index columns contain a matching index.
 */
@Test
public void testMultipleIndexes() throws Exception {
  final MetadataDataset dataset =
    getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testMultipleIndexes"), MetadataScope.SYSTEM);
  TransactionExecutor txExecutor = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) dataset);

  final String plainValue = "value";
  final String schemaFieldName = "body";
  Schema recordSchema =
    Schema.recordOf("schema", Schema.Field.of(schemaFieldName, Schema.of(Schema.Type.BYTES)));
  final String schemaText = recordSchema.toString();
  final String entityName = "dataset1";
  final long creationTime = System.currentTimeMillis();

  // Write all four properties in a single transaction.
  txExecutor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      dataset.setProperty(flow1, "key", plainValue);
      dataset.setProperty(flow1, AbstractSystemMetadataWriter.SCHEMA_KEY, schemaText);
      dataset.setProperty(dataset1, AbstractSystemMetadataWriter.ENTITY_NAME_KEY, entityName);
      dataset.setProperty(dataset1, AbstractSystemMetadataWriter.CREATION_TIME_KEY,
                          String.valueOf(creationTime));
    }
  });

  final String namespaceId = flow1.getNamespace();

  txExecutor.execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // Plain property: only the default index column is expected to match.
      assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, plainValue);
      assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, plainValue);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, plainValue);
      assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, plainValue);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, plainValue);

      // Schema property: the schema's field name is found in the default index column only.
      assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, schemaFieldName);
      assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, schemaFieldName);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, schemaFieldName);
      assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, schemaFieldName);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, schemaFieldName);

      // Entity name property: default and entity-name columns match the raw name, while the
      // inverted entity-name column matches only the value produced by InvertedValueIndexer.
      assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, entityName);
      assertSingleIndex(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, entityName);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, entityName);
      Indexer invertedIndexer = new InvertedValueIndexer();
      MetadataEntry nameEntry = new MetadataEntry(dataset1, "key", entityName);
      String invertedName = Iterables.getOnlyElement(invertedIndexer.getIndexes(nameEntry));
      assertSingleIndex(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId,
                        invertedName.toLowerCase());
      assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, entityName);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, entityName);

      // Creation time property: default and creation-time columns match the raw timestamp,
      // while the inverted column matches Long.MAX_VALUE minus the timestamp.
      String creationTimeText = String.valueOf(creationTime);
      assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, creationTimeText);
      assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, creationTimeText);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, creationTimeText);
      assertSingleIndex(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, creationTimeText);
      assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, creationTimeText);
      long invertedCreationTime = Long.MAX_VALUE - creationTime;
      assertSingleIndex(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId,
                        String.valueOf(invertedCreationTime));
    }
  });
}
Example use of co.cask.cdap.data2.metadata.indexer.Indexer in the cdap project by caskdata.
From the class SchemaIndexerTest, method testSimpleRecord.
/**
 * Checks the indexes that {@link SchemaIndexer} produces for a flat record schema with a
 * string field, a string-array field and a bytes-to-double map field.
 *
 * <p>The expected set holds the record name and each field name, both on their own and
 * suffixed with {@code :TYPE}; it is passed through {@code addKeyPrefix} before comparison.
 */
@Test
public void testSimpleRecord() throws Exception {
  // String x
  Schema.Field xField = Schema.Field.of("x", Schema.of(Schema.Type.STRING));
  // String[] y
  Schema.Field yField = Schema.Field.of("y", Schema.arrayOf(Schema.of(Schema.Type.STRING)));
  // Map<byte[],double> z
  Schema.Field zField =
    Schema.Field.of("z", Schema.mapOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.DOUBLE)));
  Schema simpleSchema = Schema.recordOf("record1", xField, yField, zField);

  Set<String> expected = ImmutableSet.of("record1", "record1:RECORD",
                                         "x", "x:STRING",
                                         "y", "y:ARRAY",
                                         "z", "z:MAP");

  SchemaIndexer indexer = new SchemaIndexer();
  DatasetId datasetInstance = new DatasetId("ns1", "ds1");
  MetadataEntry schemaEntry = new MetadataEntry(datasetInstance, KEY, simpleSchema.toString());
  Set<String> actual = indexer.getIndexes(schemaEntry);

  Assert.assertEquals(addKeyPrefix(expected), actual);
}
Aggregations