Search in sources:

Example 6 with Indexer

use of co.cask.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

The class MetadataDatasetTest, method testIndexRebuilding.

/**
 * Exercises {@code MetadataDataset.rebuildIndexes} across two consecutive batches.
 *
 * <p>Two entries (one for {@code flow1}, one for {@code dataset1}) are stored using only a
 * {@code ReversingIndexer}; the test asserts that default-index searches find nothing at that
 * point. The indexes are then rebuilt with two calls that each pass {@code 1} as the second
 * argument (presumably the batch limit -- confirm against {@code rebuildIndexes}). After the
 * first call exactly one of the two entries must be searchable and a non-null continuation row
 * key must be returned; after the second call both entries must be searchable and the
 * continuation key must be {@code null}.
 */
@Test
public void testIndexRebuilding() throws Exception {
    final MetadataDataset mds = getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testIndexRebuilding"));
    TransactionExecutor executor = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) mds);

    // Step 1: write both entries with a non-default indexer only.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Set<Indexer> reversingOnly = Collections.<Indexer>singleton(new ReversingIndexer());
            mds.setMetadata(new MetadataEntry(flow1, "flowKey", "flowValue"), reversingOnly);
            mds.setMetadata(new MetadataEntry(dataset1, "datasetKey", "datasetValue"), reversingOnly);
        }
    });

    final String ns = flow1.getNamespace();
    final Set<EntityTypeSimpleName> allTypes = Collections.singleton(EntityTypeSimpleName.ALL);

    // Step 2: none of the default-index queries may match yet.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> flowByValue = searchByDefaultIndex(mds, ns, "flowValue", allTypes);
            Assert.assertTrue(flowByValue.isEmpty());
            List<MetadataEntry> flowByKey = searchByDefaultIndex(mds, ns, "flowKey:flow*", allTypes);
            Assert.assertTrue(flowByKey.isEmpty());
            List<MetadataEntry> datasetByValue = searchByDefaultIndex(mds, ns, "datasetValue", allTypes);
            Assert.assertTrue(datasetByValue.isEmpty());
            List<MetadataEntry> datasetByKey = searchByDefaultIndex(mds, ns, "datasetKey:dataset*", allTypes);
            Assert.assertTrue(datasetByKey.isEmpty());
        }
    });

    // Holds the row key returned by each rebuild call so the next call can resume from it.
    final AtomicReference<byte[]> resumeKey = new AtomicReference<>();

    // Step 3: first rebuild pass, starting from the beginning (null start key).
    // From here on the default indexer is expected to be applied.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            byte[] next = mds.rebuildIndexes(null, 1);
            resumeKey.set(next);
            Assert.assertNotNull(next);
        }
    });

    // Step 4: exactly one of the two entries is searchable; which one is not fixed by the test.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> flowByValue = searchByDefaultIndex(mds, ns, "flowValue", allTypes);
            List<MetadataEntry> datasetByValue = searchByDefaultIndex(mds, ns, "datasetValue", allTypes);
            if (flowByValue.isEmpty()) {
                // The dataset entry was re-indexed first.
                List<MetadataEntry> flowByKey = searchByDefaultIndex(mds, ns, "flowKey:flow*", allTypes);
                Assert.assertTrue(flowByKey.isEmpty());
                Assert.assertEquals(1, datasetByValue.size());
                List<MetadataEntry> datasetByKey = searchByDefaultIndex(mds, ns, "datasetKey:dataset*", allTypes);
                Assert.assertEquals(1, datasetByKey.size());
            } else {
                // The flow entry was re-indexed first.
                Assert.assertEquals(1, flowByValue.size());
                List<MetadataEntry> flowByKey = searchByDefaultIndex(mds, ns, "flowKey:flow*", allTypes);
                Assert.assertEquals(1, flowByKey.size());
                Assert.assertTrue(datasetByValue.isEmpty());
                List<MetadataEntry> datasetByKey = searchByDefaultIndex(mds, ns, "datasetKey:dataset*", allTypes);
                Assert.assertTrue(datasetByKey.isEmpty());
            }
        }
    });

    // Step 5: second rebuild pass resumes from the stored key; a null result is expected.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            byte[] next = mds.rebuildIndexes(resumeKey.get(), 1);
            resumeKey.set(next);
            Assert.assertNull(next);
        }
    });

    // Step 6: both entries are now searchable through the default index.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> flowByValue = searchByDefaultIndex(mds, ns, "flowValue", allTypes);
            Assert.assertEquals(1, flowByValue.size());
            List<MetadataEntry> flowByKey = searchByDefaultIndex(mds, ns, "flowKey:flow*", allTypes);
            Assert.assertEquals(1, flowByKey.size());
            List<MetadataEntry> datasetByValue = searchByDefaultIndex(mds, ns, "datasetValue", allTypes);
            Assert.assertEquals(1, datasetByValue.size());
            List<MetadataEntry> datasetByKey = searchByDefaultIndex(mds, ns, "datasetKey:dataset*", allTypes);
            Assert.assertEquals(1, datasetByKey.size());
        }
    });
}
Also used : TransactionExecutor(org.apache.tephra.TransactionExecutor) AtomicReference(java.util.concurrent.atomic.AtomicReference) TransactionFailureException(org.apache.tephra.TransactionFailureException) BadRequestException(co.cask.cdap.common.BadRequestException) EntityTypeSimpleName(co.cask.cdap.proto.element.EntityTypeSimpleName) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.junit.Test)

Example 7 with Indexer

use of co.cask.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

The class MetadataDatasetTest, method testMultipleIndexes.

/**
 * Checks which index columns are populated for different kinds of system-scope properties.
 *
 * <p>Four properties are written: a plain key/value and a schema on {@code flow1}, and an entity
 * name and a creation time on {@code dataset1}. For each stored value the test asserts, column by
 * column, whether exactly one index entry or no index entry exists.
 */
@Test
public void testMultipleIndexes() throws Exception {
    final MetadataDataset mds = getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testMultipleIndexes"), MetadataScope.SYSTEM);
    TransactionExecutor executor = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) mds);

    final String plainValue = "value";
    final String fieldName = "body";
    final String schemaText = Schema.recordOf("schema", Schema.Field.of(fieldName, Schema.of(Schema.Type.BYTES))).toString();
    final String entityName = "dataset1";
    final long createdAt = System.currentTimeMillis();
    final String createdAtText = String.valueOf(createdAt);

    // Write all four properties in a single transaction.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            mds.setProperty(flow1, "key", plainValue);
            mds.setProperty(flow1, AbstractSystemMetadataWriter.SCHEMA_KEY, schemaText);
            mds.setProperty(dataset1, AbstractSystemMetadataWriter.ENTITY_NAME_KEY, entityName);
            mds.setProperty(dataset1, AbstractSystemMetadataWriter.CREATION_TIME_KEY, createdAtText);
        }
    });

    final String ns = flow1.getNamespace();

    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Plain value: present in the default index column only.
            assertSingleIndex(mds, MetadataDataset.DEFAULT_INDEX_COLUMN, ns, plainValue);
            assertNoIndexes(mds, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, ns, plainValue);
            assertNoIndexes(mds, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, ns, plainValue);
            assertNoIndexes(mds, MetadataDataset.CREATION_TIME_INDEX_COLUMN, ns, plainValue);
            assertNoIndexes(mds, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, ns, plainValue);

            // Schema: its field name is present in the default index column only.
            assertSingleIndex(mds, MetadataDataset.DEFAULT_INDEX_COLUMN, ns, fieldName);
            assertNoIndexes(mds, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, ns, fieldName);
            assertNoIndexes(mds, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, ns, fieldName);
            assertNoIndexes(mds, MetadataDataset.CREATION_TIME_INDEX_COLUMN, ns, fieldName);
            assertNoIndexes(mds, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, ns, fieldName);

            // Entity name: present in the default and entity-name columns as-is ...
            assertSingleIndex(mds, MetadataDataset.DEFAULT_INDEX_COLUMN, ns, entityName);
            assertSingleIndex(mds, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, ns, entityName);
            assertNoIndexes(mds, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, ns, entityName);
            // ... while the inverted entity-name column is looked up with the lower-cased
            // output of InvertedValueIndexer for that name.
            Indexer inverting = new InvertedValueIndexer();
            String invertedName = Iterables.getOnlyElement(inverting.getIndexes(new MetadataEntry(dataset1, "key", entityName)));
            assertSingleIndex(mds, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, ns, invertedName.toLowerCase());
            assertNoIndexes(mds, MetadataDataset.CREATION_TIME_INDEX_COLUMN, ns, entityName);
            assertNoIndexes(mds, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, ns, entityName);

            // Creation time: present in the default and creation-time columns as-is ...
            assertSingleIndex(mds, MetadataDataset.DEFAULT_INDEX_COLUMN, ns, createdAtText);
            assertNoIndexes(mds, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, ns, createdAtText);
            assertNoIndexes(mds, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, ns, createdAtText);
            assertSingleIndex(mds, MetadataDataset.CREATION_TIME_INDEX_COLUMN, ns, createdAtText);
            assertNoIndexes(mds, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, ns, createdAtText);
            // ... while the inverted creation-time column is looked up with Long.MAX_VALUE minus the time.
            String invertedTime = String.valueOf(Long.MAX_VALUE - createdAt);
            assertSingleIndex(mds, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, ns, invertedTime);
        }
    });
}
Also used : Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionFailureException(org.apache.tephra.TransactionFailureException) BadRequestException(co.cask.cdap.common.BadRequestException) Test(org.junit.Test)

Example 8 with Indexer

use of co.cask.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

The class SchemaIndexerTest, method testSimpleRecord.

/**
 * Indexes a flat record schema with a string, an array and a map field, and asserts that
 * {@code SchemaIndexer} produces, after key-prefixing via {@code addKeyPrefix}, an entry for the
 * record name and each field name, both bare and suffixed with its type name.
 */
@Test
public void testSimpleRecord() throws Exception {
    // String x
    Schema.Field stringField = Schema.Field.of("x", Schema.of(Schema.Type.STRING));
    // String[] y
    Schema.Field arrayField = Schema.Field.of("y", Schema.arrayOf(Schema.of(Schema.Type.STRING)));
    // Map<byte[],double> z
    Schema.Field mapField = Schema.Field.of("z", Schema.mapOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.DOUBLE)));
    Schema recordSchema = Schema.recordOf("record1", stringField, arrayField, mapField);

    Set<String> expectedIndexes = ImmutableSet.of("record1", "record1:RECORD", "x", "x:STRING", "y", "y:ARRAY", "z", "z:MAP");

    DatasetId datasetId = new DatasetId("ns1", "ds1");
    MetadataEntry schemaEntry = new MetadataEntry(datasetId, KEY, recordSchema.toString());
    Set<String> actualIndexes = new SchemaIndexer().getIndexes(schemaEntry);

    Assert.assertEquals(addKeyPrefix(expectedIndexes), actualIndexes);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)5 Indexer (co.cask.cdap.data2.metadata.indexer.Indexer)4 InvertedValueIndexer (co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer)4 Schema (co.cask.cdap.api.data.schema.Schema)3 MetadataEntry (co.cask.cdap.data2.metadata.dataset.MetadataEntry)3 DatasetId (co.cask.cdap.proto.id.DatasetId)3 BadRequestException (co.cask.cdap.common.BadRequestException)2 DefaultValueIndexer (co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer)2 InvertedTimeIndexer (co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer)2 SchemaIndexer (co.cask.cdap.data2.metadata.indexer.SchemaIndexer)2 ValueOnlyIndexer (co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer)2 TransactionExecutor (org.apache.tephra.TransactionExecutor)2 TransactionFailureException (org.apache.tephra.TransactionFailureException)2 Put (co.cask.cdap.api.dataset.table.Put)1 Row (co.cask.cdap.api.dataset.table.Row)1 Scanner (co.cask.cdap.api.dataset.table.Scanner)1 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)1 EntityTypeSimpleName (co.cask.cdap.proto.element.EntityTypeSimpleName)1 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)1 ImmutableList (com.google.common.collect.ImmutableList)1