Search in sources :

Example 6 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class MetadataDatasetTest method testIndexRebuilding.

@Test
public void testIndexRebuilding() throws Exception {
    final MetadataDataset dataset = getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testIndexRebuilding"));
    TransactionExecutor txnl = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) dataset);
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Indexer indexer = new ReversingIndexer();
            dataset.setMetadata(new MetadataEntry(flow1, "flowKey", "flowValue"), Collections.singleton(indexer));
            dataset.setMetadata(new MetadataEntry(dataset1, "datasetKey", "datasetValue"), Collections.singleton(indexer));
        }
    });
    final String namespaceId = flow1.getNamespace();
    final Set<EntityTypeSimpleName> targetTypes = Collections.singleton(EntityTypeSimpleName.ALL);
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> searchResults = searchByDefaultIndex(dataset, namespaceId, "flowValue", targetTypes);
            Assert.assertTrue(searchResults.isEmpty());
            searchResults = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
            Assert.assertTrue(searchResults.isEmpty());
            searchResults = searchByDefaultIndex(dataset, namespaceId, "datasetValue", targetTypes);
            Assert.assertTrue(searchResults.isEmpty());
            searchResults = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
            Assert.assertTrue(searchResults.isEmpty());
        }
    });
    final AtomicReference<byte[]> startRowKeyForNextBatch = new AtomicReference<>();
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Re-build indexes. Now the default indexer should be used
            startRowKeyForNextBatch.set(dataset.rebuildIndexes(null, 1));
            Assert.assertNotNull(startRowKeyForNextBatch.get());
        }
    });
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> flowSearchResults = searchByDefaultIndex(dataset, namespaceId, "flowValue", targetTypes);
            List<MetadataEntry> dsSearchResults = searchByDefaultIndex(dataset, namespaceId, "datasetValue", targetTypes);
            if (!flowSearchResults.isEmpty()) {
                Assert.assertEquals(1, flowSearchResults.size());
                flowSearchResults = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
                Assert.assertEquals(1, flowSearchResults.size());
                Assert.assertTrue(dsSearchResults.isEmpty());
                dsSearchResults = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
                Assert.assertTrue(dsSearchResults.isEmpty());
            } else {
                flowSearchResults = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
                Assert.assertTrue(flowSearchResults.isEmpty());
                Assert.assertEquals(1, dsSearchResults.size());
                dsSearchResults = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
                Assert.assertEquals(1, dsSearchResults.size());
            }
        }
    });
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            startRowKeyForNextBatch.set(dataset.rebuildIndexes(startRowKeyForNextBatch.get(), 1));
            Assert.assertNull(startRowKeyForNextBatch.get());
        }
    });
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> searchResults = searchByDefaultIndex(dataset, namespaceId, "flowValue", targetTypes);
            Assert.assertEquals(1, searchResults.size());
            searchResults = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
            Assert.assertEquals(1, searchResults.size());
            searchResults = searchByDefaultIndex(dataset, namespaceId, "datasetValue", targetTypes);
            Assert.assertEquals(1, searchResults.size());
            searchResults = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
            Assert.assertEquals(1, searchResults.size());
        }
    });
}
Also used : TransactionExecutor(org.apache.tephra.TransactionExecutor) AtomicReference(java.util.concurrent.atomic.AtomicReference) TransactionFailureException(org.apache.tephra.TransactionFailureException) BadRequestException(co.cask.cdap.common.BadRequestException) EntityTypeSimpleName(co.cask.cdap.proto.element.EntityTypeSimpleName) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.junit.Test)

Example 7 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class MetadataDatasetTest method testMultipleIndexes.

@Test
public void testMultipleIndexes() throws Exception {
    final MetadataDataset dataset = getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testMultipleIndexes"), MetadataScope.SYSTEM);
    TransactionExecutor txnl = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) dataset);
    final String value = "value";
    final String body = "body";
    final String schema = Schema.recordOf("schema", Schema.Field.of(body, Schema.of(Schema.Type.BYTES))).toString();
    final String name = "dataset1";
    final long creationTime = System.currentTimeMillis();
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            dataset.setProperty(flow1, "key", value);
            dataset.setProperty(flow1, AbstractSystemMetadataWriter.SCHEMA_KEY, schema);
            dataset.setProperty(dataset1, AbstractSystemMetadataWriter.ENTITY_NAME_KEY, name);
            dataset.setProperty(dataset1, AbstractSystemMetadataWriter.CREATION_TIME_KEY, String.valueOf(creationTime));
        }
    });
    final String namespaceId = flow1.getNamespace();
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // entry with no special indexes
            assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, value);
            assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, value);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, value);
            assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, value);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, value);
            // entry with a schema
            assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, body);
            assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, body);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, body);
            assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, body);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, body);
            // entry with entity name
            assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, name);
            assertSingleIndex(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, name);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, name);
            Indexer indexer = new InvertedValueIndexer();
            String index = Iterables.getOnlyElement(indexer.getIndexes(new MetadataEntry(dataset1, "key", name)));
            assertSingleIndex(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, index.toLowerCase());
            assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, name);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, name);
            // entry with creation time
            String time = String.valueOf(creationTime);
            assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, time);
            assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, time);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, time);
            assertSingleIndex(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, time);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, time);
            assertSingleIndex(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, String.valueOf(Long.MAX_VALUE - creationTime));
        }
    });
}
Also used : Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionFailureException(org.apache.tephra.TransactionFailureException) BadRequestException(co.cask.cdap.common.BadRequestException) Test(org.junit.Test)

Example 8 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class MetadataDataset method rebuildIndexes.

/**
 * Rebuilds all the indexes in the {@link MetadataDataset} in batches.
 *
 * @param startRowKey the key of the row to start the scan for the current batch with
 * @param limit the batch size
 * @return the row key of the last row scanned in the current batch, {@code null} if there are no more rows to scan.
 */
@Nullable
public byte[] rebuildIndexes(@Nullable byte[] startRowKey, int limit) {
    // Now rebuild indexes for all values in the metadata dataset
    byte[] valueRowPrefix = MdsKey.getValueRowPrefix();
    // If startRow is null, start at the beginning, else start at the provided start row
    startRowKey = startRowKey == null ? valueRowPrefix : startRowKey;
    // stopRowKey will always be the last row key with the valueRowPrefix
    byte[] stopRowKey = Bytes.stopKeyForPrefix(valueRowPrefix);
    Row row;
    try (Scanner scanner = indexedTable.scan(startRowKey, stopRowKey)) {
        while ((limit > 0) && (row = scanner.next()) != null) {
            byte[] rowKey = row.getRow();
            String targetType = MdsKey.getTargetType(rowKey);
            NamespacedEntityId namespacedEntityId = MdsKey.getNamespacedIdFromKey(targetType, rowKey);
            String metadataKey = MdsKey.getMetadataKey(targetType, rowKey);
            Set<Indexer> indexers = getIndexersForKey(metadataKey);
            MetadataEntry metadataEntry = getMetadata(namespacedEntityId, metadataKey);
            if (metadataEntry == null) {
                LOG.warn("Found null metadata entry for a known metadata key {} for entity {} which has an index stored. " + "Ignoring.", metadataKey, namespacedEntityId);
                continue;
            }
            // storeIndexes deletes old indexes
            storeIndexes(namespacedEntityId, metadataKey, indexers, metadataEntry);
            limit--;
        }
        Row startRowForNextBatch = scanner.next();
        if (startRowForNextBatch == null) {
            return null;
        }
        return startRowForNextBatch.getRow();
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ValueOnlyIndexer(co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer) DefaultValueIndexer(co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) SchemaIndexer(co.cask.cdap.data2.metadata.indexer.SchemaIndexer) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedTimeIndexer(co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer) Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Example 9 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class SchemaIndexerTest method testSimpleSchema.

@Test
public void testSimpleSchema() {
    Schema simpleSchema = Schema.of(Schema.Type.INT);
    Set<String> expected = Collections.singleton("properties:schema");
    SchemaIndexer indexer = new SchemaIndexer();
    DatasetId datasetInstance = new DatasetId("ns1", "ds1");
    Set<String> actual = indexer.getIndexes(new MetadataEntry(datasetInstance, KEY, simpleSchema.toString()));
    Assert.assertEquals(expected, actual);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 10 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class MetadataDataset method storeIndexes.

/**
 * Store indexes for a {@link MetadataEntry}
 * @param indexers {@link Set<String>} of {@link Indexer indexers} for this {@link MetadataEntry}
 * @param metadataEntry {@link MetadataEntry} for which indexes are to be stored
 */
private void storeIndexes(MetadataEntry metadataEntry, Set<Indexer> indexers) {
    // Delete existing indexes for metadataEntity-key
    deleteIndexes(metadataEntry.getMetadataEntity(), metadataEntry.getKey());
    String namespacePrefix = metadataEntry.getMetadataEntity().getValue(MetadataEntity.NAMESPACE) + MetadataConstants.KEYVALUE_SEPARATOR;
    for (Indexer indexer : indexers) {
        Set<String> indexes = indexer.getIndexes(metadataEntry);
        IndexColumn indexColumn = getIndexColumn(metadataEntry.getKey(), indexer.getSortOrder());
        for (String index : indexes) {
            if (index.isEmpty()) {
                continue;
            }
            // store one value for within namespace search and one for cross namespace search
            String lowercaseIndex = index.toLowerCase();
            MDSKey mdsIndexKey = MetadataKey.createIndexRowKey(metadataEntry.getMetadataEntity(), metadataEntry.getKey(), lowercaseIndex);
            Put put = new Put(mdsIndexKey.getKey());
            put.add(Bytes.toBytes(indexColumn.getCrossNamespaceColumn()), Bytes.toBytes(lowercaseIndex));
            put.add(Bytes.toBytes(indexColumn.getColumn()), Bytes.toBytes(namespacePrefix + lowercaseIndex));
            indexedTable.put(put);
        }
    }
}
Also used : InvertedValueIndexer(io.cdap.cdap.data2.metadata.indexer.InvertedValueIndexer) Indexer(io.cdap.cdap.data2.metadata.indexer.Indexer) SchemaIndexer(io.cdap.cdap.data2.metadata.indexer.SchemaIndexer) DefaultValueIndexer(io.cdap.cdap.data2.metadata.indexer.DefaultValueIndexer) MetadataEntityTypeIndexer(io.cdap.cdap.data2.metadata.indexer.MetadataEntityTypeIndexer) ValueOnlyIndexer(io.cdap.cdap.data2.metadata.indexer.ValueOnlyIndexer) InvertedTimeIndexer(io.cdap.cdap.data2.metadata.indexer.InvertedTimeIndexer) MDSKey(io.cdap.cdap.data2.dataset2.lib.table.MDSKey) Put(io.cdap.cdap.api.dataset.table.Put)

Aggregations

Test (org.junit.Test)7 Indexer (co.cask.cdap.data2.metadata.indexer.Indexer)4 InvertedValueIndexer (co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer)4 MetadataEntry (io.cdap.cdap.data2.metadata.dataset.MetadataEntry)4 DatasetId (io.cdap.cdap.proto.id.DatasetId)4 Schema (io.cdap.cdap.api.data.schema.Schema)3 BadRequestException (co.cask.cdap.common.BadRequestException)2 DefaultValueIndexer (co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer)2 InvertedTimeIndexer (co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer)2 SchemaIndexer (co.cask.cdap.data2.metadata.indexer.SchemaIndexer)2 ValueOnlyIndexer (co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer)2 Indexer (io.cdap.cdap.data2.metadata.indexer.Indexer)2 InvertedValueIndexer (io.cdap.cdap.data2.metadata.indexer.InvertedValueIndexer)2 TransactionExecutor (org.apache.tephra.TransactionExecutor)2 TransactionFailureException (org.apache.tephra.TransactionFailureException)2 Row (co.cask.cdap.api.dataset.table.Row)1 Scanner (co.cask.cdap.api.dataset.table.Scanner)1 EntityTypeSimpleName (co.cask.cdap.proto.element.EntityTypeSimpleName)1 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)1 ImmutableList (com.google.common.collect.ImmutableList)1