Search in sources :

Example 11 with MetadataEntry

use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class MetadataDataset method rebuildIndexes.

/**
   * Rebuilds the indexes in the {@link MetadataDataset} one batch at a time.
   *
   * <p>Value rows are scanned from {@code startRowKey} up to the stop key derived from the value row prefix.
   * For each row the metadata entry is re-read and its indexes are stored again. Rows whose metadata entry
   * cannot be found are logged and skipped; they do not count towards {@code limit}.
   *
   * @param startRowKey the key of the row to start the scan for the current batch with; if {@code null}, the scan
   *                    starts at the value row prefix
   * @param limit the batch size, i.e. the maximum number of rows to re-index in this call
   * @return the key of the first row that was not processed by this batch, to be passed as {@code startRowKey}
   *         of the next call, or {@code null} if there are no more rows to scan
   */
@Nullable
public byte[] rebuildIndexes(@Nullable byte[] startRowKey, int limit) {
    byte[] valueRowPrefix = MdsKey.getValueRowPrefix();
    // If no start row is provided, start at the beginning of the value rows
    byte[] scanStartKey = startRowKey == null ? valueRowPrefix : startRowKey;
    // The scan always stops at the last row key that carries the valueRowPrefix
    byte[] stopRowKey = Bytes.stopKeyForPrefix(valueRowPrefix);
    try (Scanner scanner = indexedTable.scan(scanStartKey, stopRowKey)) {
        int remaining = limit;
        while (remaining > 0) {
            Row row = scanner.next();
            if (row == null) {
                // The scanner is exhausted: there is no next batch, and the scanner must not be polled again.
                return null;
            }
            byte[] rowKey = row.getRow();
            String targetType = MdsKey.getTargetType(rowKey);
            NamespacedEntityId namespacedEntityId = MdsKey.getNamespacedIdFromKey(targetType, rowKey);
            String metadataKey = MdsKey.getMetadataKey(targetType, rowKey);
            Set<Indexer> indexers = getIndexersForKey(metadataKey);
            MetadataEntry metadataEntry = getMetadata(namespacedEntityId, metadataKey);
            if (metadataEntry == null) {
                LOG.warn("Found null metadata entry for a known metadata key {} for entity {} which has an index stored. " + "Ignoring.", metadataKey, namespacedEntityId);
                // Skipped rows do not consume the batch budget
                continue;
            }
            // storeIndexes deletes old indexes
            storeIndexes(namespacedEntityId, metadataKey, indexers, metadataEntry);
            remaining--;
        }
        // The batch budget is used up; peek at the next row so the caller knows where to resume.
        Row startRowForNextBatch = scanner.next();
        return startRowForNextBatch == null ? null : startRowForNextBatch.getRow();
    }
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ValueOnlyIndexer(co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer) DefaultValueIndexer(co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) SchemaIndexer(co.cask.cdap.data2.metadata.indexer.SchemaIndexer) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedTimeIndexer(co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer) Row(co.cask.cdap.api.dataset.table.Row) Nullable(javax.annotation.Nullable)

Example 12 with MetadataEntry

use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class MetadataDatasetTest method testIndexRebuilding.

/**
 * Verifies that {@code rebuildIndexes} re-indexes metadata in batches: entries written with a
 * {@code ReversingIndexer} are not searchable by their plain values until the batches covering them have run.
 */
@Test
public void testIndexRebuilding() throws Exception {
    final MetadataDataset dataset = getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testIndexRebuilding"));
    TransactionExecutor executor = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) dataset);
    // Store one entry for a flow and one for a dataset, both indexed with the ReversingIndexer
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Indexer reversingIndexer = new ReversingIndexer();
            MetadataEntry flowEntry = new MetadataEntry(flow1, "flowKey", "flowValue");
            dataset.setMetadata(flowEntry, Collections.singleton(reversingIndexer));
            MetadataEntry datasetEntry = new MetadataEntry(dataset1, "datasetKey", "datasetValue");
            dataset.setMetadata(datasetEntry, Collections.singleton(reversingIndexer));
        }
    });
    final String namespaceId = flow1.getNamespace();
    final Set<EntityTypeSimpleName> targetTypes = Collections.singleton(EntityTypeSimpleName.ALL);
    // Before any rebuild, none of the plain queries should match
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            String[] queries = { "flowValue", "flowKey:flow*", "datasetValue", "datasetKey:dataset*" };
            for (String query : queries) {
                List<MetadataEntry> matches = searchByDefaultIndex(dataset, namespaceId, query, targetTypes);
                Assert.assertTrue(matches.isEmpty());
            }
        }
    });
    final AtomicReference<byte[]> startRowKeyForNextBatch = new AtomicReference<>();
    // First batch of size one: exactly one entry gets re-indexed and another batch must remain
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Re-build indexes. Now the default indexer should be used
            byte[] nextStartKey = dataset.rebuildIndexes(null, 1);
            startRowKeyForNextBatch.set(nextStartKey);
            Assert.assertNotNull(startRowKeyForNextBatch.get());
        }
    });
    // Either the flow or the dataset entry was re-indexed first; accept both orders
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> flowByValue = searchByDefaultIndex(dataset, namespaceId, "flowValue", targetTypes);
            List<MetadataEntry> datasetByValue = searchByDefaultIndex(dataset, namespaceId, "datasetValue", targetTypes);
            if (flowByValue.isEmpty()) {
                // The dataset entry was rebuilt; the flow entry is still not searchable
                List<MetadataEntry> flowByKey = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
                Assert.assertTrue(flowByKey.isEmpty());
                Assert.assertEquals(1, datasetByValue.size());
                List<MetadataEntry> datasetByKey = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
                Assert.assertEquals(1, datasetByKey.size());
            } else {
                // The flow entry was rebuilt; the dataset entry is still not searchable
                Assert.assertEquals(1, flowByValue.size());
                List<MetadataEntry> flowByKey = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
                Assert.assertEquals(1, flowByKey.size());
                Assert.assertTrue(datasetByValue.isEmpty());
                List<MetadataEntry> datasetByKey = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
                Assert.assertTrue(datasetByKey.isEmpty());
            }
        }
    });
    // Second batch of size one: the remaining entry is re-indexed and nothing is left to scan
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            byte[] nextStartKey = dataset.rebuildIndexes(startRowKeyForNextBatch.get(), 1);
            startRowKeyForNextBatch.set(nextStartKey);
            Assert.assertNull(startRowKeyForNextBatch.get());
        }
    });
    // After both batches, every plain query matches exactly one entry
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            String[] queries = { "flowValue", "flowKey:flow*", "datasetValue", "datasetKey:dataset*" };
            for (String query : queries) {
                List<MetadataEntry> matches = searchByDefaultIndex(dataset, namespaceId, query, targetTypes);
                Assert.assertEquals(1, matches.size());
            }
        }
    });
}
Also used : TransactionExecutor(org.apache.tephra.TransactionExecutor) AtomicReference(java.util.concurrent.atomic.AtomicReference) TransactionFailureException(org.apache.tephra.TransactionFailureException) BadRequestException(co.cask.cdap.common.BadRequestException) EntityTypeSimpleName(co.cask.cdap.proto.element.EntityTypeSimpleName) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.junit.Test)

Example 13 with MetadataEntry

use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class MetadataDatasetTest method testMultipleIndexes.

/**
 * Verifies which index columns are populated for system-scope properties: a plain property, a schema,
 * an entity name and a creation time.
 */
@Test
public void testMultipleIndexes() throws Exception {
    final MetadataDataset dataset = getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testMultipleIndexes"), MetadataScope.SYSTEM);
    TransactionExecutor executor = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) dataset);
    final String plainValue = "value";
    final String schemaFieldName = "body";
    Schema.Field bytesField = Schema.Field.of(schemaFieldName, Schema.of(Schema.Type.BYTES));
    final String schemaString = Schema.recordOf("schema", bytesField).toString();
    final String entityName = "dataset1";
    final long creationTime = System.currentTimeMillis();
    final String creationTimeString = String.valueOf(creationTime);
    // Write the four properties under test
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            dataset.setProperty(flow1, "key", plainValue);
            dataset.setProperty(flow1, AbstractSystemMetadataWriter.SCHEMA_KEY, schemaString);
            dataset.setProperty(dataset1, AbstractSystemMetadataWriter.ENTITY_NAME_KEY, entityName);
            dataset.setProperty(dataset1, AbstractSystemMetadataWriter.CREATION_TIME_KEY, creationTimeString);
        }
    });
    final String namespaceId = flow1.getNamespace();
    // Check, column by column, which indexes exist for each property value
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // plain property: only the default index column is expected
            assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, plainValue);
            assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, plainValue);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, plainValue);
            assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, plainValue);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, plainValue);

            // schema property: the field name is found through the default index column only
            assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, schemaFieldName);
            assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, schemaFieldName);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, schemaFieldName);
            assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, schemaFieldName);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, schemaFieldName);

            // entity name property: default and entity-name columns, plus the inverted value in the inverted column
            assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, entityName);
            assertSingleIndex(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, entityName);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, entityName);
            Indexer invertedIndexer = new InvertedValueIndexer();
            MetadataEntry nameEntry = new MetadataEntry(dataset1, "key", entityName);
            String invertedName = Iterables.getOnlyElement(invertedIndexer.getIndexes(nameEntry));
            assertSingleIndex(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, invertedName.toLowerCase());
            assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, entityName);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, entityName);

            // creation time property: default and creation-time columns, plus (Long.MAX_VALUE - time) inverted
            String time = String.valueOf(creationTime);
            assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN, namespaceId, time);
            assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN, namespaceId, time);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN, namespaceId, time);
            assertSingleIndex(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN, namespaceId, time);
            assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, time);
            String invertedTime = String.valueOf(Long.MAX_VALUE - creationTime);
            assertSingleIndex(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN, namespaceId, invertedTime);
        }
    });
}
Also used : Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionFailureException(org.apache.tephra.TransactionFailureException) BadRequestException(co.cask.cdap.common.BadRequestException) Test(org.junit.Test)

Example 14 with MetadataEntry

use of co.cask.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class SchemaIndexerTest method testSimpleRecord.

/**
 * Verifies the indexes that {@code SchemaIndexer} produces for a flat record schema with a string,
 * an array and a map field.
 */
@Test
public void testSimpleRecord() throws Exception {
    // String x
    Schema.Field stringField = Schema.Field.of("x", Schema.of(Schema.Type.STRING));
    // String[] y
    Schema.Field arrayField = Schema.Field.of("y", Schema.arrayOf(Schema.of(Schema.Type.STRING)));
    // Map<byte[],double> z
    Schema.Field mapField = Schema.Field.of("z", Schema.mapOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.DOUBLE)));
    Schema recordSchema = Schema.recordOf("record1", stringField, arrayField, mapField);

    // The record and each field are expected both by bare name and as name:TYPE
    Set<String> expected = ImmutableSet.of("record1", "record1:RECORD", "x", "x:STRING", "y", "y:ARRAY", "z", "z:MAP");

    SchemaIndexer schemaIndexer = new SchemaIndexer();
    DatasetId datasetId = new DatasetId("ns1", "ds1");
    MetadataEntry schemaEntry = new MetadataEntry(datasetId, KEY, recordSchema.toString());
    Set<String> actual = schemaIndexer.getIndexes(schemaEntry);
    Assert.assertEquals(addKeyPrefix(expected), actual);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Aggregations

MetadataEntry (co.cask.cdap.data2.metadata.dataset.MetadataEntry)7 Test (org.junit.Test)6 Indexer (co.cask.cdap.data2.metadata.indexer.Indexer)4 InvertedValueIndexer (co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer)4 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)4 Schema (co.cask.cdap.api.data.schema.Schema)3 Row (co.cask.cdap.api.dataset.table.Row)3 BadRequestException (co.cask.cdap.common.BadRequestException)3 DatasetId (co.cask.cdap.proto.id.DatasetId)3 ImmutableMap (com.google.common.collect.ImmutableMap)3 ArrayList (java.util.ArrayList)3 TransactionExecutor (org.apache.tephra.TransactionExecutor)3 Scanner (co.cask.cdap.api.dataset.table.Scanner)2 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)2 DefaultValueIndexer (co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer)2 InvertedTimeIndexer (co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer)2 SchemaIndexer (co.cask.cdap.data2.metadata.indexer.SchemaIndexer)2 ValueOnlyIndexer (co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2