Search in sources :

Example 1 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class MetadataDataset method storeIndexes.

/**
 * Replaces the indexes stored for one metadata key of an entity.
 *
 * <p>The indexes currently stored for the {@code targetId}/{@code metadataKey} pair are deleted first. Then, for
 * every supplied {@link Indexer}, each index it generates for {@code metadataEntry} is put into the indexed table
 * under the index column obtained from the metadata key and that indexer's sort order.
 *
 * @param targetId the {@link NamespacedEntityId} whose metadata indexes are to be stored
 * @param metadataKey the metadata key for which the indexes are to be stored
 * @param indexers the {@link Set} of {@link Indexer indexers} to apply to this {@link MetadataEntry}
 * @param metadataEntry the {@link MetadataEntry} for which indexes are to be stored
 */
private void storeIndexes(NamespacedEntityId targetId, String metadataKey, Set<Indexer> indexers, MetadataEntry metadataEntry) {
    // Start clean: drop whatever is currently indexed for this targetId-key pair
    deleteIndexes(targetId, metadataKey);
    for (Indexer currentIndexer : indexers) {
        Set<String> generatedIndexes = currentIndexer.getIndexes(metadataEntry);
        // the destination column depends on both the key and the indexer's sort order
        String targetColumn = getIndexColumn(metadataKey, currentIndexer.getSortOrder());
        for (String indexValue : generatedIndexes) {
            // only the index value itself is stored
            indexedTable.put(getIndexPut(targetId, metadataKey, indexValue, targetColumn));
        }
    }
}
Also used : ValueOnlyIndexer(co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer) DefaultValueIndexer(co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) SchemaIndexer(co.cask.cdap.data2.metadata.indexer.SchemaIndexer) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedTimeIndexer(co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer)

Example 2 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class MetadataDatasetTest method testMultipleIndexes.

/**
 * Adds four properties (a plain key, a schema, an entity name and a creation time) and then checks, for each
 * stored value, which index columns contain exactly one index and which contain none.
 */
@Test
public void testMultipleIndexes() throws Exception {
    final String propertyValue = "value";
    final String fieldName = "body";
    final String schemaText = Schema.recordOf("schema", Schema.Field.of(fieldName, Schema.of(Schema.Type.BYTES))).toString();
    final String entityName = "dataset1";
    final long createdAt = System.currentTimeMillis();
    // write all four properties in a single transaction
    txnl.execute(() -> {
        dataset.addProperty(program1, "key", propertyValue);
        dataset.addProperty(program1, MetadataConstants.SCHEMA_KEY, schemaText);
        dataset.addProperty(dataset1, MetadataConstants.ENTITY_NAME_KEY, entityName);
        dataset.addProperty(dataset1, MetadataConstants.CREATION_TIME_KEY, String.valueOf(createdAt));
    });
    final String namespace = program1.getValue(MetadataEntity.NAMESPACE);
    // verify the resulting indexes in a second transaction
    txnl.execute(() -> {
        // plain "key" property: indexed in the default column only
        assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN.getColumn(), namespace, propertyValue);
        assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN.getColumn(), namespace, propertyValue);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespace, propertyValue);
        assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN.getColumn(), namespace, propertyValue);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespace, propertyValue);
        // schema property: its field name is indexed in the default column only
        assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN.getColumn(), namespace, fieldName);
        assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN.getColumn(), namespace, fieldName);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespace, fieldName);
        assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN.getColumn(), namespace, fieldName);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespace, fieldName);
        // entity-name property: default and entity-name columns hold the name as-is
        assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN.getColumn(), namespace, entityName);
        assertSingleIndex(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN.getColumn(), namespace, entityName);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespace, entityName);
        // the inverted entity-name column holds the lower-cased output of InvertedValueIndexer instead
        Indexer invertedIndexer = new InvertedValueIndexer();
        String invertedName = Iterables.getOnlyElement(invertedIndexer.getIndexes(new MetadataEntry(dataset1, "key", entityName)));
        assertSingleIndex(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespace, invertedName.toLowerCase());
        assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN.getColumn(), namespace, entityName);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespace, entityName);
        // creation-time property: default and creation-time columns hold the timestamp as-is
        String createdAtText = String.valueOf(createdAt);
        // value expected in the inverted creation-time column
        String invertedCreatedAtText = String.valueOf(Long.MAX_VALUE - createdAt);
        assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN.getColumn(), namespace, createdAtText);
        assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN.getColumn(), namespace, createdAtText);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespace, createdAtText);
        assertSingleIndex(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN.getColumn(), namespace, createdAtText);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespace, createdAtText);
        assertSingleIndex(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespace, invertedCreatedAtText);
    });
}
Also used : InvertedValueIndexer(io.cdap.cdap.data2.metadata.indexer.InvertedValueIndexer) Indexer(io.cdap.cdap.data2.metadata.indexer.Indexer) Test(org.junit.Test)

Example 3 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class SchemaIndexerTest method testSimpleRecord.

/**
 * Indexes a flat record schema with a string, an array and a map field, and checks that the generated
 * indexes match the expected names and {@code name:TYPE} pairs.
 */
@Test
public void testSimpleRecord() {
    // String x
    Schema.Field stringField = Schema.Field.of("x", Schema.of(Schema.Type.STRING));
    // String[] y
    Schema.Field arrayField = Schema.Field.of("y", Schema.arrayOf(Schema.of(Schema.Type.STRING)));
    // Map<byte[],double> z
    Schema.Field mapField = Schema.Field.of("z", Schema.mapOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.DOUBLE)));
    Schema simpleSchema = Schema.recordOf("record1", stringField, arrayField, mapField);

    // the schema is passed to the indexer as the string value of a metadata entry
    MetadataEntry entry = new MetadataEntry(new DatasetId("ns1", "ds1"), KEY, simpleSchema.toString());
    Set<String> actual = new SchemaIndexer().getIndexes(entry);

    Set<String> expected = ImmutableSet.of("record1", "record1:RECORD", "x", "x:STRING", "y", "y:ARRAY", "z", "z:MAP");
    Assert.assertEquals(addKeyPrefixAndPropertiesField(expected), actual);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 4 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class SchemaIndexerTest method testComplexRecord.

/**
 * Indexes a union schema whose first branch is a record containing nested records, maps, arrays and unions,
 * and checks that the generated indexes match the expected names and {@code name:TYPE} pairs.
 */
@Test
public void testComplexRecord() {
    // key type of "map1": a record with three fields
    Schema keyRecord = Schema.recordOf("record21",
        // String x
        Schema.Field.of("x", Schema.of(Schema.Type.STRING)),
        // String[] y
        Schema.Field.of("y", Schema.arrayOf(Schema.of(Schema.Type.STRING))),
        // Map<byte[],double> z
        Schema.Field.of("z", Schema.mapOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.DOUBLE))));
    // Map<array<byte[]>, Map<boolean,byte[]>> a
    Schema nestedMap = Schema.mapOf(Schema.arrayOf(Schema.of(Schema.Type.BYTES)), Schema.mapOf(Schema.of(Schema.Type.BOOLEAN), Schema.of(Schema.Type.BYTES)));
    // value type of "map1": an array of records holding the nested map
    Schema valueArray = Schema.arrayOf(Schema.recordOf("record22", Schema.Field.of("a", nestedMap)));

    Schema.Field map1Field = Schema.Field.of("map1", Schema.mapOf(keyRecord, valueArray));
    Schema.Field nullableIntField = Schema.Field.of("i", Schema.nullableOf(Schema.of(Schema.Type.INT)));
    Schema.Field unionField = Schema.Field.of("j", Schema.unionOf(Schema.of(Schema.Type.INT), Schema.of(Schema.Type.LONG), Schema.of(Schema.Type.NULL)));
    Schema complexSchema = Schema.recordOf("record1", map1Field, nullableIntField, unionField);

    Schema anotherComplexSchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));
    // top-level schema under test: union of the record, a string array and null
    Schema superComplexSchema = Schema.unionOf(complexSchema, anotherComplexSchema, Schema.of(Schema.Type.NULL));

    MetadataEntry entry = new MetadataEntry(new DatasetId("ns1", "ds1"), KEY, superComplexSchema.toString());
    Set<String> actual = new SchemaIndexer().getIndexes(entry);

    Set<String> expected = ImmutableSet.of("map1", "map1:MAP", "record21", "record21:RECORD", "x", "x:STRING", "y", "y:ARRAY", "z", "z:MAP", "record22", "record22:RECORD", "a", "a:MAP", "i", "i:INT", "j", "j:UNION", "record1", "record1:RECORD");
    Assert.assertEquals(addKeyPrefixAndPropertiesField(expected), actual);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 5 with Indexer

use of io.cdap.cdap.data2.metadata.indexer.Indexer in project cdap by caskdata.

the class SchemaIndexerTest method testInvalidSchema.

/**
 * Feeds the indexer a value that is not a schema and checks the indexes it produces for that plain text.
 */
@Test
public void testInvalidSchema() {
    String notASchema = "an invalid schema";
    MetadataEntry entry = new MetadataEntry(new DatasetId("ns1", "ds1"), KEY, notASchema);
    Set<String> actual = new SchemaIndexer().getIndexes(entry);

    // expected indexes: each individual word plus the full string
    Set<String> expected = ImmutableSet.of("an", "invalid", "schema", "an invalid schema");
    Assert.assertEquals(addKeyPrefixAndPropertiesField(expected), actual);
}
Also used : MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)7 Indexer (co.cask.cdap.data2.metadata.indexer.Indexer)4 InvertedValueIndexer (co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer)4 MetadataEntry (io.cdap.cdap.data2.metadata.dataset.MetadataEntry)4 DatasetId (io.cdap.cdap.proto.id.DatasetId)4 Schema (io.cdap.cdap.api.data.schema.Schema)3 BadRequestException (co.cask.cdap.common.BadRequestException)2 DefaultValueIndexer (co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer)2 InvertedTimeIndexer (co.cask.cdap.data2.metadata.indexer.InvertedTimeIndexer)2 SchemaIndexer (co.cask.cdap.data2.metadata.indexer.SchemaIndexer)2 ValueOnlyIndexer (co.cask.cdap.data2.metadata.indexer.ValueOnlyIndexer)2 Indexer (io.cdap.cdap.data2.metadata.indexer.Indexer)2 InvertedValueIndexer (io.cdap.cdap.data2.metadata.indexer.InvertedValueIndexer)2 TransactionExecutor (org.apache.tephra.TransactionExecutor)2 TransactionFailureException (org.apache.tephra.TransactionFailureException)2 Row (co.cask.cdap.api.dataset.table.Row)1 Scanner (co.cask.cdap.api.dataset.table.Scanner)1 EntityTypeSimpleName (co.cask.cdap.proto.element.EntityTypeSimpleName)1 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)1 ImmutableList (com.google.common.collect.ImmutableList)1