Search in sources :

Example 6 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class SchemaIndexerTest method testSimpleSchema.

/**
 * Checks that {@link SchemaIndexer} produces an empty set of indexes for a metadata entry whose value is
 * the string form of a schema made of a single {@code INT} type.
 */
@Test
public void testSimpleSchema() throws Exception {
    // String form of a schema that consists of nothing but a primitive INT type.
    String schemaText = Schema.of(Schema.Type.INT).toString();
    SchemaIndexer schemaIndexer = new SchemaIndexer();
    MetadataEntry schemaEntry = new MetadataEntry(new DatasetId("ns1", "ds1"), KEY, schemaText);
    Set<String> indexes = schemaIndexer.getIndexes(schemaEntry);
    // The test expects such a schema to contribute no indexes at all.
    Assert.assertEquals(Collections.emptySet(), indexes);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 7 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class SearchHelper method search.

/**
 * Executes the search request against the dataset of every given scope, merges the hits, sorts them and
 * returns the requested page of entities together with their SYSTEM and USER metadata.
 *
 * @param scopes the metadata scopes whose datasets are searched
 * @param request the search request; provides offset, limit, sort info, cursor count and response flags
 * @return the response carrying the requested page, the total number of sorted entities and all collected cursors
 */
private MetadataSearchResponse search(Set<MetadataScope> scopes, SearchRequest request) {
    // Collect the raw hits and cursors from the dataset of each requested scope.
    List<MetadataEntry> hits = new LinkedList<>();
    List<String> collectedCursors = new LinkedList<>();
    for (MetadataScope scope : scopes) {
        SearchResults scopeResults = execute(context -> context.getDataset(scope).search(request));
        hits.addAll(scopeResults.getResults());
        collectedCursors.addAll(scopeResults.getCursors());
    }

    int offset = request.getOffset();
    int limit = request.getLimit();
    SortInfo sortInfo = request.getSortInfo();

    // Sort (if required) before paginating.
    Set<MetadataEntity> sortedEntities = getSortedEntities(hits, sortInfo);
    int total = sortedEntities.size();

    // Pagination happens here rather than at the dataset level, because:
    // 1. scoring is needed for DEFAULT sort info, so it is performed here for now;
    // 2. even with custom sorting, the elements before the offset and the cursors at the end must be removed.
    // TODO: Figure out how all of this can be done server (HBase) side
    int fromIndex = Math.min(offset, total);
    // Sum in long arithmetic so that offset + limit cannot overflow an int, then clamp to the result size.
    int toIndex = (int) Math.min(Integer.MAX_VALUE, (long) offset + limit);
    toIndex = Math.min(toIndex, total);

    // Copy into a list to get positional access; subList's end index is exclusive.
    List<MetadataEntity> orderedEntities = ImmutableList.copyOf(sortedEntities);
    Set<MetadataEntity> subSortedEntities = new LinkedHashSet<>(orderedEntities.subList(fromIndex, toIndex));

    // Fetch metadata for the entities on the requested page.
    // Note: this fetch runs in a different transaction, so metadata for some entities may have been removed
    // in the meantime. It is okay not to have metadata for some results when that happens.
    Set<MetadataSearchResultRecord> finalResults = execute(
        context -> addMetadataToEntities(
            subSortedEntities,
            fetchMetadata(context.getDataset(SYSTEM), subSortedEntities),
            fetchMetadata(context.getDataset(USER), subSortedEntities)));

    String sortDescription = sortInfo.getSortBy() + " " + sortInfo.getSortOrder();
    return new MetadataSearchResponse(
        sortDescription,
        offset,
        limit,
        request.getNumCursors(),
        total,
        finalResults,
        collectedCursors,
        request.shouldShowHidden(),
        request.getEntityScopes());
}
Also used : LinkedHashSet(java.util.LinkedHashSet) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) MetadataSearchResponse(io.cdap.cdap.proto.metadata.MetadataSearchResponse) SearchResults(io.cdap.cdap.data2.metadata.dataset.SearchResults) LinkedList(java.util.LinkedList) SortInfo(io.cdap.cdap.data2.metadata.dataset.SortInfo) MetadataSearchResultRecord(io.cdap.cdap.proto.metadata.MetadataSearchResultRecord) MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope)

Example 8 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class MetadataDatasetTest method testMultipleIndexes.

/**
 * Adds a plain property, a schema property, an entity-name property and a creation-time property, then
 * checks for each stored value which index columns contain exactly one index and which contain none.
 */
@Test
public void testMultipleIndexes() throws Exception {
    final String plainValue = "value";
    final String fieldName = "body";
    final String schemaText =
        Schema.recordOf("schema", Schema.Field.of(fieldName, Schema.of(Schema.Type.BYTES))).toString();
    final String entityName = "dataset1";
    final long createdAt = System.currentTimeMillis();
    final String createdAtText = String.valueOf(createdAt);

    // First transaction: write the four properties under test.
    txnl.execute(() -> {
        dataset.addProperty(program1, "key", plainValue);
        dataset.addProperty(program1, MetadataConstants.SCHEMA_KEY, schemaText);
        dataset.addProperty(dataset1, MetadataConstants.ENTITY_NAME_KEY, entityName);
        dataset.addProperty(dataset1, MetadataConstants.CREATION_TIME_KEY, createdAtText);
    });

    final String namespaceId = program1.getValue(MetadataEntity.NAMESPACE);

    // Second transaction: verify the contents of every index column.
    txnl.execute(() -> {
        // plain property value: present in the default index column only
        assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN.getColumn(), namespaceId, plainValue);
        assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN.getColumn(), namespaceId, plainValue);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespaceId, plainValue);
        assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN.getColumn(), namespaceId, plainValue);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespaceId, plainValue);

        // schema property: its field name is present in the default index column only
        assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN.getColumn(), namespaceId, fieldName);
        assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN.getColumn(), namespaceId, fieldName);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespaceId, fieldName);
        assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN.getColumn(), namespaceId, fieldName);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespaceId, fieldName);

        // entity name: present in the default and entity-name columns as-is ...
        assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN.getColumn(), namespaceId, entityName);
        assertSingleIndex(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN.getColumn(), namespaceId, entityName);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespaceId, entityName);
        // ... and in the inverted entity-name column as the lower-cased output of InvertedValueIndexer
        String invertedName = Iterables.getOnlyElement(
            new InvertedValueIndexer().getIndexes(new MetadataEntry(dataset1, "key", entityName)));
        assertSingleIndex(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespaceId, invertedName.toLowerCase());
        assertNoIndexes(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN.getColumn(), namespaceId, entityName);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespaceId, entityName);

        // creation time: present in the default and creation-time columns as-is ...
        assertSingleIndex(dataset, MetadataDataset.DEFAULT_INDEX_COLUMN.getColumn(), namespaceId, createdAtText);
        assertNoIndexes(dataset, MetadataDataset.ENTITY_NAME_INDEX_COLUMN.getColumn(), namespaceId, createdAtText);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_ENTITY_NAME_INDEX_COLUMN.getColumn(), namespaceId, createdAtText);
        assertSingleIndex(dataset, MetadataDataset.CREATION_TIME_INDEX_COLUMN.getColumn(), namespaceId, createdAtText);
        assertNoIndexes(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespaceId, createdAtText);
        // ... and in the inverted creation-time column as Long.MAX_VALUE minus the timestamp
        assertSingleIndex(dataset, MetadataDataset.INVERTED_CREATION_TIME_INDEX_COLUMN.getColumn(), namespaceId, String.valueOf(Long.MAX_VALUE - createdAt));
    });
}
Also used : Indexer(io.cdap.cdap.data2.metadata.indexer.Indexer) InvertedValueIndexer(io.cdap.cdap.data2.metadata.indexer.InvertedValueIndexer) Test(org.junit.Test)

Example 9 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class DefaultValueIndexerTest method testSingleSplitTags.

/**
 * Checks the indexes generated for a tags entry whose value is {@code "foo bar"}: each of the two tokens is
 * expected both on its own and prefixed with the tags key and a colon.
 */
@Test
public void testSingleSplitTags() {
    MetadataEntry tagsEntry = new MetadataEntry(NamespaceId.DEFAULT.app("a"), MetadataConstants.TAGS_KEY, "foo bar");
    Set<String> expectedIndexes = new HashSet<>();
    for (String token : new String[] {"foo", "bar"}) {
        // every token appears bare and qualified by the tags key
        expectedIndexes.add(token);
        expectedIndexes.add(MetadataConstants.TAGS_KEY + ":" + token);
    }
    Assert.assertEquals(expectedIndexes, indexer.getIndexes(tagsEntry));
}
Also used : MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 10 with MetadataEntry

use of io.cdap.cdap.data2.metadata.dataset.MetadataEntry in project cdap by caskdata.

the class DefaultValueIndexerTest method testSingleSplitProperty.

/**
 * Checks the indexes generated for a property entry with key {@code "key"} and value {@code "foo bar"}:
 * the whole value and each token, bare and key-qualified, plus a {@code properties:key} index.
 */
@Test
public void testSingleSplitProperty() {
    MetadataEntry propertyEntry = new MetadataEntry(NamespaceId.DEFAULT.app("a"), "key", "foo bar");
    // CDAP-13629 - seems odd 'foo bar' is generated here, but not for a single tag 'foo bar'
    String[] expectedTerms = {
        "foo bar",
        "foo",
        "bar",
        "key:foo bar",
        "key:foo",
        "key:bar",
        "properties:key"
    };
    Set<String> expectedIndexes = new HashSet<>();
    for (String term : expectedTerms) {
        expectedIndexes.add(term);
    }
    Assert.assertEquals(expectedIndexes, indexer.getIndexes(propertyEntry));
}
Also used : MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

Test (org.junit.Test): 16; MetadataEntry (io.cdap.cdap.data2.metadata.dataset.MetadataEntry): 13; MetadataEntry (co.cask.cdap.data2.metadata.dataset.MetadataEntry): 7; HashSet (java.util.HashSet): 7; ArrayList (java.util.ArrayList): 5; ImmutableMap (com.google.common.collect.ImmutableMap): 4; DatasetId (io.cdap.cdap.proto.id.DatasetId): 4; Schema (co.cask.cdap.api.data.schema.Schema): 3; DatasetId (co.cask.cdap.proto.id.DatasetId): 3; Schema (io.cdap.cdap.api.data.schema.Schema): 3; MetadataEntity (io.cdap.cdap.api.metadata.MetadataEntity): 3; MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey): 3; HashMap (java.util.HashMap): 3; LinkedHashSet (java.util.LinkedHashSet): 3; Map (java.util.Map): 3; NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId): 2; Put (io.cdap.cdap.api.dataset.table.Put): 2; Row (io.cdap.cdap.api.dataset.table.Row): 2; Indexer (io.cdap.cdap.data2.metadata.indexer.Indexer): 2; InvertedValueIndexer (io.cdap.cdap.data2.metadata.indexer.InvertedValueIndexer): 2