Search in sources :

Example 1 with SearchResults

use of co.cask.cdap.data2.metadata.dataset.SearchResults in project cdap by caskdata.

the class DefaultMetadataStore method search.

/**
 * Runs a metadata search over every requested scope and returns a single page of the combined result.
 *
 * <p>The entries and cursors returned for each scope are concatenated, the matched entities are ordered by
 * {@code getSortedEntities}, and the window {@code [offset, offset + limit)} is cut out of that ordering in
 * this method. System- and user-scope metadata is then fetched for the entities inside the window.
 *
 * @param scopes the metadata scopes to search; one lookup is issued per scope
 * @param namespaceId passed through to the per-scope lookup
 * @param searchQuery passed through to the per-scope lookup
 * @param types passed through to the per-scope lookup
 * @param sortInfo used to order the matched entities and echoed (sort-by and sort-order) in the response
 * @param offset number of leading entities to skip; must not be negative
 * @param limit maximum number of entities in the returned page; must not be negative
 * @param numCursors passed through to the per-scope lookup and echoed in the response
 * @param cursor passed through to the per-scope lookup
 * @param showHidden passed through to the per-scope lookup and echoed in the response
 * @param entityScope passed through to the per-scope lookup and echoed in the response
 * @return the response for the selected page; its total is the number of ordered entities before pagination
 * @throws IllegalArgumentException if {@code offset} or {@code limit} is negative
 * @throws BadRequestException declared here; presumably propagated from the per-scope lookup (confirm there)
 */
private MetadataSearchResponse search(Set<MetadataScope> scopes, String namespaceId, String searchQuery, Set<EntityTypeSimpleName> types, SortInfo sortInfo, int offset, int limit, int numCursors, String cursor, boolean showHidden, Set<EntityScope> entityScope) throws BadRequestException {
    if (offset < 0) {
        throw new IllegalArgumentException("offset must not be negative");
    }
    if (limit < 0) {
        throw new IllegalArgumentException("limit must not be negative");
    }

    // Gather the raw entries and cursors from each scope, in scope iteration order.
    List<MetadataEntry> matchedEntries = new LinkedList<>();
    List<String> collectedCursors = new LinkedList<>();
    for (MetadataScope scope : scopes) {
        SearchResults perScope = getSearchResults(scope, namespaceId, searchQuery, types, sortInfo, offset, limit, numCursors, cursor, showHidden, entityScope);
        matchedEntries.addAll(perScope.getResults());
        collectedCursors.addAll(perScope.getCursors());
    }

    // Order the entities (if required by the sort info) before any of them are dropped.
    Set<NamespacedEntityId> orderedEntities = getSortedEntities(matchedEntries, sortInfo);
    int total = orderedEntities.size();

    // Pagination happens here rather than at the dataset level, because:
    // 1. scoring is needed for DEFAULT sort info, so it is performed here for now;
    // 2. even with custom sorting, the elements before the offset and the cursors at the end must be removed.
    // TODO: Figure out how all of this can be done server (HBase) side
    int fromIndex = Math.min(offset, total);
    // The sum is computed as a long so that offset + limit cannot overflow; toIndex is exclusive.
    int toIndex = (int) Math.min((long) offset + limit, (long) total);
    Set<NamespacedEntityId> pageEntities = new LinkedHashSet<>(ImmutableList.copyOf(orderedEntities).subList(fromIndex, toIndex));

    // Fetch metadata for the entities on this page.
    // Note: the fetch happens in a different transaction, so the metadata for some entities may have been
    // removed in the meantime. It is okay not to have metadata for some results when that happens.
    Map<NamespacedEntityId, Metadata> systemMetadata = fetchMetadata(pageEntities, MetadataScope.SYSTEM);
    Map<NamespacedEntityId, Metadata> userMetadata = fetchMetadata(pageEntities, MetadataScope.USER);

    String sortDescription = sortInfo.getSortBy() + " " + sortInfo.getSortOrder();
    return new MetadataSearchResponse(sortDescription, offset, limit, numCursors, total, addMetadataToEntities(pageEntities, systemMetadata, userMetadata), collectedCursors, showHidden, entityScope);
}
Also used : Metadata(co.cask.cdap.data2.metadata.dataset.Metadata) MetadataSearchResponse(co.cask.cdap.proto.metadata.MetadataSearchResponse) SearchResults(co.cask.cdap.data2.metadata.dataset.SearchResults) LinkedList(java.util.LinkedList) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) MetadataScope(co.cask.cdap.api.metadata.MetadataScope)

Example 2 with SearchResults

use of co.cask.cdap.data2.metadata.dataset.SearchResults in project cdap by caskdata.

the class MetadataDatasetTest method testIndexRebuilding.

/**
 * Exercises {@code rebuildIndexes} in two batches of one row each.
 *
 * <p>Metadata for a flow and a dataset is first written with a {@code ReversingIndexer}, after which none of
 * the default-index queries return anything. After the first rebuild batch exactly one of the two entities
 * must be searchable, and after the second batch both must be.
 */
@Test
public void testIndexRebuilding() throws Exception {
    final MetadataDataset dataset = getDataset(DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testIndexRebuilding"));
    TransactionExecutor executor = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) dataset);

    // Write both entries using the reversing indexer instead of the default one.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Indexer reversingIndexer = new ReversingIndexer();
            dataset.setMetadata(new MetadataEntry(flow1, "flowKey", "flowValue"), Collections.singleton(reversingIndexer));
            dataset.setMetadata(new MetadataEntry(dataset1, "datasetKey", "datasetValue"), Collections.singleton(reversingIndexer));
        }
    });

    final String namespaceId = flow1.getNamespace();
    final Set<EntityTypeSimpleName> targetTypes = Collections.singleton(EntityTypeSimpleName.ALL);
    // The four queries checked before and after the rebuild, in the order they are issued.
    final String[] allQueries = { "flowValue", "flowKey:flow*", "datasetValue", "datasetKey:dataset*" };

    // Before any rebuild, none of the queries may match.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (String query : allQueries) {
                List<MetadataEntry> hits = searchByDefaultIndex(dataset, namespaceId, query, targetTypes);
                Assert.assertTrue(hits.isEmpty());
            }
        }
    });

    final AtomicReference<byte[]> nextBatchStartKey = new AtomicReference<>();

    // First batch: re-build indexes for a single row. Now the default indexer should be used.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            byte[] resumeKey = dataset.rebuildIndexes(null, 1);
            nextBatchStartKey.set(resumeKey);
            Assert.assertNotNull(nextBatchStartKey.get());
        }
    });

    // Exactly one of the two entities has been re-indexed; which one is not fixed, so handle both cases.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<MetadataEntry> flowHits = searchByDefaultIndex(dataset, namespaceId, "flowValue", targetTypes);
            List<MetadataEntry> datasetHits = searchByDefaultIndex(dataset, namespaceId, "datasetValue", targetTypes);
            if (flowHits.isEmpty()) {
                // The dataset row was rebuilt first.
                flowHits = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
                Assert.assertTrue(flowHits.isEmpty());
                Assert.assertEquals(1, datasetHits.size());
                datasetHits = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
                Assert.assertEquals(1, datasetHits.size());
            } else {
                // The flow row was rebuilt first.
                Assert.assertEquals(1, flowHits.size());
                flowHits = searchByDefaultIndex(dataset, namespaceId, "flowKey:flow*", targetTypes);
                Assert.assertEquals(1, flowHits.size());
                Assert.assertTrue(datasetHits.isEmpty());
                datasetHits = searchByDefaultIndex(dataset, namespaceId, "datasetKey:dataset*", targetTypes);
                Assert.assertTrue(datasetHits.isEmpty());
            }
        }
    });

    // Second batch: resume from the returned key; a null result is expected once nothing is left.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            byte[] resumeKey = dataset.rebuildIndexes(nextBatchStartKey.get(), 1);
            nextBatchStartKey.set(resumeKey);
            Assert.assertNull(nextBatchStartKey.get());
        }
    });

    // After both batches, every query must match exactly one entry.
    executor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (String query : allQueries) {
                List<MetadataEntry> hits = searchByDefaultIndex(dataset, namespaceId, query, targetTypes);
                Assert.assertEquals(1, hits.size());
            }
        }
    });
}
Also used : TransactionExecutor(org.apache.tephra.TransactionExecutor) AtomicReference(java.util.concurrent.atomic.AtomicReference) TransactionFailureException(org.apache.tephra.TransactionFailureException) BadRequestException(co.cask.cdap.common.BadRequestException) EntityTypeSimpleName(co.cask.cdap.proto.element.EntityTypeSimpleName) Indexer(co.cask.cdap.data2.metadata.indexer.Indexer) InvertedValueIndexer(co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.junit.Test)

Aggregations

MetadataScope (co.cask.cdap.api.metadata.MetadataScope)1 BadRequestException (co.cask.cdap.common.BadRequestException)1 Metadata (co.cask.cdap.data2.metadata.dataset.Metadata)1 MetadataEntry (co.cask.cdap.data2.metadata.dataset.MetadataEntry)1 SearchResults (co.cask.cdap.data2.metadata.dataset.SearchResults)1 Indexer (co.cask.cdap.data2.metadata.indexer.Indexer)1 InvertedValueIndexer (co.cask.cdap.data2.metadata.indexer.InvertedValueIndexer)1 EntityTypeSimpleName (co.cask.cdap.proto.element.EntityTypeSimpleName)1 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)1 MetadataSearchResponse (co.cask.cdap.proto.metadata.MetadataSearchResponse)1 ImmutableList (com.google.common.collect.ImmutableList)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 TransactionExecutor (org.apache.tephra.TransactionExecutor)1 TransactionFailureException (org.apache.tephra.TransactionFailureException)1 Test (org.junit.Test)1