Search in sources :

Example 1 with SortInfo

use of io.cdap.cdap.data2.metadata.dataset.SortInfo in project cdap by caskdata.

In class DatasetMetadataStorage, method search.

/**
 * Translates an SPI {@link SearchRequest} into a dataset-level search request, executes it, and
 * translates the outcome back into an SPI {@link SearchResponse}.
 *
 * <p>If the request carries a non-empty cursor string, the namespaces, query, types, sorting,
 * show-hidden flag and scope are all taken from that cursor instead of from the request itself.
 *
 * @param request the search request; its cursor, if present and non-empty, overrides its other parameters
 * @return the response holding the (possibly truncated) results, the adjusted total and, when the
 *         underlying search returned at least one cursor, a cursor string for the next page
 */
@Override
public SearchResponse search(SearchRequest request) {
    // An absent or empty cursor string means "no cursor".
    Cursor cursor = null;
    if (request.getCursor() != null && !request.getCursor().isEmpty()) {
        cursor = Cursor.fromString(request.getCursor());
    }

    Set<String> namespaces;
    if (cursor == null) {
        namespaces = request.getNamespaces();
    } else {
        namespaces = cursor.getNamespaces();
    }
    ImmutablePair<NamespaceId, Set<EntityScope>> namespaceAndScopes = determineNamespaceAndScopes(namespaces);
    CursorAndOffsetInfo paging = determineCursorOffsetAndLimits(request, cursor);

    // Every remaining search parameter comes from the cursor when there is one, otherwise from the request.
    String query;
    Set<String> types;
    Sorting sorting;
    boolean showHidden;
    MetadataScope scope;
    if (cursor != null) {
        query = cursor.getQuery();
        types = cursor.getTypes();
        sorting = cursor.getSorting() == null ? null : Sorting.of(cursor.getSorting());
        showHidden = cursor.isShowHidden();
        scope = cursor.getScope();
    } else {
        String requestedQuery = request.getQuery();
        // a missing or empty query is replaced by the wildcard query
        query = (requestedQuery == null || requestedQuery.isEmpty()) ? "*" : requestedQuery;
        types = request.getTypes();
        sorting = request.getSorting();
        showHidden = request.isShowHidden();
        scope = request.getScope();
    }
    if (types == null) {
        types = Collections.emptySet();
    }

    SortInfo sortInfo;
    if (sorting == null) {
        sortInfo = SortInfo.DEFAULT;
    } else {
        sortInfo = new SortInfo(sorting.getKey(), SortInfo.SortOrder.valueOf(sorting.getOrder().name()));
    }

    // A single cursor is asked for only if the caller requested one.
    io.cdap.cdap.data2.metadata.dataset.SearchRequest datasetRequest =
        new io.cdap.cdap.data2.metadata.dataset.SearchRequest(
            namespaceAndScopes.getFirst(), query, types, sortInfo,
            paging.getOffsetToRequest(), paging.getLimitToRequest(),
            request.isCursorRequested() ? 1 : 0, paging.getCursor(),
            showHidden, namespaceAndScopes.getSecond());
    MetadataSearchResponse response = search(datasetRequest, scope);

    // Translate the results back, keeping at most limitToRespond of them (the limit sent to the
    // dataset search may differ from the limit we respond with).
    int limitToRespond = paging.getLimitToRespond();
    int offsetToRespond = paging.getOffsetToRespond();
    List<MetadataRecord> results = response.getResults().stream().limit(limitToRespond).map(found -> {
        // fold the per-scope metadata of this result into a single Metadata object
        Metadata combined = Metadata.EMPTY;
        for (Map.Entry<MetadataScope, io.cdap.cdap.api.metadata.Metadata> scoped : found.getMetadata().entrySet()) {
            io.cdap.cdap.api.metadata.Metadata scopedMetadata = scoped.getValue();
            Metadata addition = new Metadata(scoped.getKey(), scopedMetadata.getTags(), scopedMetadata.getProperties());
            combined = mergeDisjointMetadata(combined, addition);
        }
        return new MetadataRecord(found.getMetadataEntity(), combined);
    }).collect(Collectors.toList());

    // Build the cursor for the next page only when the underlying search returned one.
    String nextCursor = null;
    if (response.getCursors() != null && !response.getCursors().isEmpty()) {
        String actualCursor = response.getCursors().get(0);
        Cursor newCursor;
        if (cursor == null) {
            newCursor = new Cursor(offsetToRespond + results.size(), limitToRespond, showHidden, scope, namespaces, types, sorting == null ? null : sorting.toString(), actualCursor, query);
        } else {
            // the new cursor's offset is the previous cursor's offset plus the number of results
            newCursor = new Cursor(cursor, cursor.getOffset() + results.size(), actualCursor);
        }
        nextCursor = newCursor.toString();
    }

    // Shift the reported total by the gap between the offset we respond with and the offset we requested.
    int totalResults = offsetToRespond - paging.getOffsetToRequest() + response.getTotal();
    return new SearchResponse(request, nextCursor, offsetToRespond, limitToRespond, totalResults, results);
}
Also used : MetadataSearchResponse(io.cdap.cdap.proto.metadata.MetadataSearchResponse) ImmutablePair(io.cdap.cdap.common.utils.ImmutablePair) MetadataDirective(io.cdap.cdap.spi.metadata.MetadataDirective) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) Inject(com.google.inject.Inject) HashMap(java.util.HashMap) USER(io.cdap.cdap.api.metadata.MetadataScope.USER) MetadataChange(io.cdap.cdap.spi.metadata.MetadataChange) MetadataStorage(io.cdap.cdap.spi.metadata.MetadataStorage) HashSet(java.util.HashSet) TAG(io.cdap.cdap.spi.metadata.MetadataKind.TAG) ScopedNameOfKind(io.cdap.cdap.spi.metadata.ScopedNameOfKind) Metadata(io.cdap.cdap.spi.metadata.Metadata) Map(java.util.Map) SearchRequest(io.cdap.cdap.spi.metadata.SearchRequest) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) MetadataMutation(io.cdap.cdap.spi.metadata.MetadataMutation) Read(io.cdap.cdap.spi.metadata.Read) EnumSet(java.util.EnumSet) Sorting(io.cdap.cdap.spi.metadata.Sorting) SortInfo(io.cdap.cdap.data2.metadata.dataset.SortInfo) TransactionSystemClient(org.apache.tephra.TransactionSystemClient) PROPERTY(io.cdap.cdap.spi.metadata.MetadataKind.PROPERTY) ImmutableMap(com.google.common.collect.ImmutableMap) Cursor(io.cdap.cdap.common.metadata.Cursor) SYSTEM(io.cdap.cdap.api.metadata.MetadataScope.SYSTEM) Set(java.util.Set) SearchResponse(io.cdap.cdap.spi.metadata.SearchResponse) IOException(java.io.IOException) MetadataKind(io.cdap.cdap.spi.metadata.MetadataKind) ScopedName(io.cdap.cdap.spi.metadata.ScopedName) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) List(java.util.List) EntityScope(io.cdap.cdap.proto.EntityScope) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) Named(com.google.inject.name.Named) Constants(io.cdap.cdap.common.conf.Constants) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DatasetDefinition(io.cdap.cdap.api.dataset.DatasetDefinition) 
MetadataRecord(io.cdap.cdap.spi.metadata.MetadataRecord) MutationOptions(io.cdap.cdap.spi.metadata.MutationOptions) MetadataDataset(io.cdap.cdap.data2.metadata.dataset.MetadataDataset) Collections(java.util.Collections) HashSet(java.util.HashSet) EnumSet(java.util.EnumSet) Set(java.util.Set) Metadata(io.cdap.cdap.spi.metadata.Metadata) Cursor(io.cdap.cdap.common.metadata.Cursor) MetadataRecord(io.cdap.cdap.spi.metadata.MetadataRecord) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope) MetadataSearchResponse(io.cdap.cdap.proto.metadata.MetadataSearchResponse) Sorting(io.cdap.cdap.spi.metadata.Sorting) SortInfo(io.cdap.cdap.data2.metadata.dataset.SortInfo) MetadataSearchResponse(io.cdap.cdap.proto.metadata.MetadataSearchResponse) SearchResponse(io.cdap.cdap.spi.metadata.SearchResponse) NamespaceId(io.cdap.cdap.proto.id.NamespaceId)

Example 2 with SortInfo

use of io.cdap.cdap.data2.metadata.dataset.SortInfo in project cdap by caskdata.

In class SearchHelper, method search.

/**
 * Runs the given search against the dataset of every requested scope, combines the hits, orders
 * them according to the request's sort info and returns the requested page together with the
 * metadata of the entities on that page.
 *
 * @param scopes the metadata scopes whose datasets are searched
 * @param request the search request supplying sort info, offset, limit and cursor settings
 * @return the search response; its total is the number of distinct sorted entities before paging
 */
private MetadataSearchResponse search(Set<MetadataScope> scopes, SearchRequest request) {
    List<MetadataEntry> matches = new LinkedList<>();
    List<String> cursors = new LinkedList<>();
    for (MetadataScope scope : scopes) {
        SearchResults scopeResults = execute(context -> context.getDataset(scope).search(request));
        matches.addAll(scopeResults.getResults());
        cursors.addAll(scopeResults.getCursors());
    }

    int offset = request.getOffset();
    int limit = request.getLimit();
    SortInfo sortInfo = request.getSortInfo();

    // sort if required
    Set<MetadataEntity> ordered = getSortedEntities(matches, sortInfo);
    int total = ordered.size();

    // Pagination is done here rather than at the dataset level, because:
    // 1. scoring is needed for DEFAULT sort info;
    // 2. even with custom sorting, the elements before the offset and the cursors at the end
    //    still have to be removed.
    // TODO: Figure out how all of this can be done server (HBase) side
    int from = Math.min(offset, total);
    // offset + limit is computed as a long and capped at Integer.MAX_VALUE to account for overflow;
    // the end index is exclusive
    int cappedEnd = (int) Math.min(Integer.MAX_VALUE, (long) offset + limit);
    int to = Math.min(cappedEnd, total);
    Set<MetadataEntity> page = new LinkedHashSet<>(ImmutableList.copyOf(ordered).subList(from, to));

    // Fetch metadata for the entities on the page.
    // Note: since the fetch is happening in a different transaction, the metadata for entities may
    // have been removed. It is okay not to have metadata for some results in case this happens.
    Set<MetadataSearchResultRecord> records = execute(context -> addMetadataToEntities(page, fetchMetadata(context.getDataset(SYSTEM), page), fetchMetadata(context.getDataset(USER), page)));

    String sortDescription = sortInfo.getSortBy() + " " + sortInfo.getSortOrder();
    return new MetadataSearchResponse(sortDescription, offset, limit, request.getNumCursors(), total, records, cursors, request.shouldShowHidden(), request.getEntityScopes());
}
Also used : LinkedHashSet(java.util.LinkedHashSet) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) MetadataSearchResponse(io.cdap.cdap.proto.metadata.MetadataSearchResponse) SearchResults(io.cdap.cdap.data2.metadata.dataset.SearchResults) LinkedList(java.util.LinkedList) SortInfo(io.cdap.cdap.data2.metadata.dataset.SortInfo) MetadataSearchResultRecord(io.cdap.cdap.proto.metadata.MetadataSearchResultRecord) MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) MetadataScope(io.cdap.cdap.api.metadata.MetadataScope)

Example 3 with SortInfo

use of io.cdap.cdap.data2.metadata.dataset.SortInfo in project cdap by caskdata.

In class MetadataHttpHandler, method searchMetadata.

/**
 * HTTP endpoint that searches metadata within a namespace and responds with the JSON form of the
 * {@link MetadataSearchResponse}.
 *
 * @param request the incoming HTTP request
 * @param responder used to send the JSON response
 * @param namespaceId the namespace taken from the request path
 * @param searchQuery the URL-encoded search query; must be present and non-empty
 * @param targets optional entity type names restricting the search
 * @param sort the URL-encoded sort specification; empty by default
 * @param offset index of the first result to return
 * @param limit maximum number of results to return
 * @param numCursors number of cursors to return
 * @param cursor cursor to continue from; empty by default
 * @param showHidden whether to show hidden entities
 * @param entityScope optional entity scope, validated before the search
 * @throws BadRequestException if the query is missing, if cursors are used with the default sort
 *         info, or if the root cause of a search failure is a {@code BadRequestException}
 * @throws Exception any other failure raised by the search
 */
@GET
@Path("/namespaces/{namespace-id}/metadata/search")
public void searchMetadata(HttpRequest request,
                           HttpResponder responder,
                           @PathParam("namespace-id") String namespaceId,
                           @QueryParam("query") String searchQuery,
                           @QueryParam("target") List<String> targets,
                           @QueryParam("sort") @DefaultValue("") String sort,
                           @QueryParam("offset") @DefaultValue("0") int offset,
                           // the default limit, 2147483647, is Integer.MAX_VALUE
                           @QueryParam("limit") @DefaultValue("2147483647") int limit,
                           @QueryParam("numCursors") @DefaultValue("0") int numCursors,
                           @QueryParam("cursor") @DefaultValue("") String cursor,
                           @QueryParam("showHidden") @DefaultValue("false") boolean showHidden,
                           @Nullable @QueryParam("entityScope") String entityScope) throws Exception {
    boolean queryMissing = searchQuery == null || searchQuery.isEmpty();
    if (queryMissing) {
        throw new BadRequestException("query is not specified");
    }

    final Set<EntityTypeSimpleName> types;
    if (targets == null) {
        types = Collections.emptySet();
    } else {
        types = ImmutableSet.copyOf(Iterables.transform(targets, STRING_TO_TARGET_TYPE));
    }

    SortInfo sortInfo = SortInfo.of(URLDecoder.decode(sort, "UTF-8"));
    // cursors (either passing one in or asking for some back) require an explicit sort
    boolean cursorsInUse = !cursor.isEmpty() || numCursors != 0;
    if (SortInfo.DEFAULT.equals(sortInfo) && cursorsInUse) {
        throw new BadRequestException("Cursors are not supported when sort info is not specified.");
    }

    try {
        String decodedQuery = URLDecoder.decode(searchQuery, "UTF-8");
        MetadataSearchResponse response = metadataAdmin.search(namespaceId, decodedQuery, types, sortInfo, offset, limit, numCursors, cursor, showHidden, validateEntityScope(entityScope));
        responder.sendJson(HttpResponseStatus.OK, GSON.toJson(response, MetadataSearchResponse.class));
    } catch (Exception e) {
        // if MetadataDataset throws an exception, it gets wrapped; surface a wrapped
        // BadRequestException as a BadRequestException, and rethrow everything else unchanged
        Throwable rootCause = Throwables.getRootCause(e);
        if (!(rootCause instanceof BadRequestException)) {
            throw e;
        }
        throw new BadRequestException(e.getMessage(), e);
    }
}
Also used : EntityTypeSimpleName(co.cask.cdap.proto.element.EntityTypeSimpleName) BadRequestException(co.cask.cdap.common.BadRequestException) MetadataSearchResponse(co.cask.cdap.proto.metadata.MetadataSearchResponse) BadRequestException(co.cask.cdap.common.BadRequestException) IOException(java.io.IOException) NotFoundException(co.cask.cdap.common.NotFoundException) SortInfo(co.cask.cdap.data2.metadata.dataset.SortInfo) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)

Example 4 with SortInfo

use of io.cdap.cdap.data2.metadata.dataset.SortInfo in project cdap by caskdata.

In class SearchHelper, method getSortedEntities.

/**
 * Reduces the raw search hits to an ordered set of distinct entities.
 *
 * <p>For any sort order other than {@code WEIGHTED}, the entities are kept in the order in which
 * the hits were returned. For {@code WEIGHTED}, each entity is scored by the number of hits that
 * refer to it and the entities are ordered with {@code SEARCH_RESULT_DESC_SCORE_COMPARATOR}.
 *
 * @param results the raw search hits; an entity may appear more than once
 * @param sortInfo the sort info whose sort order selects the ordering strategy
 * @return the distinct entities in an insertion-ordered set
 */
private Set<MetadataEntity> getSortedEntities(List<MetadataEntry> results, SortInfo sortInfo) {
    boolean weighted = SortInfo.SortOrder.WEIGHTED == sortInfo.getSortOrder();
    if (!weighted) {
        // the backing storage is expected to have returned the hits in the requested order already,
        // so only de-duplication is needed
        Set<MetadataEntity> inStorageOrder = new LinkedHashSet<>(results.size());
        results.forEach(hit -> inStorageOrder.add(hit.getMetadataEntity()));
        return inStorageOrder;
    }

    // Weighted order: an entity's score is the number of hits that refer to it.
    Map<MetadataEntity, Integer> scores = new HashMap<>();
    for (MetadataEntry hit : results) {
        scores.merge(hit.getMetadataEntity(), 1, Integer::sum);
    }

    // Order the scored entities (descending score, going by the comparator's name).
    List<Map.Entry<MetadataEntity, Integer>> ranked = new ArrayList<>(scores.entrySet());
    ranked.sort(SEARCH_RESULT_DESC_SCORE_COMPARATOR);

    Set<MetadataEntity> byScore = new LinkedHashSet<>(ranked.size());
    ranked.forEach(scored -> byScore.add(scored.getKey()));
    return byScore;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) MetadataEntity(io.cdap.cdap.api.metadata.MetadataEntity) MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MetadataEntry(io.cdap.cdap.data2.metadata.dataset.MetadataEntry) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Aggregations

MetadataEntity (io.cdap.cdap.api.metadata.MetadataEntity)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 MetadataScope (io.cdap.cdap.api.metadata.MetadataScope)2 MetadataEntry (io.cdap.cdap.data2.metadata.dataset.MetadataEntry)2 SortInfo (io.cdap.cdap.data2.metadata.dataset.SortInfo)2 MetadataSearchResponse (io.cdap.cdap.proto.metadata.MetadataSearchResponse)2 IOException (java.io.IOException)2 BadRequestException (co.cask.cdap.common.BadRequestException)1 NotFoundException (co.cask.cdap.common.NotFoundException)1 SortInfo (co.cask.cdap.data2.metadata.dataset.SortInfo)1 EntityTypeSimpleName (co.cask.cdap.proto.element.EntityTypeSimpleName)1 MetadataSearchResponse (co.cask.cdap.proto.metadata.MetadataSearchResponse)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Maps (com.google.common.collect.Maps)1 Sets (com.google.common.collect.Sets)1 Inject (com.google.inject.Inject)1 Named (com.google.inject.name.Named)1 DatasetDefinition (io.cdap.cdap.api.dataset.DatasetDefinition)1 SYSTEM (io.cdap.cdap.api.metadata.MetadataScope.SYSTEM)1 USER (io.cdap.cdap.api.metadata.MetadataScope.USER)1