use of com.yahoo.searchlib.aggregation.Grouping in project vespa by vespa-engine.
the class GroupingExecutor method search.
/**
 * Executes any grouping requests attached to the query and adds the resulting root groups to the result.
 * Queries without grouping requests, or whose requests produce no groupings, are passed straight down the chain.
 *
 * @param query     The query to search for.
 * @param execution The execution context used to pass the query on.
 * @return The search result, with one root group hit added per grouping request.
 */
@Override
public Result search(Query query, Execution execution) {
    // A query that fails canonicalization is answered with an illegal-query error right away.
    String canonicalizeError = QueryCanonicalizer.canonicalize(query);
    if (canonicalizeError != null) {
        return new Result(query, ErrorMessage.createIllegalQuery(canonicalizeError));
    }
    query.prepare();

    // Nothing to do here unless the query carries grouping requests.
    List<GroupingRequest> requests = GroupingRequest.getRequests(query);
    if (requests.isEmpty()) {
        return execution.search(query);
    }

    // Translate every request; the produced groupings are collected in the shared map.
    Map<Integer, Grouping> groupings = new HashMap<>();
    List<RequestContext> contexts = new LinkedList<>();
    for (GroupingRequest request : requests) {
        RequestContext context = convertRequest(query, request, groupings);
        contexts.add(context);
    }
    if (groupings.isEmpty()) {
        return execution.search(query);
    }

    // Run the passes needed to complete the groupings.
    Result result = performSearch(query, execution, groupings);

    // Turn the completed groupings into root group hits, one per request.
    HitConverter converter = new HitConverter(this, query);
    for (RequestContext context : contexts) {
        RootGroup root = convertResult(context, groupings, converter);
        context.request.setResultGroup(root);
        result.hits().add(root);
    }
    return result;
}
use of com.yahoo.searchlib.aggregation.Grouping in project vespa by vespa-engine.
the class GroupingExecutor method performSearch.
/**
* Performs the actual search passes to complete all the given {@link Grouping} requests. This method uses the
* grouping map argument as both an input and an output variable, as the contained {@link Grouping} objects are
* updated as results arrive from the back end.
* <p>
* One pass is always run. Further passes are run up to the largest number of levels found among the groupings
* that do not use single-pass execution. The first pass runs on the original query tree root; every later pass
* runs on a copy of the root that was cloned before the first pass was executed.
*
* @param query The query to execute.
* @param execution The execution context used to run the queries.
* @param groupingMap The map of grouping requests to perform.
* @return The result of the first pass, or the result of a pass that was returned early because it carried errors
*         (see the error check inside the pass loop).
* @throws RuntimeException If there is no grouping request to execute for one of the passes.
*/
private Result performSearch(Query query, Execution execution, Map<Integer, Grouping> groupingMap) {
// Determine how many passes to perform.
// Single-pass groupings do not contribute; the others need as many extra passes as they have levels.
int lastPass = 0;
for (Grouping grouping : groupingMap.values()) {
if (!grouping.useSinglePass()) {
lastPass = Math.max(lastPass, grouping.getLevels().size());
}
}
// Perform multi-pass query to complete all grouping requests.
Item origRoot = query.getModel().getQueryTree().getRoot();
// Number of errors already on the query before any pass has run; used by the first-pass error check below.
int prePassErrors = query.errors().size();
Result ret = null;
// Only clone the root when more than one pass will run; with a single pass the original root is used as-is.
Item baseRoot = origRoot;
if (lastPass > 0) {
baseRoot = origRoot.clone();
}
if (query.isTraceable(3) && query.getGroupingSessionCache()) {
query.trace("Grouping in " + (lastPass + 1) + " passes. SessionId='" + query.getSessionId(true) + "'.", 3);
}
for (int pass = 0; pass <= lastPass; ++pass) {
boolean firstPass = (pass == 0);
List<Grouping> passList = getGroupingListForPassN(groupingMap, pass);
if (passList.isEmpty()) {
throw new RuntimeException("No grouping request for pass " + pass + ", bug!");
}
if (log.isLoggable(LogLevel.DEBUG)) {
for (Grouping grouping : passList) {
log.log(LogLevel.DEBUG, "Pass(" + pass + "), Grouping(" + grouping.getId() + "): " + grouping);
}
}
// Select the query tree root to run this pass on.
Item passRoot;
if (firstPass) {
// Use original query the first time.
passRoot = origRoot;
} else if (pass == lastPass) {
// Has already been cloned once, use this for last pass.
passRoot = baseRoot;
} else {
// Intermediate pass: run on a fresh copy so that baseRoot itself stays available for later passes.
// noinspection ConstantConditions
passRoot = baseRoot.clone();
}
if (query.isTraceable(4) && query.getGroupingSessionCache()) {
query.trace("Grouping with session cache '" + query.getGroupingSessionCache() + "' enabled for pass #" + pass + ".", 4);
}
// The root is only replaced when this pass does not run on the original root.
if (origRoot != passRoot) {
query.getModel().getQueryTree().setRoot(passRoot);
}
setGroupingList(query, passList);
Result passResult = execution.search(query);
// Error handling: a later pass with an error is returned immediately. On the first pass the result is only
// returned when its error hit holds more errors than the query had before the passes started, or none at all.
// NOTE(review): presumably this keeps errors that were already on the query from aborting grouping - confirm.
if (passResult.hits().getError() != null) {
if (firstPass) {
if (passResult.hits().getErrorHit().errors().size() > prePassErrors || passResult.hits().getErrorHit().errors().size() == 0) {
return passResult;
}
} else {
return passResult;
}
}
// Fold the groupings returned by this pass into the caller's map.
Map<Integer, Grouping> passGroupingMap = mergeGroupingResults(passResult);
mergeGroupingMaps(groupingMap, passGroupingMap);
// The first pass result is the one handed back to the caller once all passes are done.
if (firstPass) {
ret = passResult;
}
}
if (log.isLoggable(LogLevel.DEBUG)) {
for (Grouping grouping : groupingMap.values()) {
log.log(LogLevel.DEBUG, "Result Grouping(" + grouping.getId() + "): " + grouping);
}
}
return ret;
}
use of com.yahoo.searchlib.aggregation.Grouping in project vespa by vespa-engine.
the class GroupingExecutor method convertRequest.
/**
 * Builds the {@link Grouping} objects for a single {@link GroupingRequest} and registers them in the given map.
 * Each created grouping is assigned the next free id, which is the size of the map at the time it is added, and
 * that id is recorded in the returned context so the results can be matched back to the request later.
 *
 * @param query The query being executed.
 * @param req   The request to convert.
 * @param map   The grouping map to write to.
 * @return The context required to identify the request results.
 */
private RequestContext convertRequest(Query query, GroupingRequest req, Map<Integer, Grouping> map) {
    // Configure the builder from the request and the query, then build.
    RequestBuilder requestBuilder = new RequestBuilder(req.getRequestId());
    requestBuilder.setRootOperation(req.getRootOperation());
    requestBuilder.setDefaultSummaryName(query.getPresentation().getSummary());
    requestBuilder.setTimeZone(req.getTimeZone());
    requestBuilder.addContinuations(req.continuations());
    requestBuilder.build();

    RequestContext context = new RequestContext(req, requestBuilder.getTransform());
    for (Grouping grouping : requestBuilder.getRequestList()) {
        // Ids are handed out sequentially across all requests sharing this map.
        int nextId = map.size();
        grouping.setId(nextId);
        map.put(nextId, grouping);
        context.idList.add(nextId);
    }
    return context;
}
use of com.yahoo.searchlib.aggregation.Grouping in project vespa by vespa-engine.
the class VdsStreamingSearcher method doSearch2.
/**
 * Runs the query as a streaming search through a visitor and builds the result from the hits, summaries and
 * groupings the visitor collected.
 * <p>
 * Exactly one of the streaming user id, group name or selection query properties must be set; otherwise an
 * error result is returned without searching. Failures while visiting or while filling hits are reported as
 * error results (or as errors added to the result) rather than thrown.
 *
 * @param query       The query to execute. Its summary class is set to the default docsum class if unset.
 * @param queryPacket Not used by this implementation.
 * @param cacheKey    Not used by this implementation.
 * @param execution   Not used by this implementation.
 * @return The search result, possibly carrying errors.
 */
@Override
public Result doSearch2(Query query, QueryPacket queryPacket, CacheKey cacheKey, Execution execution) {
    // TODO refactor this method into smaller methods, it's hard to see the actual code
    lazyTrace(query, 7, "Routing to storage cluster ", getStorageClusterRouteSpec());
    if (route == null) {
        route = Route.parse(getStorageClusterRouteSpec());
    }
    lazyTrace(query, 8, "Route is ", route);
    lazyTrace(query, 7, "doSearch2(): query docsum class=", query.getPresentation().getSummary(), ", default docsum class=", getDefaultDocsumClass());
    if (query.getPresentation().getSummary() == null) {
        lazyTrace(query, 6, "doSearch2(): No summary class specified in query, using default: ", getDefaultDocsumClass());
        query.getPresentation().setSummary(getDefaultDocsumClass());
    } else {
        lazyTrace(query, 6, "doSearch2(): Summary class has been specified in query: ", query.getPresentation().getSummary());
    }
    lazyTrace(query, 8, "doSearch2(): rank properties=", query.getRanking());
    lazyTrace(query, 8, "doSearch2(): sort specification=", query.getRanking().getSorting() == null ? null : query.getRanking().getSorting().fieldOrders());

    // Exactly one way of selecting the documents to stream over must be given.
    int documentSelectionQueryParameterCount = 0;
    if (query.properties().getString(streamingUserid) != null) {
        documentSelectionQueryParameterCount++;
    }
    if (query.properties().getString(streamingGroupname) != null) {
        documentSelectionQueryParameterCount++;
    }
    if (query.properties().getString(streamingSelection) != null) {
        documentSelectionQueryParameterCount++;
    }
    if (documentSelectionQueryParameterCount != 1) {
        return new Result(query, ErrorMessage.createBackendCommunicationError("Streaming search needs one and " + "only one of these query parameters to be set: streaming.userid, streaming.groupname, " + "streaming.selection"));
    }

    query.trace("Routing to search cluster " + getSearchClusterConfigId(), 4);
    Visitor visitor = visitorFactory.createVisitor(query, getSearchClusterConfigId(), route);
    try {
        visitor.doSearch();
    } catch (ParseException e) {
        return new Result(query, ErrorMessage.createBackendCommunicationError("Failed to parse document selection string: " + e.getMessage() + "'."));
    } catch (TokenMgrError e) {
        return new Result(query, ErrorMessage.createBackendCommunicationError("Failed to tokenize document selection string: " + e.getMessage() + "'."));
    } catch (TimeoutException e) {
        return new Result(query, ErrorMessage.createTimeout(e.getMessage()));
    } catch (InterruptedException e) {
        // The exception is converted into an error result, so restore the interrupt flag to let code further
        // up the call stack still observe that this thread was interrupted.
        Thread.currentThread().interrupt();
        return new Result(query, ErrorMessage.createBackendCommunicationError(e.getMessage()));
    } catch (IllegalArgumentException e) {
        return new Result(query, ErrorMessage.createBackendCommunicationError(e.getMessage()));
    }

    lazyTrace(query, 8, "offset=", query.getOffset(), ", hits=", query.getHits());
    Result result = new Result(query);
    // Sorted on rank
    List<SearchResult.Hit> hits = visitor.getHits();
    Map<String, DocumentSummary.Summary> summaryMap = visitor.getSummaryMap();
    lazyTrace(query, 7, "total hit count = ", visitor.getTotalHitCount(), ", returned hit count = ", hits.size(), ", summary count = ", summaryMap.size());
    result.setTotalHitCount(visitor.getTotalHitCount());
    query.trace(visitor.getStatistics().toString(), false, 2);
    query.getContext(true).setProperty(STREAMING_STATISTICS, visitor.getStatistics());

    // Build one hit and one summary packet per accepted visitor hit. Hits failing document id verification are
    // skipped, so trailing entries of summaryPackets may remain unset.
    Packet[] summaryPackets = new Packet[hits.size()];
    int index = 0;
    boolean skippedEarlierResult = false;
    for (SearchResult.Hit hit : hits) {
        if (!verifyDocId(hit.getDocId(), query, skippedEarlierResult)) {
            skippedEarlierResult = true;
            continue;
        }
        FastHit fastHit = buildSummaryHit(query, hit);
        result.hits().add(fastHit);
        DocumentSummary.Summary summary = summaryMap.get(hit.getDocId());
        if (summary != null) {
            DocsumPacket dp = new DocsumPacket(summary.getSummary());
            // log.log(LogLevel.SPAM, "DocsumPacket: " + dp);
            summaryPackets[index] = dp;
        } else {
            // A hit without a summary fails the whole search.
            return new Result(query, ErrorMessage.createBackendCommunicationError("Did not find summary for hit with document id " + hit.getDocId()));
        }
        index++;
    }
    if (result.isFilled(query.getPresentation().getSummary())) {
        lazyTrace(query, 8, "Result is filled for summary class ", query.getPresentation().getSummary());
    } else {
        lazyTrace(query, 8, "Result is not filled for summary class ", query.getPresentation().getSummary());
    }

    // Grouping results, if any, are passed on as a single grouping list hit.
    List<Grouping> groupingList = visitor.getGroupings();
    lazyTrace(query, 8, "Grouping list=", groupingList);
    if (!groupingList.isEmpty()) {
        GroupingListHit groupHit = new GroupingListHit(groupingList, getDocsumDefinitionSet(query));
        result.hits().add(groupHit);
    }

    int skippedHits;
    try {
        FillHitsResult fillHitsResult = fillHits(result, summaryPackets, query.getPresentation().getSummary());
        skippedHits = fillHitsResult.skippedHits;
        if (fillHitsResult.error != null) {
            result.hits().addError(ErrorMessage.createTimeout(fillHitsResult.error));
            return result;
        }
    } catch (TimeoutException e) {
        result.hits().addError(ErrorMessage.createTimeout(e.getMessage()));
        return result;
    } catch (IOException e) {
        return new Result(query, ErrorMessage.createBackendCommunicationError("Error filling hits with summary fields"));
    }

    if (skippedHits == 0) {
        // TODO: cache results or result.analyzeHits(); ?
        query.trace("All hits have been filled", 4);
    } else {
        lazyTrace(query, 8, "Skipping some hits for query: ", result.getQuery());
    }
    lazyTrace(query, 8, "Returning result ", result);
    if (skippedHits > 0) {
        getLogger().info("skipping " + skippedHits + " hits for query: " + result.getQuery());
        result.hits().addError(com.yahoo.search.result.ErrorMessage.createTimeout("Missing hit summary data for " + skippedHits + " hits"));
    }
    return result;
}
use of com.yahoo.searchlib.aggregation.Grouping in project vespa by vespa-engine.
the class VdsVisitor method setVisitorParameters.
/**
 * Fills in the visitor parameters from the query: document selection, timeouts, timestamps, priority and load
 * type, ordering, and the library parameters (query, summary class, rank profile, location, rank properties,
 * grouping and sort specification) passed to the "searchvisitor" visitor library.
 *
 * @param searchCluster The search cluster name, passed as the "searchcluster" library parameter (UTF-8 encoded).
 * @param route         The route to set on the visitor parameters.
 */
private void setVisitorParameters(String searchCluster, Route route) {
    // Document selection: the first of user id, group name and raw selection that is set wins.
    // NOTE(review): the user id and group name are concatenated into the selection expression without
    // escaping; if these properties can carry untrusted input, verify that a value containing a quote or
    // operator cannot alter the selection.
    String userId = query.properties().getString(streamingUserid);
    String groupName = query.properties().getString(streamingGroupname);
    String selection = query.properties().getString(streamingSelection);
    if (userId != null) {
        params.setDocumentSelection("id.user==" + userId);
    } else if (groupName != null) {
        params.setDocumentSelection("id.group==\"" + groupName + "\"");
    } else if (selection != null) {
        params.setDocumentSelection(selection);
    }

    // Per bucket visitor timeout
    params.setTimeoutMs(query.getTimeout());
    params.setSessionTimeoutMs(query.getTimeout());
    params.setVisitorLibrary("searchvisitor");
    params.setLocalDataHandler(this);
    params.setVisitHeadersOnly(query.properties().getBoolean(streamingHeadersonly));

    Double fromTimestamp = query.properties().getDouble(streamingFromtimestamp);
    if (fromTimestamp != null) {
        params.setFromTimestamp(fromTimestamp.longValue());
    }
    Double toTimestamp = query.properties().getDouble(streamingTotimestamp);
    if (toTimestamp != null) {
        params.setToTimestamp(toTimestamp.longValue());
    }
    params.visitInconsistentBuckets(true);

    // Priority: VERY_HIGH by default, overridden by a known load type, and finally by an explicit priority.
    params.setPriority(DocumentProtocol.Priority.VERY_HIGH);
    String loadTypeName = query.properties().getString(streamingLoadtype);
    if (loadTypeName != null) {
        LoadType loadType = visitorSessionFactory.getLoadTypeSet().getNameMap().get(loadTypeName);
        if (loadType != null) {
            params.setLoadType(loadType);
            params.setPriority(loadType.getPriority());
        }
    }
    String priorityName = query.properties().getString(streamingPriority);
    if (priorityName != null) {
        params.setPriority(DocumentProtocol.getPriorityByName(priorityName));
    }

    params.setMaxPending(Integer.MAX_VALUE);
    params.setMaxBucketsPerVisitor(Integer.MAX_VALUE);
    params.setTraceLevel(inferSessionTraceLevel(query));

    // Ordered visiting starts at one bucket per visitor; an explicit max-buckets setting below still wins.
    String ordering = query.properties().getString(streamingOrdering);
    if (ordering != null) {
        params.setVisitorOrdering(getOrdering(ordering));
        params.setMaxFirstPassHits(query.getOffset() + query.getHits());
        params.setMaxBucketsPerVisitor(1);
        params.setDynamicallyIncreaseMaxBucketsPerVisitor(true);
    }
    String maxbuckets = query.properties().getString(streamingMaxbucketspervisitor);
    if (maxbuckets != null) {
        params.setMaxBucketsPerVisitor(Integer.parseInt(maxbuckets));
    }

    // The same EncodedData instance is reused for each encodeQueryData call below, so the call order matters.
    EncodedData ed = new EncodedData();
    encodeQueryData(query, 0, ed);
    params.setLibraryParameter("query", ed.getEncodedData());
    params.setLibraryParameter("querystackcount", String.valueOf(ed.getReturned()));
    // Encode with an explicit charset so the bytes do not depend on the platform default encoding.
    params.setLibraryParameter("searchcluster", searchCluster.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    if (query.getPresentation().getSummary() != null) {
        params.setLibraryParameter("summaryclass", query.getPresentation().getSummary());
    } else {
        params.setLibraryParameter("summaryclass", "default");
    }
    params.setLibraryParameter("summarycount", String.valueOf(query.getOffset() + query.getHits()));
    params.setLibraryParameter("rankprofile", query.getRanking().getProfile());
    params.setLibraryParameter("allowslimedocsums", "true");
    params.setLibraryParameter("queryflags", String.valueOf(getQueryFlags(query)));

    if (query.getRanking().getLocation() != null) {
        // The buffer is only needed when there is a location to encode.
        ByteBuffer buf = ByteBuffer.allocate(1024);
        query.getRanking().getLocation().encode(buf);
        buf.flip();
        byte[] af = new byte[buf.remaining()];
        buf.get(af);
        params.setLibraryParameter("location", af);
    }
    if (query.hasEncodableProperties()) {
        encodeQueryData(query, 1, ed);
        params.setLibraryParameter("rankproperties", ed.getEncodedData());
    }

    // Grouping requests are serialized as a count followed by each grouping.
    List<Grouping> groupingList = GroupingExecutor.getGroupingList(query);
    if (!groupingList.isEmpty()) {
        BufferSerializer gbuf = new BufferSerializer(new GrowableByteBuffer());
        gbuf.putInt(null, groupingList.size());
        for (Grouping g : groupingList) {
            g.serialize(gbuf);
        }
        gbuf.flip();
        byte[] blob = gbuf.getBytes(null, gbuf.getBuf().limit());
        params.setLibraryParameter("aggregation", blob);
    }
    if (query.getRanking().getSorting() != null) {
        encodeQueryData(query, 3, ed);
        params.setLibraryParameter("sort", ed.getEncodedData());
    }
    params.setRoute(route);
}
Aggregations