use of org.elasticsearch.search.dfs.AggregatedDfs in project elasticsearch by elastic.
the class DfsQueryPhase method run.
@Override
public void run() throws IOException {
// TODO we can potentially also consume the actual per shard results from the initial phase here in the aggregateDfs
// to free up memory early
final AggregatedDfs dfs = searchPhaseController.aggregateDfs(dfsSearchResults);
final CountedCollector<QuerySearchResultProvider> counter = new CountedCollector<>(queryResult::consumeResult, dfsSearchResults.asList().size(), () -> {
context.executeNextPhase(this, nextPhaseFactory.apply(queryResult));
}, context);
for (final AtomicArray.Entry<DfsSearchResult> entry : dfsSearchResults.asList()) {
DfsSearchResult dfsResult = entry.value;
final int shardIndex = entry.index;
final SearchShardTarget searchShardTarget = dfsResult.shardTarget();
Transport.Connection connection = context.getConnection(searchShardTarget.getNodeId());
QuerySearchRequest querySearchRequest = new QuerySearchRequest(context.getRequest(), dfsResult.id(), dfs);
searchTransportService.sendExecuteQuery(connection, querySearchRequest, context.getTask(), ActionListener.wrap(result -> counter.onResult(shardIndex, result, searchShardTarget), exception -> {
try {
if (context.getLogger().isDebugEnabled()) {
context.getLogger().debug((Supplier<?>) () -> new ParameterizedMessage("[{}] Failed to execute query phase", querySearchRequest.id()), exception);
}
counter.onFailure(shardIndex, searchShardTarget, exception);
} finally {
context.sendReleaseSearchContext(querySearchRequest.id(), connection);
}
}));
}
}
use of org.elasticsearch.search.dfs.AggregatedDfs in project elasticsearch by elastic.
the class SearchPhaseController method aggregateDfs.
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
ObjectObjectHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap();
ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
long aggMaxDoc = 0;
for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) {
final Term[] terms = lEntry.value.terms();
final TermStatistics[] stats = lEntry.value.termStatistics();
assert terms.length == stats.length;
for (int i = 0; i < terms.length; i++) {
assert terms[i] != null;
TermStatistics existing = termStatistics.get(terms[i]);
if (existing != null) {
assert terms[i].bytes().equals(existing.term());
// totalTermFrequency is an optional statistic we need to check if either one or both
// are set to -1 which means not present and then set it globally to -1
termStatistics.put(terms[i], new TermStatistics(existing.term(), existing.docFreq() + stats[i].docFreq(), optionalSum(existing.totalTermFreq(), stats[i].totalTermFreq())));
} else {
termStatistics.put(terms[i], stats[i]);
}
}
assert !lEntry.value.fieldStatistics().containsKey(null);
final Object[] keys = lEntry.value.fieldStatistics().keys;
final Object[] values = lEntry.value.fieldStatistics().values;
for (int i = 0; i < keys.length; i++) {
if (keys[i] != null) {
String key = (String) keys[i];
CollectionStatistics value = (CollectionStatistics) values[i];
assert key != null;
CollectionStatistics existing = fieldStatistics.get(key);
if (existing != null) {
CollectionStatistics merged = new CollectionStatistics(key, existing.maxDoc() + value.maxDoc(), optionalSum(existing.docCount(), value.docCount()), optionalSum(existing.sumTotalTermFreq(), value.sumTotalTermFreq()), optionalSum(existing.sumDocFreq(), value.sumDocFreq()));
fieldStatistics.put(key, merged);
} else {
fieldStatistics.put(key, value);
}
}
}
aggMaxDoc += lEntry.value.maxDoc();
}
return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
}
use of org.elasticsearch.search.dfs.AggregatedDfs in project elasticsearch by elastic.
the class TermVectorsService method getTermVectors.
static TermVectorsResponse getTermVectors(IndexShard indexShard, TermVectorsRequest request, LongSupplier nanoTimeSupplier) {
final long startTime = nanoTimeSupplier.getAsLong();
final TermVectorsResponse termVectorsResponse = new TermVectorsResponse(indexShard.shardId().getIndex().getName(), request.type(), request.id());
final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm).version(request.version()).versionType(request.versionType()));
Fields termVectorsByField = null;
AggregatedDfs dfs = null;
TermVectorsFilter termVectorsFilter = null;
/* handle potential wildcards in fields */
if (request.selectedFields() != null) {
handleFieldWildcards(indexShard, request);
}
final Engine.Searcher searcher = indexShard.acquireSearcher("term_vector");
try {
Fields topLevelFields = MultiFields.getFields(get.searcher() != null ? get.searcher().reader() : searcher.reader());
Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
/* from an artificial document */
if (request.doc() != null) {
termVectorsByField = generateTermVectorsFromDoc(indexShard, request);
// if no document indexed in shard, take the queried document itself for stats
if (topLevelFields == null) {
topLevelFields = termVectorsByField;
}
termVectorsResponse.setArtificial(true);
termVectorsResponse.setExists(true);
} else /* or from an existing document */
if (docIdAndVersion != null) {
// fields with stored term vectors
termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
Set<String> selectedFields = request.selectedFields();
// generate tvs for fields where analyzer is overridden
if (selectedFields == null && request.perFieldAnalyzer() != null) {
selectedFields = getFieldsToGenerate(request.perFieldAnalyzer(), termVectorsByField);
}
// fields without term vectors
if (selectedFields != null) {
termVectorsByField = addGeneratedTermVectors(indexShard, get, termVectorsByField, request, selectedFields);
}
termVectorsResponse.setDocVersion(docIdAndVersion.version);
termVectorsResponse.setExists(true);
} else /* no term vectors generated or found */
{
termVectorsResponse.setExists(false);
}
/* if there are term vectors, optional compute dfs and/or terms filtering */
if (termVectorsByField != null) {
if (request.filterSettings() != null) {
termVectorsFilter = new TermVectorsFilter(termVectorsByField, topLevelFields, request.selectedFields(), dfs);
termVectorsFilter.setSettings(request.filterSettings());
try {
termVectorsFilter.selectBestTerms();
} catch (IOException e) {
throw new ElasticsearchException("failed to select best terms", e);
}
}
// write term vectors
termVectorsResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields, dfs, termVectorsFilter);
}
termVectorsResponse.setTookInMillis(TimeUnit.NANOSECONDS.toMillis(nanoTimeSupplier.getAsLong() - startTime));
} catch (Exception ex) {
throw new ElasticsearchException("failed to execute term vector request", ex);
} finally {
searcher.close();
get.release();
}
return termVectorsResponse;
}
Aggregations