Use of io.zulia.message.ZuliaQuery.AnalysisResult in project zuliasearch by zuliaio.
Class AnalysisHandler, method handleDocument.
/**
 * Analyzes the configured stored field of a single document.
 *
 * <p>Each value of the stored field is run through {@code analyzer}. Every emitted token is
 * optionally recorded on the result (when the request asks for tokens), then filtered by the
 * configured word-length and shard document-frequency bounds. Tokens that pass the filters are
 * counted for the document and, for the {@code ALL_TERMS_TOP_N} summary type, added directly to
 * the summary term frequencies.
 *
 * <p>When document-level computation is enabled, the document's top terms are added to the
 * result if the request asks for document terms, and fed into the summary when the summary type
 * is {@code TOP_TERMS_TOP_N}. These two options are independent: requesting document terms no
 * longer prevents the document from contributing to the summary.
 *
 * @param document the stored document to read the field value(s) from
 * @return the per-document analysis result when document-level terms were requested;
 *         {@code null} when the handler is disabled, has no stored field configured, or only
 *         contributes to the summary
 * @throws RuntimeException wrapping any exception raised while tokenizing a value
 */
public AnalysisResult handleDocument(Document document) {
    if (storedFieldName == null || !enabled) {
        return null;
    }

    Object storeFieldValues = ResultHelper.getValueFromMongoDocument(document, storedFieldName);

    AnalysisResult.Builder analysisResult = AnalysisResult.newBuilder();
    analysisResult.setAnalysisRequest(analysisRequest);

    // Per-document counts are needed for document-level output and for the TOP_TERMS_TOP_N summary.
    boolean needDocFreq = computeDocLevel || AnalysisRequest.SummaryType.TOP_TERMS_TOP_N.equals(summaryType);
    final TermFreq docTermFreq = needDocFreq ? new TermFreq(docFreq) : null;

    final boolean collectTokens = analysisRequest.getTokens();
    final boolean summarizeAllTerms = summaryLevelEnabled && AnalysisRequest.SummaryType.ALL_TERMS_TOP_N.equals(summaryType);

    ZuliaUtil.handleLists(storeFieldValues, (value) -> {
        String content = value.toString();
        try (TokenStream tokenStream = analyzer.tokenStream(indexField, content)) {
            tokenStream.reset();
            // Look the attribute up once; the same instance is updated in place for every token.
            CharTermAttribute termAttribute = tokenStream.getAttribute(CharTermAttribute.class);

            while (tokenStream.incrementToken()) {
                String token = termAttribute.toString();

                // Raw tokens are recorded before any filtering.
                if (collectTokens) {
                    analysisResult.addToken(token);
                }

                if (minWordLength > 0 && token.length() < minWordLength) {
                    continue;
                }
                if (maxWordLength > 0 && token.length() > maxWordLength) {
                    continue;
                }

                // Only look up the shard document frequency when a bound is actually configured.
                if (maxShardDocFreqCount != null || minShardDocFreqCount != null) {
                    int termDocFreq = this.docFreq.getDocFreq(token);
                    if (minShardDocFreqCount != null && termDocFreq < minShardDocFreqCount) {
                        continue;
                    }
                    if (maxShardDocFreqCount != null && termDocFreq > maxShardDocFreqCount) {
                        continue;
                    }
                }

                if (docTermFreq != null) {
                    docTermFreq.addTerm(token);
                }
                if (summarizeAllTerms) {
                    summaryTermFreq.addTerm(token);
                }
            }

            // Lucene's consumer contract requires end() after the last incrementToken() and before close().
            tokenStream.end();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    });

    if (!computeDocLevel) {
        return null;
    }

    List<Term.Builder> termBuilderList = docTermFreq.getTopTerms(analysisRequest.getTopN(), analysisRequest.getTermSort());

    // Copy the terms into the result first so the per-document output is fixed before the
    // same builders are handed to the summary.
    boolean returnDocTerms = analysisRequest.getDocTerms();
    if (returnDocTerms) {
        termBuilderList.forEach(analysisResult::addTerms);
    }

    // Previously skipped whenever document terms were returned, which left the summary incomplete.
    if (summaryLevelEnabled && AnalysisRequest.SummaryType.TOP_TERMS_TOP_N.equals(summaryType)) {
        termBuilderList.forEach(summaryTermFreq::addTerm);
    }

    // NOTE(review): tokens collected above are only returned together with document terms;
    // a tokens-only request still yields null here, as before. Confirm whether that is intended.
    return returnDocTerms ? analysisResult.build() : null;
}
Use of io.zulia.message.ZuliaQuery.AnalysisResult in project zuliasearch by zuliaio.
Class QueryCombiner, method getQueryResponse.
/**
 * Builds the combined {@link QueryResponse} from all shard responses.
 *
 * <p>Total hits are summed across shards. Facet groups and stat groups are routed to one
 * combiner per request. Analysis terms are merged per analysis request by summing document
 * frequency, score and term frequency for terms with the same value, and the top terms are then
 * selected for each request in {@code analysisRequestList}. Scored results are merged when there
 * is more than one shard, otherwise the single shard's results are used as is. The first
 * {@code start} results are left out of the response, and the last-result marker is set from
 * the per-index last results.
 *
 * @return the combined response
 * @throws Exception if validation or any combining step fails
 */
public QueryResponse getQueryResponse() throws Exception {
    validate();

    final int shardCount = shardResponses.size();

    long totalHits = shardResponses.stream().mapToLong(ShardQueryResponse::getTotalHits).sum();
    long returnedHits = shardResponses.stream().mapToLong(response -> response.getScoredResultList().size()).sum();

    QueryResponse.Builder builder = QueryResponse.newBuilder();
    builder.setTotalHits(totalHits);

    int resultsSize = Math.min(amount, (int) returnedHits);

    Map<CountRequest, FacetCombiner> facetCombinerMap = new HashMap<>();
    Map<StatRequest, StatCombiner> statCombinerMap = new HashMap<>();
    Map<AnalysisRequest, Map<String, Term.Builder>> analysisRequestToTermMap = new HashMap<>();

    for (int shardIndex = 0; shardIndex < shardCount; shardIndex++) {
        ShardQueryResponse shardResponse = shardResponses.get(shardIndex);

        for (FacetGroup facetGroup : shardResponse.getFacetGroupList()) {
            facetCombinerMap
                    .computeIfAbsent(facetGroup.getCountRequest(), request -> new FacetCombiner(request, shardCount))
                    .handleFacetGroupForShard(facetGroup, shardIndex);
        }

        for (ZuliaQuery.StatGroup statGroup : shardResponse.getStatGroupList()) {
            statCombinerMap
                    .computeIfAbsent(statGroup.getStatRequest(), request -> new StatCombiner(request, shardCount))
                    .handleStatGroupForShard(statGroup, shardIndex);
        }

        for (AnalysisResult shardAnalysis : shardResponse.getAnalysisResultList()) {
            Map<String, Term.Builder> mergedTerms = analysisRequestToTermMap
                    .computeIfAbsent(shardAnalysis.getAnalysisRequest(), request -> new HashMap<>());

            for (Term shardTerm : shardAnalysis.getTermsList()) {
                // Start each new term at zero so the sums below work the same for first and later shards.
                Term.Builder merged = mergedTerms.computeIfAbsent(shardTerm.getValue(),
                        value -> Term.newBuilder().setValue(value).setDocFreq(0).setTermFreq(0));
                merged.setDocFreq(merged.getDocFreq() + shardTerm.getDocFreq());
                merged.setScore(merged.getScore() + shardTerm.getScore());
                merged.setTermFreq(merged.getTermFreq() + shardTerm.getTermFreq());
            }
        }
    }

    // Emit analysis results in the order of the requests; requests no shard answered are skipped.
    for (AnalysisRequest analysisRequest : analysisRequestList) {
        Map<String, Term.Builder> mergedTerms = analysisRequestToTermMap.get(analysisRequest);
        if (mergedTerms == null) {
            continue;
        }
        List<Term.Builder> candidates = new ArrayList<>(mergedTerms.values());
        AnalysisResult.Builder combinedAnalysis = AnalysisResult.newBuilder().setAnalysisRequest(analysisRequest);
        TermFreq.getTopTerms(candidates, analysisRequest.getTopN(), analysisRequest.getTermSort())
                .forEach(combinedAnalysis::addTerms);
        builder.addAnalysisResult(combinedAnalysis);
    }

    for (FacetCombiner facetCombiner : facetCombinerMap.values()) {
        builder.addFacetGroup(facetCombiner.getCombinedFacetGroup());
    }

    for (StatCombiner statCombiner : statCombinerMap.values()) {
        builder.addStatGroup(statCombiner.getCombinedStatGroup());
    }

    Map<String, ScoredResult[]> lastIndexResultMap = createLastIndexResultMapWithPreviousLastResults();

    List<ScoredResult> results;
    if (shardCount > 1) {
        results = mergeResults((int) returnedHits, resultsSize, lastIndexResultMap);
    }
    else {
        // Single shard: nothing to merge, just record its last result for paging.
        ShardQueryResponse onlyShard = shardResponses.get(0);
        results = onlyShard.getScoredResultList();
        if (!results.isEmpty()) {
            ScoredResult[] lastForIndex = lastIndexResultMap.get(onlyShard.getIndexName());
            lastForIndex[onlyShard.getShardNumber()] = results.get(results.size() - 1);
        }
    }

    if (start <= 0) {
        // A non-positive start skips nothing, so every result is returned.
        builder.addAllResults(results);
    }
    else {
        results.stream().skip(start).forEach(builder::addResults);
    }

    builder.setLastResult(createLastResult(lastIndexResultMap));

    return builder.build();
}
Aggregations