Search in sources :

Example 1 with AnalysisResult

use of io.zulia.message.ZuliaQuery.AnalysisResult in project zuliasearch by zuliaio.

From the class AnalysisHandler, method handleDocument.

/**
 * Tokenizes the stored field value(s) of a document with the configured analyzer and
 * accumulates term frequencies for the document and/or the running summary.
 *
 * <p>Every token produced by the analyzer is added to the result when
 * {@code analysisRequest.getTokens()} is set. Tokens are then filtered by the word-length
 * and shard document-frequency limits before being counted.
 *
 * @param document the document whose {@code storedFieldName} value is analyzed
 * @return the built analysis result containing the document's top terms when document-level
 *         computation is on and {@code analysisRequest.getDocTerms()} is set; {@code null}
 *         in every other case (including when the handler is disabled or has no stored field)
 * @throws RuntimeException wrapping any exception raised while tokenizing a value
 */
public AnalysisResult handleDocument(Document document) {
    if (storedFieldName == null || !enabled) {
        return null;
    }
    Object storeFieldValues = ResultHelper.getValueFromMongoDocument(document, storedFieldName);
    AnalysisResult.Builder analysisResult = AnalysisResult.newBuilder();
    analysisResult.setAnalysisRequest(analysisRequest);
    boolean needDocFreq = computeDocLevel || AnalysisRequest.SummaryType.TOP_TERMS_TOP_N.equals(summaryType);
    // Per-document counter; only allocated when something will read it.
    final TermFreq docTermFreq = needDocFreq ? new TermFreq(docFreq) : null;
    ZuliaUtil.handleLists(storeFieldValues, (value) -> {
        String content = value.toString();
        try (TokenStream tokenStream = analyzer.tokenStream(indexField, content)) {
            // Fetch the term attribute once up front instead of looking it up per token.
            CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                String token = termAttribute.toString();
                // Raw tokens are reported before any length / frequency filtering.
                if (analysisRequest.getTokens()) {
                    analysisResult.addToken(token);
                }
                if (minWordLength > 0 && token.length() < minWordLength) {
                    continue;
                }
                if (maxWordLength > 0 && token.length() > maxWordLength) {
                    continue;
                }
                if (maxShardDocFreqCount != null || minShardDocFreqCount != null) {
                    // Look the frequency up only when at least one bound is configured.
                    int termDocFreq = this.docFreq.getDocFreq(token);
                    if (minShardDocFreqCount != null && termDocFreq < minShardDocFreqCount) {
                        continue;
                    }
                    if (maxShardDocFreqCount != null && termDocFreq > maxShardDocFreqCount) {
                        continue;
                    }
                }
                if (needDocFreq) {
                    docTermFreq.addTerm(token);
                }
                if (summaryLevelEnabled && AnalysisRequest.SummaryType.ALL_TERMS_TOP_N.equals(summaryType)) {
                    summaryTermFreq.addTerm(token);
                }
            }
            // Complete the TokenStream consumer workflow: end() must follow the last token
            // and precede close() (close is handled by try-with-resources).
            tokenStream.end();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    });
    if (computeDocLevel) {
        List<Term.Builder> termBuilderList = docTermFreq.getTopTerms(analysisRequest.getTopN(), analysisRequest.getTermSort());
        if (analysisRequest.getDocTerms()) {
            termBuilderList.forEach(analysisResult::addTerms);
            return analysisResult.build();
        }
        if (summaryLevelEnabled && AnalysisRequest.SummaryType.TOP_TERMS_TOP_N.equals(summaryType)) {
            // Feed this document's top terms into the cross-document summary.
            termBuilderList.forEach(summaryTermFreq::addTerm);
        }
    }
    return null;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) TermFreq(io.zulia.server.analysis.frequency.TermFreq) AnalysisResult(io.zulia.message.ZuliaQuery.AnalysisResult) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute)

Example 2 with AnalysisResult

use of io.zulia.message.ZuliaQuery.AnalysisResult in project zuliasearch by zuliaio.

From the class QueryCombiner, method getQueryResponse.

/**
 * Combines the per-shard responses into a single {@link QueryResponse}.
 *
 * <p>Sums total hits across shards, merges facet groups, stat groups and analysis terms
 * (summing doc frequency, score and term frequency per term value), then assembles the
 * scored results, dropping the first {@code start} of them, and records the last result.
 *
 * @return the combined query response
 * @throws Exception declared by the signature; presumably raised by {@code validate()} or
 *         the merge helpers, which are not visible here
 */
public QueryResponse getQueryResponse() throws Exception {
    validate();

    Map<CountRequest, FacetCombiner> facetCombiners = new HashMap<>();
    Map<StatRequest, StatCombiner> statCombiners = new HashMap<>();
    Map<AnalysisRequest, Map<String, Term.Builder>> termsByRequest = new HashMap<>();

    long hitTotal = 0;
    long hitsReturned = 0;
    for (int shardIndex = 0; shardIndex < shardResponses.size(); shardIndex++) {
        ShardQueryResponse shardResponse = shardResponses.get(shardIndex);
        hitTotal += shardResponse.getTotalHits();
        hitsReturned += shardResponse.getScoredResultList().size();

        for (FacetGroup facetGroup : shardResponse.getFacetGroupList()) {
            facetCombiners.computeIfAbsent(facetGroup.getCountRequest(), request -> new FacetCombiner(request, shardResponses.size()))
                    .handleFacetGroupForShard(facetGroup, shardIndex);
        }

        for (ZuliaQuery.StatGroup statGroup : shardResponse.getStatGroupList()) {
            statCombiners.computeIfAbsent(statGroup.getStatRequest(), request -> new StatCombiner(request, shardResponses.size()))
                    .handleStatGroupForShard(statGroup, shardIndex);
        }

        for (AnalysisResult shardAnalysis : shardResponse.getAnalysisResultList()) {
            Map<String, Term.Builder> mergedTerms = termsByRequest.computeIfAbsent(shardAnalysis.getAnalysisRequest(), request -> new HashMap<>());
            for (Term shardTerm : shardAnalysis.getTermsList()) {
                // Start each term at zero, then add this shard's contribution.
                Term.Builder merged = mergedTerms.computeIfAbsent(shardTerm.getValue(),
                        termValue -> Term.newBuilder().setValue(termValue).setDocFreq(0).setTermFreq(0));
                merged.setDocFreq(merged.getDocFreq() + shardTerm.getDocFreq());
                merged.setScore(merged.getScore() + shardTerm.getScore());
                merged.setTermFreq(merged.getTermFreq() + shardTerm.getTermFreq());
            }
        }
    }

    QueryResponse.Builder response = QueryResponse.newBuilder();
    response.setTotalHits(hitTotal);

    // Emit analysis results in the order the requests were given.
    for (AnalysisRequest request : analysisRequestList) {
        Map<String, Term.Builder> mergedTerms = termsByRequest.get(request);
        if (mergedTerms == null) {
            continue;
        }
        List<Term.Builder> candidates = new ArrayList<>(mergedTerms.values());
        AnalysisResult.Builder combinedAnalysis = AnalysisResult.newBuilder().setAnalysisRequest(request);
        TermFreq.getTopTerms(candidates, request.getTopN(), request.getTermSort()).forEach(combinedAnalysis::addTerms);
        response.addAnalysisResult(combinedAnalysis);
    }

    for (FacetCombiner combiner : facetCombiners.values()) {
        response.addFacetGroup(combiner.getCombinedFacetGroup());
    }
    for (StatCombiner combiner : statCombiners.values()) {
        response.addStatGroup(combiner.getCombinedStatGroup());
    }

    Map<String, ScoredResult[]> lastResultsByIndex = createLastIndexResultMapWithPreviousLastResults();
    List<ScoredResult> results;
    if (shardResponses.size() > 1) {
        int resultsSize = Math.min(amount, (int) hitsReturned);
        results = mergeResults((int) hitsReturned, resultsSize, lastResultsByIndex);
    } else {
        // Single shard: its results are used as-is, and its final hit becomes the last result.
        ShardQueryResponse onlyShard = shardResponses.get(0);
        results = onlyShard.getScoredResultList();
        if (!results.isEmpty()) {
            ScoredResult[] lastForIndex = lastResultsByIndex.get(onlyShard.getIndexName());
            lastForIndex[onlyShard.getShardNumber()] = results.get(results.size() - 1);
        }
    }

    if (start == 0) {
        response.addAllResults(results);
    } else {
        // Drop the first `start` results; a negative start keeps everything.
        results.stream().skip(Math.max(start, 0)).forEach(response::addResults);
    }

    response.setLastResult(createLastResult(lastResultsByIndex));
    return response.build();
}
Also used : AnalysisRequest(io.zulia.message.ZuliaQuery.AnalysisRequest) HashMap(java.util.HashMap) ZuliaQuery(io.zulia.message.ZuliaQuery) ArrayList(java.util.ArrayList) CountRequest(io.zulia.message.ZuliaQuery.CountRequest) StatRequest(io.zulia.message.ZuliaQuery.StatRequest) FacetGroup(io.zulia.message.ZuliaQuery.FacetGroup) ShardQueryResponse(io.zulia.message.ZuliaQuery.ShardQueryResponse) Term(io.zulia.message.ZuliaBase.Term) AnalysisResult(io.zulia.message.ZuliaQuery.AnalysisResult) QueryResponse(io.zulia.message.ZuliaServiceOuterClass.QueryResponse) ShardQueryResponse(io.zulia.message.ZuliaQuery.ShardQueryResponse) InternalQueryResponse(io.zulia.message.ZuliaServiceOuterClass.InternalQueryResponse) ScoredResult(io.zulia.message.ZuliaQuery.ScoredResult) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

AnalysisResult (io.zulia.message.ZuliaQuery.AnalysisResult): 2; Term (io.zulia.message.ZuliaBase.Term): 1; ZuliaQuery (io.zulia.message.ZuliaQuery): 1; AnalysisRequest (io.zulia.message.ZuliaQuery.AnalysisRequest): 1; CountRequest (io.zulia.message.ZuliaQuery.CountRequest): 1; FacetGroup (io.zulia.message.ZuliaQuery.FacetGroup): 1; ScoredResult (io.zulia.message.ZuliaQuery.ScoredResult): 1; ShardQueryResponse (io.zulia.message.ZuliaQuery.ShardQueryResponse): 1; StatRequest (io.zulia.message.ZuliaQuery.StatRequest): 1; InternalQueryResponse (io.zulia.message.ZuliaServiceOuterClass.InternalQueryResponse): 1; QueryResponse (io.zulia.message.ZuliaServiceOuterClass.QueryResponse): 1; TermFreq (io.zulia.server.analysis.frequency.TermFreq): 1; ArrayList (java.util.ArrayList): 1; HashMap (java.util.HashMap): 1; Map (java.util.Map): 1; TokenStream (org.apache.lucene.analysis.TokenStream): 1; CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 1