Search in sources:

Example 1 with GetTermsResponse

use of io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse in project zuliasearch by zuliaio.

In class ShardTermsHandler, method handleShardTerms:

/**
 * Collects the terms of one field from every leaf (segment) of {@code indexReader} and returns
 * them in a {@link GetTermsResponse}.
 *
 * <p>The request selects one of three modes:
 * <ul>
 *   <li><b>include terms</b> - when the request carries include terms, only those exact terms
 *       are looked up in each segment; no regex filtering is applied.</li>
 *   <li><b>fuzzy</b> - when the request has a fuzzy term, the terms produced by the fuzzy
 *       query's terms enum are visited.</li>
 *   <li><b>range</b> - otherwise terms are scanned starting at the first term that is
 *       {@code >=} the start term and stopping before the first term that is {@code >=} the end
 *       term (no upper bound when the end term is empty).</li>
 * </ul>
 * Every visited term is handed to {@code handleTerm} together with {@code termsMap}; the builders
 * left in that map are added to the response in the map's (sorted String key) iteration order.
 * NOTE(review): {@code handleTerm} is defined elsewhere - it is presumed to merge per-segment
 * statistics into {@code termsMap} and to apply the optional filter/match patterns; confirm there.
 *
 * @param request the terms request (field name, include terms, fuzzy term, start/end term,
 *                term filter and term match regular expressions)
 * @return the response built from the accumulated term builders
 * @throws IOException if the underlying index access fails
 */
public GetTermsResponse handleShardTerms(GetTermsRequest request) throws IOException {
    GetTermsResponse.Builder builder = GetTermsResponse.newBuilder();
    String fieldName = request.getFieldName();
    SortedMap<String, ZuliaBase.Term.Builder> termsMap = new TreeMap<>();
    if (request.getIncludeTermCount() > 0) {
        // The TreeSet removes duplicate include terms and orders them before conversion.
        Set<String> includeTerms = new TreeSet<>(request.getIncludeTermList());
        List<BytesRef> termBytesList = new ArrayList<>();
        for (String term : includeTerms) {
            termBytesList.add(new BytesRef(term));
        }
        for (LeafReaderContext subReaderContext : indexReader.leaves()) {
            Terms terms = subReaderContext.reader().terms(fieldName);
            if (terms == null) {
                // This segment has no terms for the field.
                continue;
            }
            TermsEnum termsEnum = terms.iterator();
            for (BytesRef termBytes : termBytesList) {
                if (termsEnum.seekExact(termBytes)) {
                    handleTerm(termsMap, termsEnum, termsEnum.term(), null, null);
                }
            }
        }
    } else {
        boolean hasFuzzyTerm = request.hasFuzzyTerm();
        AttributeSource atts = null;
        if (hasFuzzyTerm) {
            atts = new AttributeSource();
            // Registered for its side effect on atts; the returned attribute itself is not needed here.
            atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
        }
        // An empty start term yields an empty BytesRef, i.e. the scan starts at the first term.
        BytesRef startTermBytes = new BytesRef(request.getStartTerm());
        BytesRef endTermBytes = null;
        if (!request.getEndTerm().isEmpty()) {
            endTermBytes = new BytesRef(request.getEndTerm());
        }
        Pattern termFilter = null;
        if (!request.getTermFilter().isEmpty()) {
            termFilter = Pattern.compile(request.getTermFilter());
        }
        Pattern termMatch = null;
        if (!request.getTermMatch().isEmpty()) {
            termMatch = Pattern.compile(request.getTermMatch());
        }
        for (LeafReaderContext subReaderContext : indexReader.leaves()) {
            Terms terms = subReaderContext.reader().terms(fieldName);
            if (terms == null) {
                continue;
            }
            if (hasFuzzyTerm) {
                ZuliaBase.FuzzyTerm fuzzyTerm = request.getFuzzyTerm();
                Term term = new Term(fieldName, fuzzyTerm.getTerm());
                PublicFuzzyQuery fuzzyQuery = new PublicFuzzyQuery(term, fuzzyTerm.getEditDistance(), fuzzyTerm.getPrefixLength(), FuzzyQuery.defaultMaxExpansions, !fuzzyTerm.getNoTranspositions());
                TermsEnum termsEnum = fuzzyQuery.getTermsEnum(terms, atts);
                // A freshly obtained TermsEnum is unpositioned and term() must not be called on it,
                // so the enum is driven exclusively through next().
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    handleTerm(termsMap, termsEnum, text, termFilter, termMatch);
                }
            } else {
                TermsEnum termsEnum = terms.iterator();
                if (termsEnum.seekCeil(startTermBytes) != TermsEnum.SeekStatus.END) {
                    // After a successful seekCeil the enum is positioned, so term() is valid here.
                    BytesRef text = termsEnum.term();
                    while (text != null && (endTermBytes == null || text.compareTo(endTermBytes) < 0)) {
                        handleTerm(termsMap, termsEnum, text, termFilter, termMatch);
                        text = termsEnum.next();
                    }
                }
            }
        }
    }
    for (ZuliaBase.Term.Builder termBuilder : termsMap.values()) {
        builder.addTerm(termBuilder.build());
    }
    return builder.build();
}
Also used : Pattern(java.util.regex.Pattern) AttributeSource(org.apache.lucene.util.AttributeSource) ZuliaBase(io.zulia.message.ZuliaBase) ArrayList(java.util.ArrayList) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) TreeMap(java.util.TreeMap) MaxNonCompetitiveBoostAttribute(org.apache.lucene.search.MaxNonCompetitiveBoostAttribute) TermsEnum(org.apache.lucene.index.TermsEnum) TreeSet(java.util.TreeSet) GetTermsResponse(io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef)

Example 2 with GetTermsResponse

use of io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse in project zuliasearch by zuliaio.

In class TermsController, method get:

/**
 * HTTP GET endpoint that fetches the terms of a field of an index and renders them either as
 * JSON or as CSV text.
 *
 * <p>Every optional query value that is present is copied into a {@link GetTermsRequest}; the
 * fuzzy term is supplied as protobuf JSON and parsed into a {@link FuzzyTerm}. The request is
 * executed through the node's {@link ZuliaIndexManager}.
 *
 * <p>Output:
 * <ul>
 *   <li>{@code format} equal to "json" (case-insensitive): a document with {@code index},
 *       {@code field} and a {@code terms} list whose entries carry {@code term},
 *       {@code docFreq}, {@code termFreq} and {@code score}; pretty printed when
 *       {@code pretty} is true.</li>
 *   <li>any other {@code format}: CSV with the columns {@code term,termFreq,docFreq} and an
 *       additional {@code score} column only when a fuzzy term was requested. Header and rows
 *       always have the same number of columns.</li>
 * </ul>
 *
 * @param indexName     index to read terms from
 * @param field         field whose terms are returned
 * @param amount        optional amount forwarded to the request
 * @param minDocFreq    optional minimum document frequency forwarded to the request
 * @param minTermFreq   optional minimum term frequency forwarded to the request
 * @param startTerm     optional start term forwarded to the request
 * @param endTerm       optional end term forwarded to the request
 * @param termFilter    optional term filter forwarded to the request
 * @param termMatch     optional term match forwarded to the request
 * @param includeTerm   optional list of terms forwarded to the request as include terms
 * @param fuzzyTermJson optional JSON representation of a {@link FuzzyTerm}
 * @param pretty        whether JSON output is pretty printed (defaults to true)
 * @param format        output format, "json" by default; anything else produces CSV
 * @return the rendered terms with status {@code ZuliaConstants.SUCCESS}, or an error message
 *         with status {@code ZuliaConstants.INTERNAL_ERROR} when the fuzzy term JSON cannot be
 *         parsed or fetching the terms fails
 */
@Get
@Produces({ MediaType.APPLICATION_JSON + ";charset=utf-8", MediaType.TEXT_PLAIN + ";charset=utf-8" })
public HttpResponse<?> get(@QueryValue(ZuliaConstants.INDEX) final String indexName, @QueryValue(ZuliaConstants.FIELDS) final String field, @Nullable @QueryValue(ZuliaConstants.AMOUNT) final Integer amount, @Nullable @QueryValue(ZuliaConstants.MIN_DOC_FREQ) final Integer minDocFreq, @Nullable @QueryValue(ZuliaConstants.MIN_TERM_FREQ) final Integer minTermFreq, @Nullable @QueryValue(ZuliaConstants.START_TERM) final String startTerm, @Nullable @QueryValue(ZuliaConstants.END_TERM) final String endTerm, @Nullable @QueryValue(ZuliaConstants.TERM_FILTER) final String termFilter, @Nullable @QueryValue(ZuliaConstants.TERM_MATCH) final String termMatch, @Nullable @QueryValue(ZuliaConstants.INCLUDE_TERM) final List<String> includeTerm, @Nullable @QueryValue(ZuliaConstants.FUZZY_TERM_JSON) final String fuzzyTermJson, @QueryValue(value = ZuliaConstants.PRETTY, defaultValue = "true") Boolean pretty, @QueryValue(value = ZuliaConstants.FORMAT, defaultValue = "json") final String format) {
    ZuliaIndexManager indexManager = ZuliaNodeProvider.getZuliaNode().getIndexManager();
    GetTermsRequest.Builder termsBuilder = GetTermsRequest.newBuilder();
    termsBuilder.setIndexName(indexName);
    termsBuilder.setFieldName(field);
    // Optional query values are only copied when supplied so the request keeps its defaults.
    if (amount != null) {
        termsBuilder.setAmount(amount);
    }
    if (minDocFreq != null) {
        termsBuilder.setMinDocFreq(minDocFreq);
    }
    if (minTermFreq != null) {
        termsBuilder.setMinTermFreq(minTermFreq);
    }
    if (startTerm != null) {
        termsBuilder.setStartTerm(startTerm);
    }
    if (endTerm != null) {
        termsBuilder.setEndTerm(endTerm);
    }
    if (termFilter != null) {
        termsBuilder.setTermFilter(termFilter);
    }
    if (termMatch != null) {
        termsBuilder.setTermMatch(termMatch);
    }
    if (includeTerm != null) {
        termsBuilder.addAllIncludeTerm(includeTerm);
    }
    if (fuzzyTermJson != null) {
        try {
            FuzzyTerm.Builder fuzzyTermBuilder = FuzzyTerm.newBuilder();
            JsonFormat.parser().merge(fuzzyTermJson, fuzzyTermBuilder);
            termsBuilder.setFuzzyTerm(fuzzyTermBuilder);
        } catch (InvalidProtocolBufferException e) {
            return HttpResponse.ok("Failed to parse fuzzy term json: " + e.getClass().getSimpleName() + ":" + e.getMessage()).status(ZuliaConstants.INTERNAL_ERROR);
        }
    }
    try {
        GetTermsResponse terms = indexManager.getTerms(termsBuilder.build());
        if (format.equalsIgnoreCase("json")) {
            Document document = new Document();
            document.put("index", indexName);
            document.put("field", field);
            List<Document> termsDocs = new ArrayList<>();
            for (Term term : terms.getTermList()) {
                Document termDoc = new Document();
                termDoc.put("term", term.getValue());
                termDoc.put("docFreq", term.getDocFreq());
                termDoc.put("termFreq", term.getTermFreq());
                termDoc.put("score", term.getScore());
                termsDocs.add(termDoc);
            }
            document.put("terms", termsDocs);
            String docString = document.toJson();
            if (pretty) {
                docString = JsonWriter.formatJson(docString);
            }
            return HttpResponse.ok(docString).status(ZuliaConstants.SUCCESS).contentType(MediaType.APPLICATION_JSON_TYPE + ";charset=utf-8");
        } else {
            // The score column is only emitted for fuzzy requests; the same flag drives the
            // header and every row so that the column counts always agree.
            boolean includeScore = termsBuilder.hasFuzzyTerm();
            StringBuilder csvString = new StringBuilder();
            csvString.append("term");
            csvString.append(",");
            csvString.append("termFreq");
            csvString.append(",");
            csvString.append("docFreq");
            if (includeScore) {
                csvString.append(",");
                csvString.append("score");
            }
            csvString.append("\n");
            for (Term term : terms.getTermList()) {
                String value = term.getValue();
                if (value.contains(",") || value.contains(" ") || value.contains("\"") || value.contains("\n")) {
                    // Quote the value and double any embedded quotes.
                    csvString.append("\"");
                    csvString.append(value.replace("\"", "\"\""));
                    csvString.append("\"");
                } else {
                    csvString.append(value);
                }
                csvString.append(",");
                csvString.append(term.getTermFreq());
                csvString.append(",");
                csvString.append(term.getDocFreq());
                if (includeScore) {
                    csvString.append(",");
                    csvString.append(term.getScore());
                }
                csvString.append("\n");
            }
            return HttpResponse.ok(csvString).status(ZuliaConstants.SUCCESS).contentType(MediaType.TEXT_PLAIN + ";charset=utf-8");
        }
    } catch (Exception e) {
        return HttpResponse.serverError("Failed to fetch terms for index <" + indexName + ">: " + e.getMessage()).status(ZuliaConstants.INTERNAL_ERROR);
    }
}
Also used : InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) ArrayList(java.util.ArrayList) Term(io.zulia.message.ZuliaBase.Term) FuzzyTerm(io.zulia.message.ZuliaBase.FuzzyTerm) Document(org.bson.Document) FuzzyTerm(io.zulia.message.ZuliaBase.FuzzyTerm) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) GetTermsResponse(io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse) ZuliaIndexManager(io.zulia.server.index.ZuliaIndexManager) GetTermsRequest(io.zulia.message.ZuliaServiceOuterClass.GetTermsRequest) Produces(io.micronaut.http.annotation.Produces) Get(io.micronaut.http.annotation.Get)

Example 3 with GetTermsResponse

use of io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse in project zuliasearch by zuliaio.

In class GetTermsRequestFederator, method getResponse:

/**
 * Sends the request via {@code send} and merges the returned per-node term lists into a single
 * response.
 *
 * <p>Terms with the same value are combined by summing their document frequency, term
 * frequency and score. The merged terms are then walked in sorted order of their value; a term
 * is added to the response when both its document frequency and term frequency reach the
 * request's minimums, and the walk stops once a non-zero requested amount of terms has been
 * added. The last term examined during the walk (whether or not it was added) is stored as the
 * response's last term.
 *
 * @param request the terms request to federate
 * @return the merged response
 * @throws Exception propagated from {@code send}
 */
public GetTermsResponse getResponse(GetTermsRequest request) throws Exception {
    List<InternalGetTermsResponse> nodeResponses = send(request);

    // Sorted by term value; one accumulating builder per distinct term.
    TreeMap<String, Term.Builder> mergedByValue = new TreeMap<>();
    for (InternalGetTermsResponse nodeResponse : nodeResponses) {
        for (GetTermsResponse shardResponse : nodeResponse.getGetTermsResponseList()) {
            for (Term shardTerm : shardResponse.getTermList()) {
                Term.Builder total = mergedByValue.computeIfAbsent(shardTerm.getValue(),
                        value -> Term.newBuilder().setValue(value).setDocFreq(0).setTermFreq(0).setScore(0));
                total.setDocFreq(total.getDocFreq() + shardTerm.getDocFreq());
                total.setTermFreq(total.getTermFreq() + shardTerm.getTermFreq());
                total.setScore(total.getScore() + shardTerm.getScore());
            }
        }
    }

    GetTermsResponse.Builder responseBuilder = GetTermsResponse.newBuilder();
    int limit = request.getAmount();
    int accepted = 0;
    Term.Builder lastExamined = null;
    for (Term.Builder candidate : mergedByValue.values()) {
        lastExamined = candidate;
        boolean meetsMinimums = candidate.getDocFreq() >= request.getMinDocFreq()
                && candidate.getTermFreq() >= request.getMinTermFreq();
        if (meetsMinimums) {
            responseBuilder.addTerm(candidate.build());
            accepted++;
        }
        // A limit of 0 means "no limit".
        boolean limitReached = limit != 0 && accepted >= limit;
        if (limitReached) {
            break;
        }
    }
    if (lastExamined != null) {
        responseBuilder.setLastTerm(lastExamined.build());
    }
    return responseBuilder.build();
}
Also used : InternalGetTermsResponse(io.zulia.message.ZuliaServiceOuterClass.InternalGetTermsResponse) GetTermsResponse(io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse) Term(io.zulia.message.ZuliaBase.Term) TreeMap(java.util.TreeMap) InternalGetTermsResponse(io.zulia.message.ZuliaServiceOuterClass.InternalGetTermsResponse)

Aggregations

GetTermsResponse (io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse)3 Term (io.zulia.message.ZuliaBase.Term)2 ArrayList (java.util.ArrayList)2 TreeMap (java.util.TreeMap)2 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1 Get (io.micronaut.http.annotation.Get)1 Produces (io.micronaut.http.annotation.Produces)1 ZuliaBase (io.zulia.message.ZuliaBase)1 FuzzyTerm (io.zulia.message.ZuliaBase.FuzzyTerm)1 GetTermsRequest (io.zulia.message.ZuliaServiceOuterClass.GetTermsRequest)1 InternalGetTermsResponse (io.zulia.message.ZuliaServiceOuterClass.InternalGetTermsResponse)1 ZuliaIndexManager (io.zulia.server.index.ZuliaIndexManager)1 TreeSet (java.util.TreeSet)1 Pattern (java.util.regex.Pattern)1 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)1 Term (org.apache.lucene.index.Term)1 Terms (org.apache.lucene.index.Terms)1 TermsEnum (org.apache.lucene.index.TermsEnum)1 MaxNonCompetitiveBoostAttribute (org.apache.lucene.search.MaxNonCompetitiveBoostAttribute)1 AttributeSource (org.apache.lucene.util.AttributeSource)1