use of io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse in project zuliasearch by zuliaio.
the class ShardTermsHandler method handleShardTerms.
/**
 * Gathers terms of a single field from every leaf (segment) of {@code indexReader}
 * and returns them in one {@link GetTermsResponse}.
 *
 * <p>The request selects one of three lookup modes:
 * <ul>
 *   <li><b>Include terms</b> - when the request lists include terms, only those exact
 *       terms are looked up in each leaf; no term filter / term match is applied.</li>
 *   <li><b>Fuzzy</b> - when a fuzzy term is set, the terms produced by a
 *       {@code PublicFuzzyQuery} for that term are enumerated. The start/end terms are
 *       not consulted in this mode.</li>
 *   <li><b>Range</b> - otherwise terms are scanned from the first term that is
 *       {@code >=} the start term up to, but not including, the end term (an empty end
 *       term means "no upper bound").</li>
 * </ul>
 *
 * <p>Each visited term is handed to {@code handleTerm}, which is expected to accumulate
 * it into the shared sorted map (its implementation is not part of this method).
 * The response lists the terms in the natural {@code String} order of that map.
 *
 * @param request the terms request (field name, include terms, fuzzy term, range and
 *                regex settings)
 * @return a response holding one entry per distinct collected term
 * @throws IOException if reading terms from the index fails
 */
public GetTermsResponse handleShardTerms(GetTermsRequest request) throws IOException {
    GetTermsResponse.Builder builder = GetTermsResponse.newBuilder();
    String fieldName = request.getFieldName();

    // Sorted so the response is emitted in term order regardless of leaf order.
    SortedMap<String, ZuliaBase.Term.Builder> termsMap = new TreeMap<>();

    if (request.getIncludeTermCount() > 0) {
        // TreeSet removes duplicate include terms so each one is looked up once per leaf.
        Set<String> includeTerms = new TreeSet<>(request.getIncludeTermList());
        List<BytesRef> termBytesList = new ArrayList<>(includeTerms.size());
        for (String term : includeTerms) {
            termBytesList.add(new BytesRef(term));
        }

        for (LeafReaderContext subReaderContext : indexReader.leaves()) {
            Terms terms = subReaderContext.reader().terms(fieldName);
            if (terms == null) {
                // This leaf has no terms for the field.
                continue;
            }
            TermsEnum termsEnum = terms.iterator();
            for (BytesRef termBytes : termBytesList) {
                if (termsEnum.seekExact(termBytes)) {
                    handleTerm(termsMap, termsEnum, termsEnum.term(), null, null);
                }
            }
        }
    }
    else {
        boolean hasFuzzyTerm = request.hasFuzzyTerm();

        AttributeSource atts = null;
        if (hasFuzzyTerm) {
            atts = new AttributeSource();
            // Registered for its side effect on the attribute source; the returned
            // attribute instance itself is not needed here.
            atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
        }

        // An empty start term yields an empty BytesRef, i.e. "start at the first term".
        BytesRef startTermBytes = new BytesRef(request.getStartTerm());
        BytesRef endTermBytes = request.getEndTerm().isEmpty() ? null : new BytesRef(request.getEndTerm());

        Pattern termFilter = request.getTermFilter().isEmpty() ? null : Pattern.compile(request.getTermFilter());
        Pattern termMatch = request.getTermMatch().isEmpty() ? null : Pattern.compile(request.getTermMatch());

        for (LeafReaderContext subReaderContext : indexReader.leaves()) {
            Terms terms = subReaderContext.reader().terms(fieldName);
            if (terms == null) {
                continue;
            }

            if (hasFuzzyTerm) {
                ZuliaBase.FuzzyTerm fuzzyTerm = request.getFuzzyTerm();
                Term term = new Term(fieldName, fuzzyTerm.getTerm());
                PublicFuzzyQuery fuzzyQuery = new PublicFuzzyQuery(term, fuzzyTerm.getEditDistance(), fuzzyTerm.getPrefixLength(),
                        FuzzyQuery.defaultMaxExpansions, !fuzzyTerm.getNoTranspositions());
                TermsEnum termsEnum = fuzzyQuery.getTermsEnum(terms, atts);

                // A freshly obtained TermsEnum is unpositioned: term() must not be read
                // before the first next(). Only terms returned by next() are handled.
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    handleTerm(termsMap, termsEnum, text, termFilter, termMatch);
                }
            }
            else {
                TermsEnum termsEnum = terms.iterator();
                // seekCeil positions the enum on the smallest term >= start (END if none).
                if (termsEnum.seekCeil(startTermBytes) != TermsEnum.SeekStatus.END) {
                    BytesRef text = termsEnum.term();
                    // The end term is exclusive; stop at the first term that reaches it.
                    while (text != null && (endTermBytes == null || text.compareTo(endTermBytes) < 0)) {
                        handleTerm(termsMap, termsEnum, text, termFilter, termMatch);
                        text = termsEnum.next();
                    }
                }
            }
        }
    }

    for (ZuliaBase.Term.Builder termBuilder : termsMap.values()) {
        builder.addTerm(termBuilder.build());
    }
    return builder.build();
}
use of io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse in project zuliasearch by zuliaio.
the class TermsController method get.
/**
 * HTTP endpoint that returns the terms of one field of an index, as JSON or CSV.
 *
 * <p>The query parameters are copied into a {@code GetTermsRequest} (optional ones only
 * when present) and passed to the node's index manager. The resulting terms are
 * rendered as:
 * <ul>
 *   <li>JSON (when {@code format} equals "json", ignoring case): an object with
 *       {@code index}, {@code field} and a {@code terms} array whose entries carry
 *       {@code term}, {@code docFreq}, {@code termFreq} and {@code score};</li>
 *   <li>CSV (any other format): columns {@code term,termFreq,docFreq}, plus a
 *       {@code score} column only when a fuzzy term was supplied. Data rows always
 *       have the same columns as the header row.</li>
 * </ul>
 *
 * @param indexName     index to read terms from
 * @param field         field whose terms are requested
 * @param amount        optional limit passed through to the request
 * @param minDocFreq    optional minimum document frequency passed through to the request
 * @param minTermFreq   optional minimum term frequency passed through to the request
 * @param startTerm     optional start term passed through to the request
 * @param endTerm       optional end term passed through to the request
 * @param termFilter    optional term filter expression passed through to the request
 * @param termMatch     optional term match expression passed through to the request
 * @param includeTerm   optional explicit list of terms to look up
 * @param fuzzyTermJson optional JSON form of a {@code FuzzyTerm} message
 * @param pretty        whether JSON output is pretty-printed (defaults to true)
 * @param format        "json" for JSON output, anything else yields CSV (defaults to "json")
 * @return a success response with the rendered terms, or an internal-error response
 *         with a plain message if the fuzzy term JSON cannot be parsed or the lookup fails
 */
@Get
@Produces({ MediaType.APPLICATION_JSON + ";charset=utf-8", MediaType.TEXT_PLAIN + ";charset=utf-8" })
public HttpResponse<?> get(@QueryValue(ZuliaConstants.INDEX) final String indexName, @QueryValue(ZuliaConstants.FIELDS) final String field,
        @Nullable @QueryValue(ZuliaConstants.AMOUNT) final Integer amount,
        @Nullable @QueryValue(ZuliaConstants.MIN_DOC_FREQ) final Integer minDocFreq,
        @Nullable @QueryValue(ZuliaConstants.MIN_TERM_FREQ) final Integer minTermFreq,
        @Nullable @QueryValue(ZuliaConstants.START_TERM) final String startTerm,
        @Nullable @QueryValue(ZuliaConstants.END_TERM) final String endTerm,
        @Nullable @QueryValue(ZuliaConstants.TERM_FILTER) final String termFilter,
        @Nullable @QueryValue(ZuliaConstants.TERM_MATCH) final String termMatch,
        @Nullable @QueryValue(ZuliaConstants.INCLUDE_TERM) final List<String> includeTerm,
        @Nullable @QueryValue(ZuliaConstants.FUZZY_TERM_JSON) final String fuzzyTermJson,
        @QueryValue(value = ZuliaConstants.PRETTY, defaultValue = "true") Boolean pretty,
        @QueryValue(value = ZuliaConstants.FORMAT, defaultValue = "json") final String format) {

    ZuliaIndexManager indexManager = ZuliaNodeProvider.getZuliaNode().getIndexManager();

    GetTermsRequest.Builder termsBuilder = GetTermsRequest.newBuilder();
    termsBuilder.setIndexName(indexName);
    termsBuilder.setFieldName(field);

    // Optional parameters are only set when supplied so the request keeps its defaults.
    if (amount != null) {
        termsBuilder.setAmount(amount);
    }
    if (minDocFreq != null) {
        termsBuilder.setMinDocFreq(minDocFreq);
    }
    if (minTermFreq != null) {
        termsBuilder.setMinTermFreq(minTermFreq);
    }
    if (startTerm != null) {
        termsBuilder.setStartTerm(startTerm);
    }
    if (endTerm != null) {
        termsBuilder.setEndTerm(endTerm);
    }
    if (termFilter != null) {
        termsBuilder.setTermFilter(termFilter);
    }
    if (termMatch != null) {
        termsBuilder.setTermMatch(termMatch);
    }
    if (includeTerm != null) {
        termsBuilder.addAllIncludeTerm(includeTerm);
    }

    if (fuzzyTermJson != null) {
        try {
            FuzzyTerm.Builder fuzzyTermBuilder = FuzzyTerm.newBuilder();
            JsonFormat.parser().merge(fuzzyTermJson, fuzzyTermBuilder);
            termsBuilder.setFuzzyTerm(fuzzyTermBuilder);
        }
        catch (InvalidProtocolBufferException e) {
            return HttpResponse.serverError("Failed to parse fuzzy term json: " + e.getClass().getSimpleName() + ":" + e.getMessage())
                    .status(ZuliaConstants.INTERNAL_ERROR);
        }
    }

    try {
        GetTermsResponse terms = indexManager.getTerms(termsBuilder.build());

        if (format.equalsIgnoreCase("json")) {
            Document document = new Document();
            document.put("index", indexName);
            document.put("field", field);

            List<Document> termsDocs = new ArrayList<>();
            for (Term term : terms.getTermList()) {
                Document termDoc = new Document();
                termDoc.put("term", term.getValue());
                termDoc.put("docFreq", term.getDocFreq());
                termDoc.put("termFreq", term.getTermFreq());
                termDoc.put("score", term.getScore());
                termsDocs.add(termDoc);
            }
            document.put("terms", termsDocs);

            String docString = document.toJson();
            if (pretty) {
                docString = JsonWriter.formatJson(docString);
            }
            return HttpResponse.ok(docString).status(ZuliaConstants.SUCCESS).contentType(MediaType.APPLICATION_JSON_TYPE + ";charset=utf-8");
        }
        else {
            // The score column is only emitted for fuzzy requests; the same flag drives
            // both the header and every data row so the column counts always agree.
            boolean includeScore = termsBuilder.hasFuzzyTerm();

            StringBuilder csvString = new StringBuilder();
            csvString.append("term").append(",").append("termFreq").append(",").append("docFreq");
            if (includeScore) {
                csvString.append(",").append("score");
            }
            csvString.append("\n");

            for (Term term : terms.getTermList()) {
                String value = term.getValue();
                if (value.contains(",") || value.contains(" ") || value.contains("\"") || value.contains("\n")) {
                    // Quote the field and double any embedded quotes.
                    csvString.append("\"");
                    csvString.append(value.replace("\"", "\"\""));
                    csvString.append("\"");
                }
                else {
                    csvString.append(value);
                }
                csvString.append(",");
                csvString.append(term.getTermFreq());
                csvString.append(",");
                csvString.append(term.getDocFreq());
                if (includeScore) {
                    csvString.append(",");
                    csvString.append(term.getScore());
                }
                csvString.append("\n");
            }
            return HttpResponse.ok(csvString).status(ZuliaConstants.SUCCESS).contentType(MediaType.TEXT_PLAIN + ";charset=utf-8");
        }
    }
    catch (Exception e) {
        // Boundary catch: any failure while fetching or rendering becomes an error response.
        return HttpResponse.serverError("Failed to fetch terms for index <" + indexName + ">: " + e.getMessage())
                .status(ZuliaConstants.INTERNAL_ERROR);
    }
}
use of io.zulia.message.ZuliaServiceOuterClass.GetTermsResponse in project zuliasearch by zuliaio.
the class GetTermsRequestFederator method getResponse.
/**
 * Combines the term lists returned by {@code send(request)} into a single response.
 *
 * <p>Terms with the same value are merged by adding up their document frequency,
 * term frequency and score. The merged terms are then walked in sorted value order:
 * a term is added to the response when it meets the request's minimum document and
 * term frequencies, and the walk stops once the requested amount has been reached
 * (an amount of 0 means no limit). The last term visited by the walk - whether or
 * not it was added - is recorded as the response's last term.
 *
 * @param request the terms request being federated
 * @return the merged, filtered and limited terms response
 * @throws Exception if sending the request fails
 */
public GetTermsResponse getResponse(GetTermsRequest request) throws Exception {
    TreeMap<String, Term.Builder> mergedByValue = new TreeMap<>();

    for (InternalGetTermsResponse nodeResponse : send(request)) {
        for (GetTermsResponse shardTerms : nodeResponse.getGetTermsResponseList()) {
            for (Term shardTerm : shardTerms.getTermList()) {
                // Start each distinct value from zeroed counters, then accumulate.
                Term.Builder merged = mergedByValue.computeIfAbsent(shardTerm.getValue(), termValue -> {
                    Term.Builder fresh = Term.newBuilder().setValue(termValue).setDocFreq(0).setTermFreq(0);
                    fresh.setScore(0);
                    return fresh;
                });
                merged.setDocFreq(merged.getDocFreq() + shardTerm.getDocFreq());
                merged.setTermFreq(merged.getTermFreq() + shardTerm.getTermFreq());
                merged.setScore(merged.getScore() + shardTerm.getScore());
            }
        }
    }

    GetTermsResponse.Builder result = GetTermsResponse.newBuilder();
    final int limit = request.getAmount();
    int accepted = 0;
    Term.Builder lastVisited = null;

    for (Term.Builder candidate : mergedByValue.values()) {
        lastVisited = candidate;

        boolean meetsMinimums =
                candidate.getDocFreq() >= request.getMinDocFreq() && candidate.getTermFreq() >= request.getMinTermFreq();
        if (meetsMinimums) {
            result.addTerm(candidate.build());
            accepted++;
        }

        boolean limitReached = limit != 0 && accepted >= limit;
        if (limitReached) {
            break;
        }
    }

    if (lastVisited != null) {
        result.setLastTerm(lastVisited.build());
    }
    return result.build();
}
Aggregations