Example use of org.apache.lucene.queries.spans.SpanOrQuery in the OpenSearch project (by opensearch-project).
From the class TextFieldMapper, method createPhrasePrefixQuery.
/**
 * Builds a phrase-prefix query from the tokens of {@code stream}.
 *
 * <p>Tokens whose position increment is zero are grouped with the preceding token as
 * alternatives for the same position. When {@code prefixField} is {@code null}, or when any
 * term at the last position fails {@code usePrefixField} (tested against the length of the
 * term text), the plain {@link MultiPhrasePrefixQuery} on {@code field} is returned. Otherwise
 * the last position is looked up in {@code prefixField}: as a {@link SynonymQuery} when the
 * phrase has a single position, or as the final clause of a {@link SpanNearQuery} on
 * {@code field} when it has several.
 *
 * @param stream         token stream to consume; it is reset here but neither ended nor closed
 * @param field          field the phrase terms belong to
 * @param slop           slop applied to the phrase / span-near query
 * @param maxExpansions  maximum expansions configured on the {@link MultiPhrasePrefixQuery}
 * @param prefixField    field holding indexed prefixes, or {@code null} if there is none
 * @param usePrefixField predicate on term length deciding whether the prefix field can serve a term
 * @return the query described above
 * @throws IOException if the token stream fails while being read
 */
public static Query createPhrasePrefixQuery(TokenStream stream, String field, int slop, int maxExpansions, String prefixField, IntPredicate usePrefixField) throws IOException {
    final MultiPhrasePrefixQuery phrasePrefix = new MultiPhrasePrefixQuery(field);
    phrasePrefix.setSlop(slop);
    phrasePrefix.setMaxExpansions(maxExpansions);

    final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    final List<Term> pending = new ArrayList<>();

    stream.reset();
    int position = -1;
    while (stream.incrementToken()) {
        final int increment = posIncrAtt.getPositionIncrement();
        if (increment != 0) {
            // A new position starts: flush the alternatives gathered for the previous one.
            if (!pending.isEmpty()) {
                phrasePrefix.add(pending.toArray(new Term[0]), position);
            }
            position += increment;
            pending.clear();
        }
        pending.add(new Term(field, termAtt.getBytesRef()));
    }
    // Flush whatever was collected for the final position.
    phrasePrefix.add(pending.toArray(new Term[0]), position);

    if (prefixField == null) {
        return phrasePrefix;
    }

    final Term[][] terms = phrasePrefix.getTerms();
    final int[] positions = phrasePrefix.getPositions();
    final int lastPos = terms.length - 1;

    // The prefix field is only usable if every alternative at the last position qualifies.
    final boolean prefixFieldApplies = Arrays.stream(terms[lastPos])
        .allMatch(candidate -> usePrefixField.test(candidate.text().length()));
    if (!prefixFieldApplies) {
        return phrasePrefix;
    }

    if (terms.length == 1) {
        // Single position: no phrase needed, just match any alternative in the prefix field.
        final SynonymQuery.Builder synonyms = new SynonymQuery.Builder(prefixField);
        for (Term original : terms[0]) {
            synonyms.addTerm(new Term(prefixField, original.bytes()));
        }
        return synonyms.build();
    }

    final SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(field, true);
    spanQuery.setSlop(slop);
    int previousPos = -1;
    for (int i = 0; i < terms.length; i++) {
        final Term[] posTerms = terms[i];
        final int increment = positions[i] - previousPos;
        previousPos = positions[i];
        if (increment > 1) {
            // Preserve holes between positions.
            spanQuery.addGap(increment - 1);
        }

        final boolean isLast = i == lastPos;
        final SpanQuery[] alternatives = new SpanQuery[posTerms.length];
        for (int j = 0; j < posTerms.length; j++) {
            // The last position is searched in the prefix field, masked so it reports as `field`.
            alternatives[j] = isLast
                ? new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixField, posTerms[j].bytes())), field)
                : new SpanTermQuery(posTerms[j]);
        }
        spanQuery.addClause(alternatives.length == 1 ? alternatives[0] : new SpanOrQuery(alternatives));
    }
    return spanQuery.build();
}
Example use of org.apache.lucene.queries.spans.SpanOrQuery in the OpenSearch project (by opensearch-project).
From the class SpanBooleanQueryRewriteWithMaxClause, method rewrite.
/**
 * Rewrites {@code query} into a span query by enumerating the terms it matches in every leaf
 * of {@code reader} and turning each into a {@link SpanTermQuery}.
 *
 * <p>The result is a {@link SpanMatchNoDocsQuery} when nothing matches, the single span term
 * query when exactly one term matches, and a {@link SpanOrQuery} over all collected terms
 * otherwise. Collection is bounded by {@code maxExpansions}; once the bound is reached the
 * expansion either fails with a {@link RuntimeException} (when {@code hardLimit} is set) or
 * stops and returns what was gathered so far.
 *
 * @param reader reader whose leaves are scanned for matching terms
 * @param query  the multi-term query to expand
 * @return the rewritten span query
 * @throws IOException if reading terms from the index fails
 */
@Override
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
    // An anonymous RewriteMethod is used so that getTermsEnum(...) can be invoked from inside it.
    final MultiTermQuery.RewriteMethod delegate = new MultiTermQuery.RewriteMethod() {
        @Override
        public Query rewrite(IndexReader indexReader, MultiTermQuery multiTermQuery) throws IOException {
            final Collection<SpanQuery> expansions = collectTerms(indexReader, multiTermQuery);
            switch (expansions.size()) {
                case 0:
                    return new SpanMatchNoDocsQuery(multiTermQuery.getField(), "no expansion found for " + multiTermQuery.toString());
                case 1:
                    return expansions.iterator().next();
                default:
                    return new SpanOrQuery(expansions.toArray(new SpanQuery[0]));
            }
        }

        /** Gathers one span term query per distinct matching term, honouring the expansion limit. */
        private Collection<SpanQuery> collectTerms(IndexReader indexReader, MultiTermQuery multiTermQuery) throws IOException {
            final Set<SpanQuery> expansions = new HashSet<>();
            final IndexReaderContext topLevel = indexReader.getContext();
            for (LeafReaderContext leaf : topLevel.leaves()) {
                final Terms fieldTerms = leaf.reader().terms(multiTermQuery.getField());
                if (fieldTerms == null) {
                    // this leaf has no terms for the field
                    continue;
                }
                final TermsEnum termsEnum = getTermsEnum(multiTermQuery, fieldTerms, new AttributeSource());
                assert termsEnum != null;
                if (termsEnum == TermsEnum.EMPTY) {
                    continue;
                }
                for (BytesRef termBytes = termsEnum.next(); termBytes != null; termBytes = termsEnum.next()) {
                    if (expansions.size() >= maxExpansions) {
                        if (!hardLimit) {
                            // Soft limit: silently truncate the expansion.
                            return expansions;
                        }
                        throw new RuntimeException("[" + multiTermQuery.toString() + " ] " + "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
                    }
                    expansions.add(new SpanTermQuery(new Term(multiTermQuery.getField(), termBytes)));
                }
            }
            return expansions;
        }
    };
    return (SpanQuery) delegate.rewrite(reader, query);
}
Aggregations