Usage of org.apache.lucene.util.IntsRef in the Apache lucene-solr project:
the rewrite method of the TermAutomatonQuery class.
// Rewrites this query into a cheaper equivalent when the determinized automaton
// has a simple shape: MatchNoDocsQuery (empty automaton), TermQuery (single
// one-label string), or PhraseQuery / MultiPhraseQuery when the automaton is a
// "sausage" (a strictly linear chain of states). Otherwise returns this query.
public Query rewrite(IndexReader reader) throws IOException {
// Empty automaton matches no documents.
if (Operations.isEmpty(det)) {
return new MatchNoDocsQuery();
}
// An automaton accepting exactly one string of length 1 is a plain term lookup.
IntsRef single = Operations.getSingleton(det);
if (single != null && single.length == 1) {
return new TermQuery(new Term(field, idToTerm.get(single.ints[single.offset])));
}
// TODO: can PhraseQuery really handle multiple terms at the same position? If so, why do we even have MultiPhraseQuery?
// Try for either PhraseQuery or MultiPhraseQuery, which only works when the automaton is a sausage:
MultiPhraseQuery.Builder mpq = new MultiPhraseQuery.Builder();
PhraseQuery.Builder pq = new PhraseQuery.Builder();
Transition t = new Transition();
int state = 0;
int pos = 0;
query: while (true) {
int count = det.initTransition(state, t);
if (count == 0) {
// Dead end: valid only if we stopped on an accept state (end of the chain);
// otherwise neither phrase form applies, so drop both builders.
if (det.isAccept(state) == false) {
mpq = null;
pq = null;
}
break;
} else if (det.isAccept(state)) {
// Accept state that still has outgoing transitions means variable-length
// matches, which neither PhraseQuery nor MultiPhraseQuery can express.
mpq = null;
pq = null;
break;
}
int dest = -1;
List<Term> terms = new ArrayList<>();
boolean matchesAny = false;
for (int i = 0; i < count; i++) {
det.getNextTransition(t);
if (i == 0) {
dest = t.dest;
} else if (dest != t.dest) {
// Transitions fan out to different target states -> not a sausage; give up.
mpq = null;
pq = null;
break query;
}
// anyTermID is the "match any term" wildcard label — presumably it covers
// this position entirely, so the concrete terms need not be enumerated.
matchesAny |= anyTermID >= t.min && anyTermID <= t.max;
if (matchesAny == false) {
// Expand the contiguous label range into the concrete terms at this position.
for (int termID = t.min; termID <= t.max; termID++) {
terms.add(new Term(field, idToTerm.get(termID)));
}
}
}
if (matchesAny == false) {
mpq.add(terms.toArray(new Term[terms.size()]), pos);
if (pq != null) {
if (terms.size() == 1) {
pq.add(terms.get(0), pos);
} else {
// More than one term at this position: only MultiPhraseQuery can hold it.
pq = null;
}
}
}
state = dest;
pos++;
}
// Prefer the simpler PhraseQuery when every position held exactly one term.
if (pq != null) {
return pq.build();
} else if (mpq != null) {
return mpq.build();
}
// TODO: we could maybe also rewrite to union of PhraseQuery (pull all finite strings) if it's "worth it"?
return this;
}
Usage of org.apache.lucene.util.IntsRef in the Apache lucene-solr project:
the getSortedNonIterator method of the MemoryDocValuesProducer class.
// Returns a legacy (random-access) SortedDocValues view of the given field,
// backed by an FST mapping term bytes <-> ordinals. The FST is loaded lazily
// from the data file and cached per field name.
private LegacySortedDocValues getSortedNonIterator(FieldInfo field) throws IOException {
final FSTEntry entry = fsts.get(field.name);
if (entry.numOrds == 0) {
// Field has no terms at all.
return DocValues.emptyLegacySorted();
}
FST<Long> instance;
// Double-checked per-field cache: load the FST at most once.
synchronized (this) {
instance = fstInstances.get(field.name);
if (instance == null) {
IndexInput data = this.data.clone();
data.seek(entry.offset);
instance = new FST<>(data, PositiveIntOutputs.getSingleton());
if (!merging) {
// Skip RAM accounting and caching while merging, so merge-time loads
// are not permanently retained.
ramBytesUsed.addAndGet(instance.ramBytesUsed());
fstInstances.put(field.name, instance);
}
}
}
// docID -> ord comes from the field's numeric doc values.
final LegacyNumericDocValues docToOrd = getNumericNonIterator(field);
final FST<Long> fst = instance;
// per-thread resources
// NOTE(review): these scratch objects are captured by the returned instance,
// so "per-thread" holds only if the caller does not share the returned
// LegacySortedDocValues across threads — confirm against callers.
final BytesReader in = fst.getBytesReader();
final Arc<Long> firstArc = new Arc<>();
final Arc<Long> scratchArc = new Arc<>();
final IntsRefBuilder scratchInts = new IntsRefBuilder();
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
return new LegacySortedDocValues() {
// Reused buffer for term bytes returned by lookupOrd.
final BytesRefBuilder term = new BytesRefBuilder();
@Override
public int getOrd(int docID) {
return (int) docToOrd.get(docID);
}
@Override
public BytesRef lookupOrd(int ord) {
// Walk the FST by output (the ordinal) to reconstruct the term bytes.
try {
in.setPosition(0);
fst.getFirstArc(firstArc);
IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
return Util.toBytesRef(output, term);
} catch (IOException bogus) {
// The FST is fully in memory; an IOException here is unexpected.
throw new RuntimeException(bogus);
}
}
@Override
public int lookupTerm(BytesRef key) {
try {
InputOutput<Long> o = fstEnum.seekCeil(key);
if (o == null) {
// key sorts after every term: -(insertionPoint) - 1 with insertion at end.
return -getValueCount() - 1;
} else if (o.input.equals(key)) {
// Exact match: the FST output is the term's ordinal.
return o.output.intValue();
} else {
// Not present: encode -(ceiling ordinal) - 1, binarySearch-style.
return (int) -o.output - 1;
}
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
}
@Override
public int getValueCount() {
return (int) entry.numOrds;
}
@Override
public TermsEnum termsEnum() {
return new FSTTermsEnum(fst);
}
};
}
Usage of org.apache.lucene.util.IntsRef in the Apache lucene-solr project:
the incrementToken method of the CompletionTokenStream class.
@Override
public boolean incrementToken() throws IOException {
  clearAttributes();
  // Lazily build the (expansion-limited) iterator over all automaton paths on first call.
  if (finiteStrings == null) {
    finiteStrings = new LimitedFiniteStringsIterator(toAutomaton(), maxGraphExpansions);
  }
  final IntsRef path = finiteStrings.next();
  if (path == null) {
    return false;  // all paths emitted
  }
  // now we have UTF-8
  // Copy the path's ints into the byte-term attribute's builder.
  Util.toBytesRef(path, bytesAtt.builder());
  if (charTermAttribute != null) {
    // Mirror the bytes into the char-term attribute as UTF-16 text.
    charTermAttribute.setLength(0);
    charTermAttribute.append(bytesAtt.toUTF16());
  }
  if (payload != null) {
    payloadAttr.setPayload(this.payload);
  }
  return true;
}
Usage of org.apache.lucene.util.IntsRef in the Apache lucene-solr project:
the toContextAutomaton method of the ContextQuery class.
/**
 * Builds an automaton accepting the context prefixes described by {@code contexts},
 * each followed by the context separator label. With {@code matchAllContexts} (or no
 * contexts at all) the result accepts any string followed by the separator.
 */
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton anyString = Operations.repeat(Automata.makeAnyString());
  final Automaton separator = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
  // No specific contexts requested: accept any context followed by the separator.
  if (matchAllContexts || contexts.isEmpty()) {
    return Operations.concatenate(anyString, separator);
  }
  Automaton union = null;
  for (Map.Entry<IntsRef, ContextMetaData> contextEntry : contexts.entrySet()) {
    final IntsRef contextKey = contextEntry.getKey();
    Automaton single = Automata.makeString(contextKey.ints, contextKey.offset, contextKey.length);
    if (contextEntry.getValue().exact == false) {
      // Non-exact contexts also match any continuation of the given prefix.
      single = Operations.concatenate(single, anyString);
    }
    single = Operations.concatenate(single, separator);
    union = (union == null) ? single : Operations.union(union, single);
  }
  return union;
}
Usage of org.apache.lucene.util.IntsRef in the Apache lucene-solr project:
the createWeight method of the ContextQuery class.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, needsScores, boost));
  // if separators are preserved the fst contains a SEP_LABEL
  // behind each gap. To have a matching automaton, we need to
  // include the SEP_LABEL in the query as well
  final Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
  final Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton());
  // Prepend the contexts automaton, then determinize the combined query automaton.
  final Automaton contextsAutomaton =
      Operations.determinize(
          Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton),
          Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  // Collect per-context boosts and the distinct context lengths.
  final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
  final TreeSet<Integer> lengths = new TreeSet<>();
  for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
    contextMap.put(entry.getKey(), entry.getValue().boost);
    lengths.add(entry.getKey().length);
  }
  // Flatten the lengths longest-first into an array.
  final int[] contextLengthArray = new int[lengths.size()];
  int slot = 0;
  for (final Iterator<Integer> it = lengths.descendingIterator(); it.hasNext(); slot++) {
    contextLengthArray[slot] = it.next();
  }
  return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
}
Aggregations