Search in sources :

Example 51 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class TermAutomatonQuery method rewrite.

/**
 * Tries to replace this query with a simpler equivalent built from the automaton {@code det}.
 *
 * <p>The outcomes, in the order they are tried:
 * <ul>
 *   <li>{@link MatchNoDocsQuery} when {@code Operations.isEmpty(det)} is true;</li>
 *   <li>{@link TermQuery} when {@code Operations.getSingleton(det)} yields exactly one term ID;</li>
 *   <li>{@link PhraseQuery} when the automaton is a single chain of states and every
 *       position that contributes terms contributes exactly one;</li>
 *   <li>{@link MultiPhraseQuery} when the automaton is a single chain of states but some
 *       position contributes several terms;</li>
 *   <li>{@code this}, unchanged, otherwise.</li>
 * </ul>
 *
 * @param reader not referenced anywhere in this method body
 * @return the rewritten query, or {@code this} when no simpler form applies
 * @throws IOException declared by the signature; no statement here visibly throws it
 */
public Query rewrite(IndexReader reader) throws IOException {
    if (Operations.isEmpty(det)) {
        return new MatchNoDocsQuery();
    }
    IntsRef single = Operations.getSingleton(det);
    if (single != null && single.length == 1) {
        // Exactly one term ID: map it back to its term through idToTerm.
        return new TermQuery(new Term(field, idToTerm.get(single.ints[single.offset])));
    }
    // TODO: can PhraseQuery really handle multiple terms at the same position?  If so, why do we even have MultiPhraseQuery?
    // Try for either PhraseQuery or MultiPhraseQuery, which only works when the automaton is a sausage:
    // Both builders are filled in parallel; a builder set to null means that form has been ruled out.
    MultiPhraseQuery.Builder mpq = new MultiPhraseQuery.Builder();
    PhraseQuery.Builder pq = new PhraseQuery.Builder();
    // Single Transition instance reused as a cursor for every state visited.
    Transition t = new Transition();
    // The walk starts at state 0 and advances one state (and one position) per iteration.
    int state = 0;
    int pos = 0;
    query: while (true) {
        int count = det.initTransition(state, t);
        if (count == 0) {
            // No outgoing transitions: the chain ends here, and it is only usable if this state accepts.
            if (det.isAccept(state) == false) {
                mpq = null;
                pq = null;
            }
            break;
        } else if (det.isAccept(state)) {
            // An accepting state that still has outgoing transitions: give up on both phrase forms.
            mpq = null;
            pq = null;
            break;
        }
        // Destination shared by all transitions of this state; -1 until the first transition is read.
        int dest = -1;
        List<Term> terms = new ArrayList<>();
        boolean matchesAny = false;
        for (int i = 0; i < count; i++) {
            det.getNextTransition(t);
            if (i == 0) {
                dest = t.dest;
            } else if (dest != t.dest) {
                // Transitions from one state lead to different states: not a single chain, abandon the walk.
                mpq = null;
                pq = null;
                break query;
            }
            // Sticky flag: once any transition's [min, max] range contains anyTermID it stays true
            // (presumably anyTermID is the "match any term" label -- confirm against the field's definition).
            matchesAny |= anyTermID >= t.min && anyTermID <= t.max;
            if (matchesAny == false) {
                // Expand the inclusive label range into one Term per ID.
                for (int termID = t.min; termID <= t.max; termID++) {
                    terms.add(new Term(field, idToTerm.get(termID)));
                }
            }
        }
        if (matchesAny == false) {
            // mpq cannot be null here: every branch above that nulls it also leaves the loop.
            mpq.add(terms.toArray(new Term[terms.size()]), pos);
            if (pq != null) {
                if (terms.size() == 1) {
                    pq.add(terms.get(0), pos);
                } else {
                    // More than one term at this position rules out PhraseQuery; mpq remains a candidate.
                    pq = null;
                }
            }
        }
        // When matchesAny is true nothing is added for this position, but pos still advances below,
        // so the skipped position remains as a gap in the positions handed to the builders.
        state = dest;
        pos++;
    }
    // Prefer PhraseQuery over MultiPhraseQuery when both survived the walk.
    if (pq != null) {
        return pq.build();
    } else if (mpq != null) {
        return mpq.build();
    }
    // TODO: we could maybe also rewrite to union of PhraseQuery (pull all finite strings) if it's "worth it"?
    return this;
}
Also used : Term(org.apache.lucene.index.Term) Transition(org.apache.lucene.util.automaton.Transition) ArrayList(java.util.ArrayList) List(java.util.List) IntsRef(org.apache.lucene.util.IntsRef)

Example 52 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class MemoryDocValuesProducer method getSortedNonIterator.

/**
 * Returns a {@link LegacySortedDocValues} view for {@code field}, backed by the field's FST
 * (ordinal lookups) and by {@code getNumericNonIterator(field)} (document-to-ordinal mapping).
 *
 * <p>When the field's entry reports zero ordinals, the shared empty instance is returned.
 * Otherwise the FST is taken from {@code fstInstances}, or read from a clone of {@code data}
 * positioned at the entry's offset; a freshly read FST is cached and accounted for in
 * {@code ramBytesUsed} only when {@code merging} is false.
 *
 * @param field the field whose sorted doc values are requested
 * @return the sorted doc values for the field
 * @throws IOException if reading the FST or the numeric values fails
 */
private LegacySortedDocValues getSortedNonIterator(FieldInfo field) throws IOException {
    final FSTEntry entry = fsts.get(field.name);
    if (entry.numOrds == 0) {
        return DocValues.emptyLegacySorted();
    }

    final FST<Long> fst;
    synchronized (this) {
        FST<Long> cached = fstInstances.get(field.name);
        if (cached == null) {
            // Not loaded yet: read it from a private clone of the data input.
            final IndexInput input = this.data.clone();
            input.seek(entry.offset);
            cached = new FST<>(input, PositiveIntOutputs.getSingleton());
            if (merging == false) {
                ramBytesUsed.addAndGet(cached.ramBytesUsed());
                fstInstances.put(field.name, cached);
            }
        }
        fst = cached;
    }

    final LegacyNumericDocValues ordByDoc = getNumericNonIterator(field);

    // per-thread resources
    final BytesReader reader = fst.getBytesReader();
    final Arc<Long> rootArc = new Arc<>();
    final Arc<Long> workArc = new Arc<>();
    final IntsRefBuilder workInts = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> termEnum = new BytesRefFSTEnum<>(fst);

    return new LegacySortedDocValues() {

        /** Reusable buffer that receives the bytes of the term found by {@link #lookupOrd(int)}. */
        final BytesRefBuilder termBuffer = new BytesRefBuilder();

        @Override
        public int getOrd(int docID) {
            final long ord = ordByDoc.get(docID);
            return (int) ord;
        }

        @Override
        public BytesRef lookupOrd(int ord) {
            try {
                reader.setPosition(0);
                fst.getFirstArc(rootArc);
                return Util.toBytesRef(Util.getByOutput(fst, ord, reader, rootArc, workArc, workInts), termBuffer);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public int lookupTerm(BytesRef key) {
            try {
                final InputOutput<Long> ceil = termEnum.seekCeil(key);
                if (ceil == null) {
                    // seekCeil found nothing at or after the key.
                    return -getValueCount() - 1;
                }
                if (ceil.input.equals(key)) {
                    return ceil.output.intValue();
                }
                // Key absent: encode the ordinal of the entry seekCeil landed on as (-ord - 1).
                final long ceilOrd = ceil.output;
                return (int) -ceilOrd - 1;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public int getValueCount() {
            return (int) entry.numOrds;
        }

        @Override
        public TermsEnum termsEnum() {
            return new FSTTermsEnum(fst);
        }
    };
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IOException(java.io.IOException) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRefFSTEnum(org.apache.lucene.util.fst.BytesRefFSTEnum) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) Arc(org.apache.lucene.util.fst.FST.Arc) AtomicLong(java.util.concurrent.atomic.AtomicLong) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexInput(org.apache.lucene.store.IndexInput) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef)

Example 53 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class CompletionTokenStream method incrementToken.

/**
 * Emits the next finite string of the token automaton as a token.
 *
 * <p>The finite-strings iterator is created on the first call from {@code toAutomaton()},
 * limited by {@code maxGraphExpansions}. Each string is written into the bytes attribute,
 * mirrored into the char term attribute when one is present, and tagged with the payload
 * when one is set.
 *
 * @return {@code true} if a token was produced, {@code false} once the iterator is exhausted
 * @throws IOException if building the automaton fails
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();

    // Lazily build the iterator over all finite strings on first use.
    if (finiteStrings == null) {
        final Automaton tokenAutomaton = toAutomaton();
        finiteStrings = new LimitedFiniteStringsIterator(tokenAutomaton, maxGraphExpansions);
    }

    final IntsRef next = finiteStrings.next();
    if (next != null) {
        // now we have UTF-8
        Util.toBytesRef(next, bytesAtt.builder());

        if (charTermAttribute != null) {
            charTermAttribute.setLength(0);
            charTermAttribute.append(bytesAtt.toUTF16());
        }

        if (this.payload != null) {
            payloadAttr.setPayload(this.payload);
        }
        return true;
    }
    return false;
}
Also used : Automaton(org.apache.lucene.util.automaton.Automaton) TokenStreamToAutomaton(org.apache.lucene.analysis.TokenStreamToAutomaton) LimitedFiniteStringsIterator(org.apache.lucene.util.automaton.LimitedFiniteStringsIterator) IntsRef(org.apache.lucene.util.IntsRef)

Example 54 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class ContextQuery method toContextAutomaton.

/**
 * Builds the automaton that matches the context portion, each alternative ending with
 * {@code ContextSuggestField.CONTEXT_SEPARATOR}.
 *
 * <p>When {@code matchAllContexts} is set or {@code contexts} is empty, the result is
 * "anything, then the separator". Otherwise it is the union, over all map entries, of
 * "the context string, optionally followed by anything when the entry is not exact,
 * then the separator".
 *
 * @param contexts         context strings mapped to their metadata
 * @param matchAllContexts whether every context should match regardless of {@code contexts}
 * @return the context automaton
 */
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
    final Automaton anything = Operations.repeat(Automata.makeAnyString());
    final Automaton separator = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);

    if (matchAllContexts || contexts.isEmpty()) {
        return Operations.concatenate(anything, separator);
    }

    Automaton union = null;
    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
        final IntsRef context = entry.getKey();
        Automaton branch = Automata.makeString(context.ints, context.offset, context.length);
        if (!entry.getValue().exact) {
            // Non-exact contexts act as prefixes: allow any continuation before the separator.
            branch = Operations.concatenate(branch, anything);
        }
        branch = Operations.concatenate(branch, separator);
        union = (union == null) ? branch : Operations.union(union, branch);
    }
    return union;
}
Also used : Automaton(org.apache.lucene.util.automaton.Automaton) IntsRef(org.apache.lucene.util.IntsRef) HashMap(java.util.HashMap) Map(java.util.Map)

Example 55 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class ContextQuery method createWeight.

/**
 * Creates the weight for this context query.
 *
 * <p>The inner query's weight supplies the suggestion automaton. That automaton is preceded
 * by an optional {@code CompletionAnalyzer.SEP_LABEL} and by the context automaton, and the
 * whole is then determinized. The weight also receives each context's boost and the distinct
 * context lengths, ordered from longest to shortest.
 *
 * @param searcher    searcher passed through to the inner query
 * @param needsScores passed through to the inner query
 * @param boost       passed through to the inner query
 * @return a {@code ContextCompletionWeight} wrapping the inner weight
 * @throws IOException if creating the inner weight fails
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    final CompletionWeight innerWeight = (CompletionWeight) innerQuery.createWeight(searcher, needsScores, boost);

    // When separators are preserved, the FST holds a SEP_LABEL behind each gap;
    // the query automaton must optionally accept that label too in order to match.
    final Automaton maybeSeparator = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
    final Automaton suggestionPart = Operations.concatenate(maybeSeparator, innerWeight.getAutomaton());
    final Automaton contextPart = toContextAutomaton(contexts, matchAllContexts);
    final Automaton determinized = Operations.determinize(
            Operations.concatenate(contextPart, suggestionPart),
            Operations.DEFAULT_MAX_DETERMINIZED_STATES);

    // Collect the boost of every context and the set of distinct context lengths.
    final Map<IntsRef, Float> boostByContext = new HashMap<>(contexts.size());
    final TreeSet<Integer> distinctLengths = new TreeSet<>();
    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
        final IntsRef context = entry.getKey();
        boostByContext.put(context, entry.getValue().boost);
        distinctLengths.add(context.length);
    }

    // Flatten the lengths into an array, longest first.
    final int[] lengthsDescending = new int[distinctLengths.size()];
    int slot = 0;
    for (int length : distinctLengths.descendingSet()) {
        lengthsDescending[slot++] = length;
    }

    return new ContextCompletionWeight(this, determinized, innerWeight, boostByContext, lengthsDescending);
}
Also used : Automaton(org.apache.lucene.util.automaton.Automaton) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) IntsRef(org.apache.lucene.util.IntsRef) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

IntsRef (org.apache.lucene.util.IntsRef)63 BytesRef (org.apache.lucene.util.BytesRef)19 IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)19 HashSet (java.util.HashSet)16 ArrayList (java.util.ArrayList)13 Automaton (org.apache.lucene.util.automaton.Automaton)13 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)12 TokenStreamToAutomaton (org.apache.lucene.analysis.TokenStreamToAutomaton)9 IOException (java.io.IOException)7 Directory (org.apache.lucene.store.Directory)7 HashMap (java.util.HashMap)5 FiniteStringsIterator (org.apache.lucene.util.automaton.FiniteStringsIterator)5 BytesReader (org.apache.lucene.util.fst.FST.BytesReader)5 Pair (org.apache.lucene.util.fst.PairOutputs.Pair)5 ByteArrayInputStream (java.io.ByteArrayInputStream)4 FilterInputStream (java.io.FilterInputStream)4 InputStream (java.io.InputStream)4 Map (java.util.Map)4 Random (java.util.Random)4 TokenStream (org.apache.lucene.analysis.TokenStream)4