Search in sources :

Example 36 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class FSTCompletionBuilder method buildAutomaton.

/**
 * Builds the final automaton from the entries supplied by {@code sorter}.
 *
 * <p>Entries are consumed in the order the sorter's iterator yields them. An entry is
 * added to the FST only when it does not compare equal to the most recently added one,
 * so a run of consecutive equal entries contributes a single input. The comparison
 * baseline starts out as an empty {@link BytesRefBuilder}.
 *
 * @param sorter source of the entries to add
 * @return the finished automaton, or {@code null} when the iterator yielded no entries
 * @throws IOException propagated from the entry iterator or the FST builder
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
    final Outputs<Object> noOutputs = NoOutputs.getSingleton();
    final Object noOutput = noOutputs.getNoOutput();
    final Builder<Object> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, noOutputs, true, 15);

    // Holds a copy of the last entry handed to the FST builder.
    final BytesRefBuilder lastAdded = new BytesRefBuilder();
    // Reusable buffer for the bytes -> ints conversion required by Builder.add.
    final IntsRefBuilder intsSpare = new IntsRefBuilder();

    // Counts every entry seen, including the ones skipped as duplicates.
    int seen = 0;
    final BytesRefIterator entries = sorter.iterator();
    for (BytesRef current = entries.next(); current != null; current = entries.next()) {
        seen++;
        if (lastAdded.get().compareTo(current) == 0) {
            // Same as the previously added entry: nothing new to add.
            continue;
        }
        fstBuilder.add(Util.toIntsRef(current, intsSpare), noOutput);
        lastAdded.copyBytes(current);
    }

    if (seen == 0) {
        return null;
    }
    return fstBuilder.finish();
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) BytesRefIterator(org.apache.lucene.util.BytesRefIterator) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 37 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class WFSTCompletionLookup method build.

/**
 * Builds the weighted FST from the given suggestions and stores it in {@code fst}.
 *
 * <p>The input is wrapped in a {@code WFSTInputIterator}; each term it yields is added
 * to the FST together with the value reported by {@code weight()}. A term equal to the
 * one added immediately before it is skipped. {@code count} is reset to zero up front
 * and incremented once per term actually added.
 *
 * @param iterator the suggestions to index; must not carry payloads or contexts
 * @throws IllegalArgumentException if {@code iterator} reports payloads or contexts
 * @throws IOException propagated from the wrapped iterator or the FST builder
 */
@Override
public void build(InputIterator iterator) throws IOException {
    if (iterator.hasPayloads()) {
        throw new IllegalArgumentException("this suggester doesn't support payloads");
    }
    if (iterator.hasContexts()) {
        throw new IllegalArgumentException("this suggester doesn't support contexts");
    }

    count = 0;
    final InputIterator sorted = new WFSTInputIterator(tempDir, tempFileNamePrefix, iterator);
    // Reusable buffer for the bytes -> ints conversion required by Builder.add.
    final IntsRefBuilder intsSpare = new IntsRefBuilder();
    final PositiveIntOutputs weightOutputs = PositiveIntOutputs.getSingleton();
    final Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, weightOutputs);

    // Copy of the last term added; stays null until the first term has been seen.
    BytesRefBuilder lastAdded = null;
    for (BytesRef term = sorted.next(); term != null; term = sorted.next()) {
        // Read the weight before the duplicate check, for every term the iterator yields.
        final long cost = sorted.weight();
        if (lastAdded != null && term.equals(lastAdded.get())) {
            // for duplicate suggestions, the best weight is actually added
            continue;
        }
        if (lastAdded == null) {
            lastAdded = new BytesRefBuilder();
        }
        Util.toIntsRef(term, intsSpare);
        fstBuilder.add(intsSpare.get(), cost);
        lastAdded.copyBytes(term);
        count++;
    }

    fst = fstBuilder.finish();
}
Also used : InputIterator(org.apache.lucene.search.suggest.InputIterator) SortedInputIterator(org.apache.lucene.search.suggest.SortedInputIterator) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) PositiveIntOutputs(org.apache.lucene.util.fst.PositiveIntOutputs) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) Builder(org.apache.lucene.util.fst.Builder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 38 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class WFSTCompletionLookup method lookup.

/**
 * Looks up completions for {@code key} in the weighted FST.
 *
 * <p>The key is matched exactly as a prefix via {@code lookupPrefix}. When
 * {@code exactFirst} is set and the prefix itself ends on a final arc, the key is
 * reported first; the remaining slots are filled from {@code Util.shortestPaths}.
 *
 * @param key the prefix to complete
 * @param contexts must be {@code null}
 * @param onlyMorePopular must be {@code false}
 * @param num maximum number of results to return; asserted to be positive
 * @return the matching results, or an empty list when no FST has been built or the
 *         prefix lookup yields no output
 * @throws IllegalArgumentException if {@code contexts} is non-null or
 *         {@code onlyMorePopular} is {@code true}
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the FST
 */
@Override
public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
    if (contexts != null) {
        throw new IllegalArgumentException("this suggester doesn't support contexts");
    }
    assert num > 0;
    if (onlyMorePopular) {
        throw new IllegalArgumentException("this suggester only works with onlyMorePopular=false");
    }
    if (fst == null) {
        return Collections.emptyList();
    }

    // Byte form of the key; later reused as the buffer for prefix + suffix.
    final BytesRefBuilder keyBytes = new BytesRefBuilder();
    keyBytes.copyChars(key);
    final int keyLength = keyBytes.length();

    // Match the prefix portion exactly; prefixArc is filled in by lookupPrefix.
    final Arc<Long> prefixArc = new Arc<>();
    final Long prefixWeight;
    try {
        prefixWeight = lookupPrefix(keyBytes.get(), prefixArc);
    } catch (IOException bogus) {
        throw new RuntimeException(bogus);
    }
    if (prefixWeight == null) {
        return Collections.emptyList();
    }

    final List<LookupResult> hits = new ArrayList<>(num);
    final CharsRefBuilder text = new CharsRefBuilder();
    int remaining = num;

    if (exactFirst && prefixArc.isFinal()) {
        text.copyUTF8Bytes(keyBytes.get());
        hits.add(new LookupResult(text.toString(), decodeWeight(prefixWeight + prefixArc.nextFinalOutput)));
        remaining--;
        if (remaining == 0) {
            // The exact match already used up the only requested slot.
            return hits;
        }
    }

    // Complete the top-N from the prefix arc onwards.
    final TopResults<Long> topN;
    try {
        topN = Util.shortestPaths(fst, prefixArc, prefixWeight, weightComparator, remaining, !exactFirst);
        assert topN.isComplete;
    } catch (IOException bogus) {
        throw new RuntimeException(bogus);
    }

    final BytesRefBuilder tail = new BytesRefBuilder();
    for (Result<Long> path : topN) {
        // Truncate back to the bare key, then append this completion's suffix.
        keyBytes.setLength(keyLength);
        Util.toBytesRef(path.input, tail);
        keyBytes.append(tail);
        text.copyUTF8Bytes(keyBytes.get());
        hits.add(new LookupResult(text.toString(), decodeWeight(path.output)));
    }
    return hits;
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Arc(org.apache.lucene.util.fst.FST.Arc) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder)

Example 39 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class ExpandComponent method getGroupQuery.

/**
 * Builds a constant-score query over the terms derived from {@code groupSet}.
 *
 * <p>Each value in {@code groupSet} is turned into a string with
 * {@code numericToString}, converted to its indexed form with
 * {@code ft.readableToIndexed}, and stored in an array of {@code size} slots. The
 * array is then wrapped as {@code TermInSetQuery} -> {@code QueryWrapperFilter} ->
 * {@code SolrConstantScoreQuery}.
 *
 * <p>Slots are filled sequentially from index 0, so {@code size} must be at least the
 * number of values in {@code groupSet}; any surplus slots are left {@code null}.
 *
 * @param fname field the terms belong to
 * @param ft field type used for the value conversions
 * @param size length of the term array to allocate
 * @param groupSet numeric group values to match
 * @return the wrapped term-set query
 */
private Query getGroupQuery(String fname, FieldType ft, int size, LongHashSet groupSet) {
    final BytesRef[] terms = new BytesRef[size];
    // Reused conversion buffer; toBytesRef() below hands out a fresh BytesRef per value.
    final BytesRefBuilder spare = new BytesRefBuilder();
    int filled = 0;
    for (Iterator<LongCursor> cursors = groupSet.iterator(); cursors.hasNext(); ) {
        final String readable = numericToString(ft, cursors.next().value);
        ft.readableToIndexed(readable, spare);
        terms[filled++] = spare.toBytesRef();
    }
    final TermInSetQuery termSet = new TermInSetQuery(fname, terms);
    return new SolrConstantScoreQuery(new QueryWrapperFilter(termSet));
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) LongCursor(com.carrotsearch.hppc.cursors.LongCursor) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) QueryWrapperFilter(org.apache.solr.search.QueryWrapperFilter) BytesRef(org.apache.lucene.util.BytesRef)

Example 40 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class EnumField method storedToIndexed.

/**
 * {@inheritDoc}
 *
 * <p>Returns {@code null} when the field has no numeric value. Otherwise the value's
 * {@code int} form is encoded with {@code LegacyNumericUtils.intToPrefixCoded}
 * (passing 0 as its second argument) and the encoded bytes are returned decoded as a
 * UTF-8 string.
 */
@Override
public String storedToIndexed(IndexableField f) {
    final Number stored = f.numericValue();
    if (stored != null) {
        final BytesRefBuilder encoded = new BytesRefBuilder();
        LegacyNumericUtils.intToPrefixCoded(stored.intValue(), 0, encoded);
        return encoded.get().utf8ToString();
    }
    return null;
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder)

Aggregations

BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 141, BytesRef (org.apache.lucene.util.BytesRef): 73, ArrayList (java.util.ArrayList): 20, IOException (java.io.IOException): 16, HashSet (java.util.HashSet): 14, Term (org.apache.lucene.index.Term): 14, ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 14, FieldType (org.apache.solr.schema.FieldType): 13, IndexInput (org.apache.lucene.store.IndexInput): 12, CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder): 10, IntsRef (org.apache.lucene.util.IntsRef): 10, BufferedChecksumIndexInput (org.apache.lucene.store.BufferedChecksumIndexInput): 9, SchemaField (org.apache.solr.schema.SchemaField): 9, ParseException (java.text.ParseException): 8, IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder): 8, DecimalFormat (java.text.DecimalFormat): 7, HashMap (java.util.HashMap): 7, Map (java.util.Map): 7, Directory (org.apache.lucene.store.Directory): 7, IndexOutput (org.apache.lucene.store.IndexOutput): 6