Search in sources :

Example 91 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project elasticsearch by elastic.

the class DirectCandidateGenerator method postFilter.

/**
 * Runs a candidate through the optional post-filter analyzer and collects the outcome.
 * <p>
 * When no post filter is configured, the candidate is appended to {@code candidates} as is.
 * Otherwise {@code candidate.term} is analyzed with the post filter and one new candidate is
 * appended for every token the analysis hands to the consumer.
 *
 * @param candidate  the candidate to filter
 * @param spare      scratch chars builder passed through to {@code analyze}
 * @param byteSpare  scratch bytes builder that is filled with each emitted token
 * @param candidates list the resulting candidates are appended to
 * @throws IOException propagated from the analysis or from the token consumer
 */
protected void postFilter(final Candidate candidate, final CharsRefBuilder spare, BytesRefBuilder byteSpare, final List<Candidate> candidates) throws IOException {
    if (postFilter == null) {
        candidates.add(candidate);
        return;
    }
    // Final alias so the anonymous consumer below can capture the scratch builder.
    final BytesRefBuilder tokenBytes = byteSpare;
    final TokenConsumer collector = new TokenConsumer() {

        @Override
        public void nextToken() throws IOException {
            this.fillBytesRef(tokenBytes);
            final boolean sameAsInput = posIncAttr.getPositionIncrement() > 0 && tokenBytes.get().bytesEquals(candidate.term);
            if (!sameAsInput) {
                // Token differs from the input term (or has no position increment): keep the
                // input frequency and use nonErrorLikelihood as the string distance argument.
                candidates.add(new Candidate(tokenBytes.toBytesRef(), candidate.frequency, nonErrorLikelihood, score(candidate.frequency, candidate.stringDistance, dictSize), false));
                return;
            }
            final BytesRef lookupTerm = tokenBytes.toBytesRef();
            // We should not use frequency(term) here because it will analyze the term again
            // If preFilter and postFilter are the same analyzer it would fail.
            final long termFrequency = internalFrequency(lookupTerm);
            candidates.add(new Candidate(tokenBytes.toBytesRef(), termFrequency, candidate.stringDistance, score(candidate.frequency, candidate.stringDistance, dictSize), false));
        }
    };
    analyze(postFilter, candidate.term, field, collector, spare);
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IOException(java.io.IOException) BytesRef(org.apache.lucene.util.BytesRef)

Example 92 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project elasticsearch by elastic.

the class NoisyChannelSpellChecker method getCorrections.

/**
 * Computes corrections for the tokens produced by {@code stream}.
 * <p>
 * The stream is first consumed into a list of candidate sets: every token starts a new set,
 * except tokens of synonym type with a position increment of 0, which are added to the
 * current set when the generator reports a positive frequency for them. If
 * {@code requireUnigram} is set, tokens of the default shingle type are skipped. The
 * generator then draws candidates for each set and a {@link CandidateScorer} selects the
 * best corrections.
 *
 * @param stream         token stream to read the input terms from
 * @param generator      source of candidates and term frequencies
 * @param maxErrors      passed to the scorer when searching for the best candidates
 * @param numCorrections passed to the {@link CandidateScorer} constructor
 * @param wordScorer     passed to the {@link CandidateScorer} constructor
 * @param confidence     when greater than 0, the score of the original terms multiplied by
 *                       this value becomes the cutoff score; otherwise the cutoff stays at
 *                       {@code Double.MIN_VALUE}
 * @param gramSize       passed to the {@link CandidateScorer} constructor
 * @return {@code Result.EMPTY} when no candidate set was collected or the number of sets
 *         reaches {@code tokenLimit}; otherwise the best corrections with the cutoff score
 * @throws IOException propagated from analysis or from the generator
 * @throws IllegalStateException if unigrams are required, tokens were seen, and none of
 *         them was accepted as a unigram
 */
public Result getCorrections(TokenStream stream, final CandidateGenerator generator, float maxErrors, int numCorrections, WordScorer wordScorer, float confidence, int gramSize) throws IOException {
    final List<CandidateSet> collectedSets = new ArrayList<>();
    final DirectCandidateGenerator.TokenConsumer setBuilder = new DirectCandidateGenerator.TokenConsumer() {

        private final BytesRefBuilder tokenBytes = new BytesRefBuilder();

        private TypeAttribute tokenType;

        // Set currently being filled; appended to collectedSets once the next one starts.
        private CandidateSet openSet;

        private boolean sawUnigram;

        private boolean sawToken;

        @Override
        public void reset(TokenStream tokens) {
            super.reset(tokens);
            tokenType = tokens.addAttribute(TypeAttribute.class);
        }

        @Override
        public void nextToken() throws IOException {
            sawToken = true;
            final BytesRef token = fillBytesRef(tokenBytes);
            // NOTE(review): token types are compared by reference (==), not equals() -- confirm
            // the filters always hand out these exact constant instances.
            if (requireUnigram && tokenType.type() == ShingleFilter.DEFAULT_TOKEN_TYPE) {
                return;
            }
            sawUnigram = true;
            final boolean stackedSynonym = posIncAttr.getPositionIncrement() == 0 && tokenType.type() == SynonymFilter.TYPE_SYNONYM;
            if (!stackedSynonym) {
                if (openSet != null) {
                    collectedSets.add(openSet);
                }
                openSet = new CandidateSet(Candidate.EMPTY, generator.createCandidate(BytesRef.deepCopyOf(token), true));
                return;
            }
            assert openSet != null;
            final long frequency = generator.frequency(token);
            if (frequency > 0) {
                openSet.addOneCandidate(generator.createCandidate(BytesRef.deepCopyOf(token), frequency, realWordLikelihood));
            }
        }

        @Override
        public void end() {
            if (openSet != null) {
                collectedSets.add(openSet);
            }
            if (requireUnigram && !sawUnigram && sawToken) {
                throw new IllegalStateException("At least one unigram is required but all tokens were ngrams");
            }
        }
    };
    DirectCandidateGenerator.analyze(stream, setBuilder);

    final int setCount = collectedSets.size();
    if (setCount == 0 || setCount >= tokenLimit) {
        return Result.EMPTY;
    }
    for (CandidateSet set : collectedSets) {
        generator.drawCandidates(set);
    }

    double cutoffScore = Double.MIN_VALUE;
    final CandidateScorer scorer = new CandidateScorer(wordScorer, numCorrections, gramSize);
    final CandidateSet[] candidateSets = collectedSets.toArray(new CandidateSet[setCount]);
    if (confidence > 0.0) {
        // Score the phrase as typed (each set's original term) to derive the cutoff.
        final Candidate[] originalTerms = new Candidate[candidateSets.length];
        int slot = 0;
        for (CandidateSet set : candidateSets) {
            originalTerms[slot++] = set.originalTerm;
        }
        final double inputPhraseScore = scorer.score(originalTerms, candidateSets);
        cutoffScore = inputPhraseScore * confidence;
    }
    final Correction[] best = scorer.findBestCandiates(candidateSets, maxErrors, cutoffScore);
    return new Result(best, cutoffScore);
}
Also used : Candidate(org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) TokenStream(org.apache.lucene.analysis.TokenStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CandidateSet(org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) BytesRef(org.apache.lucene.util.BytesRef)

Example 93 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project elasticsearch by elastic.

the class SimpleLuceneTests method testSimpleNumericOps.

/**
 * Indexes one document with a text {@code _id} field and a stored legacy int field
 * {@code test}, then checks that the document is found both by its id and by the
 * prefix-coded term for the value 2, and that the stored value reads back as "2".
 * <p>
 * The directory, writer and reader are managed with try-with-resources so they are closed
 * even when an assertion fails.
 *
 * @throws Exception if indexing or searching fails
 */
public void testSimpleNumericOps() throws Exception {
    try (Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        document.add(new LegacyIntField("test", 2, LegacyIntField.TYPE_STORED));
        indexWriter.addDocument(document);

        // The reader is opened from the writer and is closed before the writer and directory.
        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // Look the document up by its id.
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
            Document doc = searcher.doc(topDocs.scoreDocs[0].doc);
            IndexableField f = doc.getField("test");
            assertThat(f.stringValue(), equalTo("2"));

            // Look the document up by the prefix-coded form of the int value.
            BytesRefBuilder bytes = new BytesRefBuilder();
            LegacyNumericUtils.intToPrefixCoded(2, 0, bytes);
            topDocs = searcher.search(new TermQuery(new Term("test", bytes.get())), 1);
            doc = searcher.doc(topDocs.scoreDocs[0].doc);
            f = doc.getField("test");
            assertThat(f.stringValue(), equalTo("2"));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) TopDocs(org.apache.lucene.search.TopDocs) IndexableField(org.apache.lucene.index.IndexableField) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) LegacyIntField(org.apache.lucene.document.LegacyIntField)

Example 94 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project elasticsearch by elastic.

the class BytesBinaryDVAtomicFieldData method getBytesValues.

/**
 * Returns a per-document view over the binary values held in {@code values}.
 * <p>
 * As decoded here, a document's bytes start with a vInt value count followed, for each
 * value, by a vInt length and that many bytes. An empty byte sequence means the document
 * has no values. Decoded values are kept in builders that are reused across documents, so
 * what {@code valueAt} returns is overwritten by the next {@code setDocument} call.
 */
@Override
public SortedBinaryDocValues getBytesValues() {
    return new SortedBinaryDocValues() {

        private final ByteArrayDataInput input = new ByteArrayDataInput();

        // One reusable builder per value slot; grown on demand and never shrunk.
        private BytesRefBuilder[] builders = new BytesRefBuilder[0];

        private int valueCount;

        @Override
        public void setDocument(int docId) {
            final BytesRef encoded = values.get(docId);
            input.reset(encoded.bytes, encoded.offset, encoded.length);
            if (encoded.length == 0) {
                valueCount = 0;
                return;
            }
            valueCount = input.readVInt();
            if (valueCount > builders.length) {
                int next = builders.length;
                builders = Arrays.copyOf(builders, ArrayUtil.oversize(valueCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
                // Only the newly added slots need a fresh builder.
                while (next < builders.length) {
                    builders[next] = new BytesRefBuilder();
                    ++next;
                }
            }
            for (int slot = 0; slot < valueCount; ++slot) {
                final int valueLength = input.readVInt();
                final BytesRefBuilder target = builders[slot];
                target.grow(valueLength);
                input.readBytes(target.bytes(), 0, valueLength);
                target.setLength(valueLength);
            }
        }

        @Override
        public int count() {
            return valueCount;
        }

        @Override
        public BytesRef valueAt(int index) {
            return builders[index].get();
        }
    };
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesRef(org.apache.lucene.util.BytesRef) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues)

Example 95 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project elasticsearch by elastic.

the class BytesRefHashTests method assertAllIn.

/**
 * Adds every string to the hash a second time and asserts that each one is already there.
 * <p>
 * For each string this checks that the id decoded from the returned key maps back to the
 * same string, that the hash size did not change, and that the returned key is smaller
 * than the size measured before the loop.
 *
 * @param strings the strings expected to be present in {@code hash}
 * @param hash    the hash under test
 */
private void assertAllIn(Set<String> strings, BytesRefHash hash) {
    final long expectedSize = hash.size();
    final BytesRefBuilder encoder = new BytesRefBuilder();
    final BytesRef reusable = new BytesRef();
    for (final String expected : strings) {
        encoder.copyChars(expected);
        // add again to check duplicates
        final long key = hash.add(encoder.get());
        // The key from the repeated add is decoded as (-key) - 1 to get the existing id.
        final long existingId = (-key) - 1;
        final String stored = hash.get(existingId, reusable).utf8ToString();
        assertEquals(expected, stored);
        assertEquals(expectedSize, hash.size());
        assertTrue("key: " + key + " count: " + expectedSize + " string: " + expected, key < expectedSize);
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 150; BytesRef (org.apache.lucene.util.BytesRef): 79; ArrayList (java.util.ArrayList): 21; IOException (java.io.IOException): 17; Term (org.apache.lucene.index.Term): 16; HashSet (java.util.HashSet): 15; ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 14; FieldType (org.apache.solr.schema.FieldType): 14; IndexInput (org.apache.lucene.store.IndexInput): 12; BytesRefIterator (org.apache.lucene.util.BytesRefIterator): 10; CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder): 10; IntsRef (org.apache.lucene.util.IntsRef): 10; SchemaField (org.apache.solr.schema.SchemaField): 10; BufferedChecksumIndexInput (org.apache.lucene.store.BufferedChecksumIndexInput): 9; ParseException (java.text.ParseException): 8; IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder): 8; DecimalFormat (java.text.DecimalFormat): 7; HashMap (java.util.HashMap): 7; Map (java.util.Map): 7; Directory (org.apache.lucene.store.Directory): 7