Search in sources :

Example 26 with BytesRefIterator

use of org.apache.lucene.util.BytesRefIterator in project elasticsearch by elastic.

the class AbstractBytesReferenceTestCase method getSinglePageOrNull.

/**
 * Returns the content of {@code ref} as one {@link BytesRef} when its iterator
 * yields exactly one element.
 *
 * @param ref the reference to inspect
 * @return a new empty {@link BytesRef} when {@code ref.length()} is not greater than zero;
 *         a {@code clone()} of the first element when the iterator produces nothing after it;
 *         {@code null} when the iterator produces a second element
 * @throws IOException if the underlying iterator fails
 */
public static BytesRef getSinglePageOrNull(BytesReference ref) throws IOException {
    if (ref.length() <= 0) {
        return new BytesRef();
    }
    final BytesRefIterator pages = ref.iterator();
    // Clone before advancing: the second next() call decides whether this was the only element.
    final BytesRef firstPage = pages.next().clone();
    return pages.next() == null ? firstPage : null;
}
Also used : BytesRefIterator(org.apache.lucene.util.BytesRefIterator) BytesRef(org.apache.lucene.util.BytesRef)

Example 27 with BytesRefIterator

use of org.apache.lucene.util.BytesRefIterator in project elasticsearch by elastic.

the class AbstractBytesReferenceTestCase method testIteratorRandom.

/**
 * Builds a randomly sized reference, optionally slices it and/or re-wraps it in a
 * {@code BytesArray}, then checks that concatenating everything its iterator returns
 * reproduces {@code BytesReference.toBytes} of the same reference.
 */
public void testIteratorRandom() throws IOException {
    // Draw the page multiplier first, then the length, to keep the random sequence unchanged.
    final int pageMultiplier = randomIntBetween(2, 8);
    final int length = randomIntBetween(10, PAGE_SIZE * pageMultiplier);
    BytesReference pbr = newBytesReference(length);
    if (randomBoolean()) {
        final int start = randomIntBetween(0, pbr.length());
        final int size = randomIntBetween(0, pbr.length() - start);
        pbr = pbr.slice(start, size);
    }
    if (randomBoolean()) {
        pbr = new BytesArray(pbr.toBytesRef());
    }
    final BytesRefIterator iterator = pbr.iterator();
    final BytesRefBuilder collected = new BytesRefBuilder();
    for (BytesRef chunk = iterator.next(); chunk != null; chunk = iterator.next()) {
        collected.append(chunk);
    }
    final byte[] expected = BytesReference.toBytes(pbr);
    final byte[] actual = BytesRef.deepCopyOf(collected.toBytesRef()).bytes;
    assertArrayEquals(expected, actual);
}
Also used : BytesRefIterator(org.apache.lucene.util.BytesRefIterator) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 28 with BytesRefIterator

use of org.apache.lucene.util.BytesRefIterator in project elasticsearch by elastic.

the class OrdinalsBuilder method buildFromTerms.

/**
 * Iterates all terms in the given {@link TermsEnum} and associates each
 * term's ordinal with the documents of that term. The work happens lazily:
 * each call to {@code next()} on the returned {@link BytesRefIterator}
 * processes one term, so the caller must exhaust the iterator. The first
 * returned value is associated with the ordinal {@code 1}, and so on.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values it should
 * be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)} or
 * {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If it is
 * not wrapped, the returned {@link BytesRefIterator} will contain partial
 * precision terms rather than only full-precision terms.
 * </p>
 *
 * @param termsEnum the terms to consume
 * @return an iterator that returns each term after recording its documents,
 *         and {@code null} once {@code termsEnum} is exhausted
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {

        // Passed back into postings(...) on every call.
        private PostingsEnum postings = null;

        @Override
        public BytesRef next() throws IOException {
            final BytesRef term = termsEnum.next();
            if (term == null) {
                return null;
            }
            postings = termsEnum.postings(postings, PostingsEnum.NONE);
            nextOrdinal();
            for (int doc = postings.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postings.nextDoc()) {
                addDoc(doc);
            }
            return term;
        }
    };
}
Also used : BytesRefIterator(org.apache.lucene.util.BytesRefIterator) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef)

Example 29 with BytesRefIterator

use of org.apache.lucene.util.BytesRefIterator in project lucene-solr by apache.

the class SpellChecker method indexDictionary.

/**
 * Indexes the data from the given {@link Dictionary}.
 * <p>
 * Entries whose string form is shorter than three characters are skipped, as
 * are entries for which {@code seekExact} succeeds on the {@code F_WORD}
 * terms of any leaf of the current reader. Every other entry is added to
 * the writer as a new document. After the writer is closed the searcher is
 * swapped so that later lookups see the newly indexed words.
 * </p>
 *
 * @param dict Dictionary to index
 * @param config {@link IndexWriterConfig} to use
 * @param fullMerge whether or not the spellcheck index should be fully merged
 * @throws AlreadyClosedException if the Spellchecker is already closed
 * @throws IOException If there is a low-level I/O error.
 */
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException {
    synchronized (modifyCurrentIndexLock) {
        ensureOpen();
        final Directory dir = this.spellIndex;
        // try-with-resources closes the writer on every path; previously an exception
        // while indexing skipped writer.close() and left the writer open.
        // NOTE(review): closing on the failure path may commit documents added before
        // the failure, depending on the writer configuration -- confirm this is acceptable.
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            final IndexSearcher indexSearcher = obtainSearcher();
            try {
                // Read from the searcher acquired above (the one released in the finally
                // block) rather than from the shared field, and do it inside the try so
                // the searcher is released even if collecting the term enums fails.
                final IndexReader reader = indexSearcher.getIndexReader();
                final List<TermsEnum> termsEnums = new ArrayList<>();
                if (reader.maxDoc() > 0) {
                    for (final LeafReaderContext ctx : reader.leaves()) {
                        final Terms terms = ctx.reader().terms(F_WORD);
                        if (terms != null) {
                            termsEnums.add(terms.iterator());
                        }
                    }
                }
                final BytesRefIterator iter = dict.getEntryIterator();
                BytesRef currentTerm;
                nextWord: while ((currentTerm = iter.next()) != null) {
                    final String word = currentTerm.utf8ToString();
                    final int len = word.length();
                    if (len < 3) {
                        // too short we bail but "too long" is fine...
                        continue;
                    }
                    // An empty list simply makes this loop a no-op.
                    for (final TermsEnum te : termsEnums) {
                        if (te.seekExact(currentTerm)) {
                            continue nextWord;
                        }
                    }
                    // ok index the word
                    final Document doc = createDocument(word, getMin(len), getMax(len));
                    writer.addDocument(doc);
                }
            } finally {
                releaseSearcher(indexSearcher);
            }
            if (fullMerge) {
                writer.forceMerge(1);
            }
        }
        // TODO: this isn't that great, maybe in the future SpellChecker should take
        // IWC in its ctor / keep its writer open?
        // also re-open the spell index to see our own changes when the next suggestion
        // is fetched:
        swapSearcher(dir);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) ArrayList(java.util.ArrayList) Terms(org.apache.lucene.index.Terms) Document(org.apache.lucene.document.Document) TermsEnum(org.apache.lucene.index.TermsEnum) BytesRefIterator(org.apache.lucene.util.BytesRefIterator) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 30 with BytesRefIterator

use of org.apache.lucene.util.BytesRefIterator in project lucene-solr by apache.

the class TestBytesRefArray method testAppend.

/**
 * Appends random strings to a {@code BytesRefArray} and verifies them three ways:
 * sequential {@code get}, {@code get} at random indices, and two full passes with
 * the array's iterator. The whole procedure runs twice; before the second round
 * the array and the reference list are randomly either cleared or kept.
 */
public void testAppend() throws IOException {
    Random random = random();
    BytesRefArray list = new BytesRefArray(Counter.newCounter());
    List<String> stringList = new ArrayList<>();
    for (int j = 0; j < 2; j++) {
        if (j > 0 && random.nextBoolean()) {
            list.clear();
            stringList.clear();
        }
        int entries = atLeast(500);
        BytesRefBuilder spare = new BytesRefBuilder();
        int initSize = list.size();
        for (int i = 0; i < entries; i++) {
            String randomRealisticUnicodeString = TestUtil.randomRealisticUnicodeString(random);
            spare.copyChars(randomRealisticUnicodeString);
            // append is expected to return the index the value was stored at
            assertEquals(i + initSize, list.append(spare.get()));
            stringList.add(randomRealisticUnicodeString);
        }
        for (int i = 0; i < entries; i++) {
            assertNotNull(list.get(spare, i));
            assertEquals("entry " + i + " doesn't match", stringList.get(i), spare.get().utf8ToString());
        }
        // check random
        for (int i = 0; i < entries; i++) {
            int e = random.nextInt(entries);
            assertNotNull(list.get(spare, e));
            // Report the index that was actually checked (e), not the loop counter.
            assertEquals("entry " + e + " doesn't match", stringList.get(e), spare.get().utf8ToString());
        }
        // iterate twice, requesting a new iterator from the array for each pass
        for (int i = 0; i < 2; i++) {
            BytesRefIterator iterator = list.iterator();
            for (String string : stringList) {
                assertEquals(string, iterator.next().utf8ToString());
            }
        }
    }
}
Also used : BytesRefIterator(org.apache.lucene.util.BytesRefIterator)

Aggregations

BytesRefIterator (org.apache.lucene.util.BytesRefIterator)37 BytesRef (org.apache.lucene.util.BytesRef)35 IOException (java.io.IOException)10 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)10 ArrayList (java.util.ArrayList)4 ByteBuf (io.netty.buffer.ByteBuf)2 CompositeByteBuf (io.netty.buffer.CompositeByteBuf)2 ByteBuffer (java.nio.ByteBuffer)2 IndexReader (org.apache.lucene.index.IndexReader)2 IndexWriter (org.apache.lucene.index.IndexWriter)2 Directory (org.apache.lucene.store.Directory)2 IndexOutput (org.apache.lucene.store.IndexOutput)2 DataFormatException (java.util.zip.DataFormatException)1 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)1 Document (org.apache.lucene.document.Document)1 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)1 PostingsEnum (org.apache.lucene.index.PostingsEnum)1 Terms (org.apache.lucene.index.Terms)1 TermsEnum (org.apache.lucene.index.TermsEnum)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1