
Example 1 with TermIterator

Use of org.apache.lucene.index.PrefixCodedTerms.TermIterator in the project lucene-solr by apache.

From the class TestPrefixCodedTerms, the method testOne:

public void testOne() {
    Term term = new Term("foo", "bogus");
    PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
    b.add(term);
    PrefixCodedTerms pb = b.finish();
    TermIterator iter = pb.iterator();
    assertNotNull(iter.next());
    assertEquals("foo", iter.field());
    assertEquals("bogus", iter.bytes.utf8ToString());
    assertNull(iter.next());
}
Also used: TermIterator (org.apache.lucene.index.PrefixCodedTerms.TermIterator)
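
Beyond this single-term case, the same iterator is usually drained in a loop. The sketch below is not taken from the project; it assumes a PrefixCodedTerms instance built as above, and it deep-copies each term because the BytesRef returned by next() is reused between calls (the later examples copy it for the same reason).

// Required imports (assumed): org.apache.lucene.index.PrefixCodedTerms,
// org.apache.lucene.index.PrefixCodedTerms.TermIterator, org.apache.lucene.index.Term,
// org.apache.lucene.util.BytesRef, java.util.ArrayList, java.util.List
static List<Term> toTermList(PrefixCodedTerms pb) {
    List<Term> out = new ArrayList<>();
    TermIterator iter = pb.iterator();
    for (BytesRef bytes = iter.next(); bytes != null; bytes = iter.next()) {
        // the returned BytesRef is reused, so copy it before storing
        out.add(new Term(iter.field(), BytesRef.deepCopyOf(bytes)));
    }
    return out;
}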

Example 2 with TermIterator

Use of org.apache.lucene.index.PrefixCodedTerms.TermIterator in the project lucene-solr by apache.

From the class TestPrefixCodedTerms, the method testRandom:

public void testRandom() {
    // a TreeSet keeps the terms in sorted order before they are handed to the Builder
    Set<Term> terms = new TreeSet<>();
    int nterms = atLeast(10000);
    for (int i = 0; i < nterms; i++) {
        Term term = new Term(TestUtil.randomUnicodeString(random(), 2), TestUtil.randomUnicodeString(random()));
        terms.add(term);
    }
    PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
    for (Term ref : terms) {
        b.add(ref);
    }
    PrefixCodedTerms pb = b.finish();
    TermIterator iter = pb.iterator();
    Iterator<Term> expected = terms.iterator();
    assertEquals(terms.size(), pb.size());
    //System.out.println("TEST: now iter");
    while (iter.next() != null) {
        assertTrue(expected.hasNext());
        assertEquals(expected.next(), new Term(iter.field(), iter.bytes));
    }
    assertFalse(expected.hasNext());
}
Also used: TreeSet (java.util.TreeSet), TermIterator (org.apache.lucene.index.PrefixCodedTerms.TermIterator)
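
The TreeSet in testRandom is not incidental: the terms reach the Builder already sorted. A minimal construction-side sketch, assuming (based on this test rather than on anything stated in the source) that Builder.add expects ascending (field, text) order:

// Required imports (assumed): org.apache.lucene.index.PrefixCodedTerms,
// org.apache.lucene.index.Term, java.util.SortedSet, java.util.TreeSet
static PrefixCodedTerms buildSorted() {
    // Term is Comparable by (field, text), so a TreeSet yields ascending order for free
    SortedSet<Term> sorted = new TreeSet<>();
    sorted.add(new Term("id", "17"));
    sorted.add(new Term("id", "42"));
    sorted.add(new Term("title", "lucene"));
    PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
    for (Term t : sorted) {
        builder.add(t);
    }
    return builder.finish();
}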

Example 3 with TermIterator

Use of org.apache.lucene.index.PrefixCodedTerms.TermIterator in the project lucene-solr by apache.

From the class TermInSetQuery, the method rewrite:

@Override
public Query rewrite(IndexReader reader) throws IOException {
    final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
    if (termData.size() <= threshold) {
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        TermIterator iterator = termData.iterator();
        for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
            // the iterator reuses its BytesRef across calls, so deep-copy each term before keeping it
            bq.add(new TermQuery(new Term(iterator.field(), BytesRef.deepCopyOf(term))), Occur.SHOULD);
        }
        return new ConstantScoreQuery(bq.build());
    }
    return super.rewrite(reader);
}
Also used: DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder), BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder), TermIterator (org.apache.lucene.index.PrefixCodedTerms.TermIterator), Term (org.apache.lucene.index.Term), BytesRef (org.apache.lucene.util.BytesRef)
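
For context, a hedged caller-side sketch of how this rewrite is reached; the field name, the values, and the IndexSearcher parameter are illustrative assumptions, not taken from the source.

// Required imports (assumed): org.apache.lucene.search.TermInSetQuery, org.apache.lucene.search.Query,
// org.apache.lucene.search.IndexSearcher, org.apache.lucene.util.BytesRef,
// java.io.IOException, java.util.Arrays, java.util.List
static Query rewriteSmallSet(IndexSearcher searcher) throws IOException {
    List<BytesRef> ids = Arrays.asList(new BytesRef("3"), new BytesRef("8"), new BytesRef("21"));
    Query query = new TermInSetQuery("id", ids);
    // a small term set is rewritten (as above) into a constant-score BooleanQuery of TermQuery clauses;
    // larger sets fall through to super.rewrite
    return searcher.rewrite(query);
}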

Example 4 with TermIterator

Use of org.apache.lucene.index.PrefixCodedTerms.TermIterator in the project lucene-solr by apache.

From the class TestDocumentsWriterDeleteQueue, the method testStressDeleteQueue:

public void testStressDeleteQueue() throws InterruptedException {
    DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
    Set<Term> uniqueValues = new HashSet<>();
    final int size = 10000 + random().nextInt(500) * RANDOM_MULTIPLIER;
    Integer[] ids = new Integer[size];
    for (int i = 0; i < ids.length; i++) {
        ids[i] = random().nextInt();
        uniqueValues.add(new Term("id", ids[i].toString()));
    }
    CountDownLatch latch = new CountDownLatch(1);
    AtomicInteger index = new AtomicInteger(0);
    final int numThreads = 2 + random().nextInt(5);
    UpdateThread[] threads = new UpdateThread[numThreads];
    for (int i = 0; i < threads.length; i++) {
        threads[i] = new UpdateThread(queue, index, ids, latch);
        threads[i].start();
    }
    latch.countDown();
    for (int i = 0; i < threads.length; i++) {
        threads[i].join();
    }
    for (UpdateThread updateThread : threads) {
        DeleteSlice slice = updateThread.slice;
        queue.updateSlice(slice);
        BufferedUpdates deletes = updateThread.deletes;
        slice.apply(deletes, BufferedUpdates.MAX_INT);
        assertEquals(uniqueValues, deletes.terms.keySet());
    }
    queue.tryApplyGlobalSlice();
    Set<Term> frozenSet = new HashSet<>();
    BytesRefBuilder builder = new BytesRefBuilder();
    TermIterator iter = queue.freezeGlobalBuffer(null).termIterator();
    while (iter.next() != null) {
        // iter.bytes is reused between calls to next(), so copy it before building the Term
        builder.copyBytes(iter.bytes);
        frozenSet.add(new Term(iter.field(), builder.toBytesRef()));
    }
    assertEquals("num deletes must be 0 after freeze", 0, queue.numGlobalTermDeletes());
    assertEquals(uniqueValues.size(), frozenSet.size());
    assertEquals(uniqueValues, frozenSet);
}
Also used: BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder), TermIterator (org.apache.lucene.index.PrefixCodedTerms.TermIterator), CountDownLatch (java.util.concurrent.CountDownLatch), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), DeleteSlice (org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice), HashSet (java.util.HashSet)

Example 5 with TermIterator

Use of org.apache.lucene.index.PrefixCodedTerms.TermIterator in the project lucene-solr by apache.

From the class MergedPrefixCodedTermsIterator, the method next:

@Override
public BytesRef next() {
    if (termQueue.size() == 0) {
        // No more terms in current field:
        if (fieldQueue.size() == 0) {
            // No more fields:
            field = null;
            return null;
        }
        // Transfer all iterators on the next field into the term queue:
        TermIterator top = fieldQueue.pop();
        termQueue.add(top);
        field = top.field;
        assert field != null;
        while (fieldQueue.size() != 0 && fieldQueue.top().field.equals(top.field)) {
            TermIterator iter = fieldQueue.pop();
            assert iter.field.equals(field);
            // TODO: a little bit evil; we do this so we can == on field down below:
            iter.field = field;
            termQueue.add(iter);
        }
        return termQueue.top().bytes;
    } else {
        TermIterator top = termQueue.top();
        if (top.next() == null) {
            termQueue.pop();
        } else if (top.field() != field) {
            // Field changed
            termQueue.pop();
            fieldQueue.add(top);
        } else {
            termQueue.updateTop();
        }
        if (termQueue.size() == 0) {
            // Recurse (just once) to go to next field:
            return next();
        } else {
            // Still terms left in this field
            return termQueue.top().bytes;
        }
    }
}
Also used: TermIterator (org.apache.lucene.index.PrefixCodedTerms.TermIterator)
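
A hedged consumption sketch for the merged iterator. Assumptions: the caller already holds a MergedPrefixCodedTermsIterator (the class sits in org.apache.lucene.index and is not public API, so this would live in that package), and the iterator exposes the current field via field(), mirroring the 'field' member maintained in next() above.

// Required imports (assumed): org.apache.lucene.util.BytesRef, java.util.Objects
static void drain(MergedPrefixCodedTermsIterator merged) {
    String currentField = null;
    for (BytesRef term = merged.next(); term != null; term = merged.next()) {
        if (!Objects.equals(currentField, merged.field())) {
            // the merged stream groups terms by field, so field changes happen between terms
            currentField = merged.field();
        }
        // consume (currentField, term); the BytesRef is reused, so copy it if it must be kept
    }
}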

Aggregations

TermIterator (org.apache.lucene.index.PrefixCodedTerms.TermIterator): 14
BytesRef (org.apache.lucene.util.BytesRef): 8
Term (org.apache.lucene.index.Term): 4
BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 4
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 3
DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder): 3
HashSet (java.util.HashSet): 2
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 2
DeleteSlice (org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice): 2
LeafReader (org.apache.lucene.index.LeafReader): 2
IOException (java.io.IOException): 1
ArrayList (java.util.ArrayList): 1
Set (java.util.Set): 1
SortedSet (java.util.SortedSet): 1
TreeSet (java.util.TreeSet): 1
CountDownLatch (java.util.concurrent.CountDownLatch): 1
Fields (org.apache.lucene.index.Fields): 1
PointValues (org.apache.lucene.index.PointValues): 1
PostingsEnum (org.apache.lucene.index.PostingsEnum): 1
PrefixCodedTerms (org.apache.lucene.index.PrefixCodedTerms): 1