Search in sources :

Example 86 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project OpenGrok by OpenGrok.

The method termGroups of the class CustomSloppyPhraseScorer.

/**
 * Maps each term to the number of the group that contains it.
 *
 * @param tord terms with their ordinals; terms are looked up by position in the key
 *             iteration order (presumably this matches the stored ordinal values - confirm with caller)
 * @param bb one bit set per group; a set bit at position {@code ord} puts the term with that ordinal in the group
 * @return a map from term to group number, where the group number is the index of the bit set in {@code bb}
 * @throws IOException declared by the signature; nothing in this body throws it
 */
private HashMap<Term, Integer> termGroups(LinkedHashMap<Term, Integer> tord, ArrayList<FixedBitSet> bb) throws IOException {
    Term[] termsByOrd = tord.keySet().toArray(new Term[0]);
    HashMap<Term, Integer> groupOfTerm = new HashMap<>();
    int groupNo = 0;
    for (FixedBitSet groupBits : bb) {
        int ord = groupBits.nextSetBit(0);
        while (ord != DocIdSetIterator.NO_MORE_DOCS) {
            groupOfTerm.put(termsByOrd[ord], groupNo);
            int next = ord + 1;
            // stop instead of asking for a set bit at or beyond the end of the bit set
            ord = next >= groupBits.length() ? DocIdSetIterator.NO_MORE_DOCS : groupBits.nextSetBit(next);
        }
        groupNo++;
    }
    return groupOfTerm;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) FixedBitSet(org.apache.lucene.util.FixedBitSet) Term(org.apache.lucene.index.Term)

Example 87 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project OpenGrok by OpenGrok.

The method ppTermsBitSets of the class CustomSloppyPhraseScorer.

/**
 * Builds one bit set per repeating phrase position: for every term of that phrase position
 * that has an entry in {@code tord}, the bit at the term's ordinal is set.
 *
 * @param rpp the repeating phrase positions, one resulting bit set each (same order)
 * @param tord term to ordinal mapping; terms without an entry are skipped
 * @return the bit sets, each sized to {@code tord.size()}
 */
private ArrayList<FixedBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term, Integer> tord) {
    final int ordCount = tord.size();
    ArrayList<FixedBitSet> result = new ArrayList<>(rpp.length);
    for (int i = 0; i < rpp.length; i++) {
        FixedBitSet termBits = new FixedBitSet(ordCount);
        for (Term term : rpp[i].terms) {
            Integer termOrd = tord.get(term);
            if (termOrd == null) {
                // term has no ordinal, nothing to mark
                continue;
            }
            termBits.set(termOrd);
        }
        result.add(termBits);
    }
    return result;
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term)

Example 88 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project elasticsearch by elastic.

The method testTwoDocuments of the class VersionLookupTests.

/**
 * Tests version lookup with two documents matching the same ID.
 * <p>
 * The same document (uid "6", version 87) is added twice, then looked up with different
 * live-docs bit sets to check which of the two doc ids is returned.
 * <p>
 * The directory, writer and reader are managed with try-with-resources so they are closed
 * (reader first, then writer, then directory) even when an assertion fails.
 */
public void testTwoDocuments() throws Exception {
    try (Directory dir = newDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        Document doc = new Document();
        doc.add(new Field(UidFieldMapper.NAME, "6", UidFieldMapper.Defaults.FIELD_TYPE));
        doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 87));
        // add the very same document twice so that two docs share the ID
        writer.addDocument(doc);
        writer.addDocument(doc);
        try (DirectoryReader reader = DirectoryReader.open(writer)) {
            LeafReaderContext segment = reader.leaves().get(0);
            PerThreadIDAndVersionLookup lookup = new PerThreadIDAndVersionLookup(segment.reader());
            // return the last doc when there are duplicates
            DocIdAndVersion result = lookup.lookup(new BytesRef("6"), null, segment);
            assertNotNull(result);
            assertEquals(87, result.version);
            assertEquals(1, result.docId);
            // delete the first doc only
            FixedBitSet live = new FixedBitSet(2);
            live.set(1);
            result = lookup.lookup(new BytesRef("6"), live, segment);
            assertNotNull(result);
            assertEquals(87, result.version);
            assertEquals(1, result.docId);
            // delete the second doc only
            live.clear(1);
            live.set(0);
            result = lookup.lookup(new BytesRef("6"), live, segment);
            assertNotNull(result);
            assertEquals(87, result.version);
            assertEquals(0, result.docId);
            // delete both docs
            assertNull(lookup.lookup(new BytesRef("6"), new Bits.MatchNoBits(2), segment));
        }
    }
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Document(org.apache.lucene.document.Document) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) FixedBitSet(org.apache.lucene.util.FixedBitSet) DocIdAndVersion(org.elasticsearch.common.lucene.uid.Versions.DocIdAndVersion) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 89 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project elasticsearch by elastic.

The method testConcurrentWritesAndCommits of the class InternalEngineTests.

// Writes documents to the engine from several threads at once, waits for all of them to finish,
// and then checks that every commit point carries consistent sequence number data.
// NOTE: the flush/commit call that was meant to run concurrently with indexing is currently disabled.
public void testConcurrentWritesAndCommits() throws Exception {
    try (Store store = createStore();
        InternalEngine engine = new InternalEngine(config(defaultSettings, store, createTempDir(), newMergePolicy(), new SnapshotDeletionPolicy(NoDeletionPolicy.INSTANCE), IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, null))) {
        final int writerCount = scaledRandomIntBetween(3, 6);
        final int docsPerWriter = randomIntBetween(500, 1000);
        // one extra party for the test thread itself
        final CyclicBarrier startGate = new CyclicBarrier(writerCount + 1);
        final List<Thread> writers = new ArrayList<>();
        // set up the writer threads; each one indexes its own uniquely named docs
        for (int w = 0; w < writerCount; w++) {
            final int writerIdx = w;
            writers.add(new Thread(() -> {
                try {
                    // block until every writer and the test thread have arrived
                    startGate.await();
                    for (int docNo = 0; docNo < docsPerWriter; docNo++) {
                        final String id = "thread" + writerIdx + "#" + docNo;
                        ParsedDocument doc = testParsedDocument(id, "test", null, testDocument(), B_1, null);
                        engine.index(indexForDoc(doc));
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }));
        }
        writers.forEach(Thread::start);
        // release the writers once all of them are waiting at the barrier
        startGate.await();
        // spin until no writer thread is alive any more
        while (writers.stream().anyMatch(Thread::isAlive)) {
            // engine.flush() (flush and commit) used to be called here; it is disabled for now
        }
        // walk the commits and verify the sequence number data stored in their user data
        long lastLocalCheckpoint = SequenceNumbersService.NO_OPS_PERFORMED;
        long lastMaxSeqNo = SequenceNumbersService.NO_OPS_PERFORMED;
        for (IndexCommit commit : DirectoryReader.listCommits(store.directory())) {
            final Map<String, String> commitData = commit.getUserData();
            long localCheckpoint = SequenceNumbersService.NO_OPS_PERFORMED;
            if (commitData.containsKey(SequenceNumbers.LOCAL_CHECKPOINT_KEY)) {
                localCheckpoint = Long.parseLong(commitData.get(SequenceNumbers.LOCAL_CHECKPOINT_KEY));
            }
            long maxSeqNo = SequenceNumbersService.UNASSIGNED_SEQ_NO;
            if (commitData.containsKey(SequenceNumbers.MAX_SEQ_NO)) {
                maxSeqNo = Long.parseLong(commitData.get(SequenceNumbers.MAX_SEQ_NO));
            }
            // neither value may move backwards from one commit to the next
            assertThat(localCheckpoint, greaterThanOrEqualTo(lastLocalCheckpoint));
            assertThat(maxSeqNo, greaterThanOrEqualTo(lastMaxSeqNo));
            try (IndexReader reader = DirectoryReader.open(commit)) {
                FieldStats stats = SeqNoFieldMapper.SeqNoDefaults.FIELD_TYPE.stats(reader);
                final long highestSeqNo = stats != null ? (long) stats.getMaxValue() : SequenceNumbersService.NO_OPS_PERFORMED;
                // expected ordering: localCheckpoint <= highest seq no in the index <= maxSeqNo
                assertThat(highestSeqNo, greaterThanOrEqualTo(localCheckpoint));
                assertThat(highestSeqNo, lessThanOrEqualTo(maxSeqNo));
                // every sequence number up to and including the local checkpoint must be present
                FixedBitSet indexedSeqNos = getSeqNosSet(reader, highestSeqNo);
                for (int seqNo = 0; seqNo <= localCheckpoint; seqNo++) {
                    assertTrue("local checkpoint [" + localCheckpoint + "], _seq_no [" + seqNo + "] should be indexed", indexedSeqNos.get(seqNo));
                }
            }
            lastLocalCheckpoint = localCheckpoint;
            lastMaxSeqNo = maxSeqNo;
        }
    }
}
Also used : ArrayList(java.util.ArrayList) Store(org.elasticsearch.index.store.Store) Matchers.containsString(org.hamcrest.Matchers.containsString) SnapshotDeletionPolicy(org.apache.lucene.index.SnapshotDeletionPolicy) LongPoint(org.apache.lucene.document.LongPoint) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ElasticsearchException(org.elasticsearch.ElasticsearchException) IndexCommit(org.apache.lucene.index.IndexCommit) CyclicBarrier(java.util.concurrent.CyclicBarrier) FieldStats(org.elasticsearch.action.fieldstats.FieldStats) ParsedDocument(org.elasticsearch.index.mapper.ParsedDocument) FixedBitSet(org.apache.lucene.util.FixedBitSet) IndexReader(org.apache.lucene.index.IndexReader)

Example 90 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project elasticsearch by elastic.

The method randomRootDocs of the class MultiValueModeTests.

/**
 * Creates a bit set of size {@code maxDoc} in which each bit is set at random
 * and the bit for the last doc ({@code maxDoc - 1}) is always set.
 *
 * @param maxDoc the number of bits in the returned set
 * @return the randomly populated bit set
 */
private static FixedBitSet randomRootDocs(int maxDoc) {
    final FixedBitSet rootDocs = new FixedBitSet(maxDoc);
    int doc = 0;
    while (doc < maxDoc) {
        final boolean isRoot = randomBoolean();
        if (isRoot) {
            rootDocs.set(doc);
        }
        doc++;
    }
    // the last doc must be a root doc, whatever the random draw was
    rootDocs.set(maxDoc - 1);
    return rootDocs;
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet)

Aggregations

FixedBitSet (org.apache.lucene.util.FixedBitSet)162 Term (org.apache.lucene.index.Term)27 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)26 Directory (org.apache.lucene.store.Directory)25 BytesRef (org.apache.lucene.util.BytesRef)22 IOException (java.io.IOException)19 Document (org.apache.lucene.document.Document)17 ArrayList (java.util.ArrayList)15 Query (org.apache.lucene.search.Query)15 NumericDocValues (org.apache.lucene.index.NumericDocValues)14 BitDocIdSet (org.apache.lucene.util.BitDocIdSet)13 Bits (org.apache.lucene.util.Bits)13 LeafReader (org.apache.lucene.index.LeafReader)12 IndexSearcher (org.apache.lucene.search.IndexSearcher)12 TermQuery (org.apache.lucene.search.TermQuery)12 IndexReader (org.apache.lucene.index.IndexReader)11 HashSet (java.util.HashSet)10 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)10 DocIterator (org.apache.solr.search.DocIterator)10 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)9