Use of org.apache.lucene.util.FixedBitSet in project OpenGrok by OpenGrok: class CustomSloppyPhraseScorer, method termGroups.
/**
 * Map each term to the single group that contains it.
 */
private HashMap<Term, Integer> termGroups(LinkedHashMap<Term, Integer> tord, ArrayList<FixedBitSet> bb) throws IOException {
    HashMap<Term, Integer> tg = new HashMap<>();
    Term[] t = tord.keySet().toArray(new Term[0]);
    for (int i = 0; i < bb.size(); i++) {
        // i is the group number
        FixedBitSet bits = bb.get(i);
        // walk the set bits by hand: nextSetBit must not be called with an index
        // past the end of the set, so guard with length() and use NO_MORE_DOCS as the sentinel
        for (int ord = bits.nextSetBit(0);
                ord != DocIdSetIterator.NO_MORE_DOCS;
                ord = ord + 1 >= bits.length() ? DocIdSetIterator.NO_MORE_DOCS : bits.nextSetBit(ord + 1)) {
            tg.put(t[ord], i);
        }
    }
    return tg;
}
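The loop above is a reusable idiom for iterating the set bits of a FixedBitSet. A minimal, self-contained sketch of the same idiom (the class name FixedBitSetIterationDemo is made up for illustration; it assumes a Lucene version whose FixedBitSet.nextSetBit returns DocIdSetIterator.NO_MORE_DOCS when exhausted, as the code above requires):

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.FixedBitSet;

public class FixedBitSetIterationDemo {
    public static void main(String[] args) {
        FixedBitSet bits = new FixedBitSet(8);
        bits.set(1);
        bits.set(4);
        bits.set(6);
        // same idiom as termGroups: never call nextSetBit past the end of the set
        for (int ord = bits.nextSetBit(0);
                ord != DocIdSetIterator.NO_MORE_DOCS;
                ord = ord + 1 >= bits.length() ? DocIdSetIterator.NO_MORE_DOCS : bits.nextSetBit(ord + 1)) {
            System.out.println("set bit at ordinal " + ord); // prints 1, 4, 6
        }
    }
}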
Use of org.apache.lucene.util.FixedBitSet in project OpenGrok by OpenGrok: class CustomSloppyPhraseScorer, method ppTermsBitSets.
/**
 * Bit-sets: for each repeating pp, the ordinal of each of its (repeating) terms is set.
 */
private ArrayList<FixedBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term, Integer> tord) {
    ArrayList<FixedBitSet> bb = new ArrayList<>(rpp.length);
    for (PhrasePositions pp : rpp) {
        FixedBitSet b = new FixedBitSet(tord.size());
        Integer ord;
        for (Term t : pp.terms) {
            if ((ord = tord.get(t)) != null) {
                b.set(ord);
            }
        }
        bb.add(b);
    }
    return bb;
}
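For context, here is a standalone sketch of the same encoding, one bitset per term group over a shared term-ordinal map, without Lucene's package-private PhrasePositions. The class name and the hard-coded term groups are invented for illustration:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.FixedBitSet;

public class TermBitSetDemo {
    public static void main(String[] args) {
        // shared term-ordinal map, like tord above
        LinkedHashMap<Term, Integer> tord = new LinkedHashMap<>();
        Term a = new Term("f", "a"), b = new Term("f", "b"), c = new Term("f", "c");
        tord.put(a, 0);
        tord.put(b, 1);
        tord.put(c, 2);
        // two hard-coded term groups standing in for the terms of two PhrasePositions
        List<Term[]> groups = Arrays.asList(new Term[] { a, c }, new Term[] { b });
        ArrayList<FixedBitSet> bb = new ArrayList<>(groups.size());
        for (Term[] group : groups) {
            FixedBitSet bits = new FixedBitSet(tord.size());
            for (Term t : group) {
                Integer ord = tord.get(t);
                if (ord != null) {
                    bits.set(ord);
                }
            }
            bb.add(bits);
        }
        System.out.println(bb.get(0).get(0)); // true: term a is in group 0
        System.out.println(bb.get(1).get(1)); // true: term b is in group 1
    }
}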
Use of org.apache.lucene.util.FixedBitSet in project elasticsearch by elastic: class VersionLookupTests, method testTwoDocuments.
/**
 * Test version lookup with two documents matching the ID.
 */
public void testTwoDocuments() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "6", UidFieldMapper.Defaults.FIELD_TYPE));
    doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 87));
    writer.addDocument(doc);
    writer.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(writer);
    LeafReaderContext segment = reader.leaves().get(0);
    PerThreadIDAndVersionLookup lookup = new PerThreadIDAndVersionLookup(segment.reader());
    // the lookup returns the last doc when there are duplicates
    DocIdAndVersion result = lookup.lookup(new BytesRef("6"), null, segment);
    assertNotNull(result);
    assertEquals(87, result.version);
    assertEquals(1, result.docId);
    // delete the first doc only
    FixedBitSet live = new FixedBitSet(2);
    live.set(1);
    result = lookup.lookup(new BytesRef("6"), live, segment);
    assertNotNull(result);
    assertEquals(87, result.version);
    assertEquals(1, result.docId);
    // delete the second doc only
    live.clear(1);
    live.set(0);
    result = lookup.lookup(new BytesRef("6"), live, segment);
    assertNotNull(result);
    assertEquals(87, result.version);
    assertEquals(0, result.docId);
    // delete both docs
    assertNull(lookup.lookup(new BytesRef("6"), new Bits.MatchNoBits(2), segment));
    reader.close();
    writer.close();
    dir.close();
}
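The test can pass a FixedBitSet where the lookup expects a live-docs view because FixedBitSet implements org.apache.lucene.util.Bits: a set bit means the document is live, a clear bit means it is deleted. A small sketch of that substitutability (hypothetical class name):

import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

public class LiveDocsBitsDemo {
    public static void main(String[] args) {
        FixedBitSet live = new FixedBitSet(2);
        live.set(1); // doc 0 "deleted", doc 1 live
        Bits bits = live; // FixedBitSet implements Bits
        System.out.println(bits.get(0)); // false
        System.out.println(bits.get(1)); // true
        // Bits.MatchNoBits reports every doc as deleted, as in the final assertion above
        System.out.println(new Bits.MatchNoBits(2).get(1)); // false
    }
}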
Use of org.apache.lucene.util.FixedBitSet in project elasticsearch by elastic: class InternalEngineTests, method testConcurrentWritesAndCommits.
// this test writes documents to the engine while concurrently flushing/committing,
// and ensures that the commit points contain the correct sequence number data
public void testConcurrentWritesAndCommits() throws Exception {
    try (Store store = createStore();
         InternalEngine engine = new InternalEngine(config(defaultSettings, store, createTempDir(), newMergePolicy(),
                 new SnapshotDeletionPolicy(NoDeletionPolicy.INSTANCE), IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, null))) {
        final int numIndexingThreads = scaledRandomIntBetween(3, 6);
        final int numDocsPerThread = randomIntBetween(500, 1000);
        final CyclicBarrier barrier = new CyclicBarrier(numIndexingThreads + 1);
        final List<Thread> indexingThreads = new ArrayList<>();
        // create N indexing threads to index documents simultaneously
        for (int threadNum = 0; threadNum < numIndexingThreads; threadNum++) {
            final int threadIdx = threadNum;
            Thread indexingThread = new Thread(() -> {
                try {
                    // wait for all threads to start at the same time
                    barrier.await();
                    // index the randomly sized batch of docs
                    for (int i = 0; i < numDocsPerThread; i++) {
                        final String id = "thread" + threadIdx + "#" + i;
                        ParsedDocument doc = testParsedDocument(id, "test", null, testDocument(), B_1, null);
                        engine.index(indexForDoc(doc));
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            });
            indexingThreads.add(indexingThread);
        }
        // start the indexing threads
        for (Thread thread : indexingThreads) {
            thread.start();
        }
        // wait for indexing threads to all be ready to start
        barrier.await();
        // create random commit points while indexing is in progress
        boolean doneIndexing;
        do {
            doneIndexing = indexingThreads.stream().filter(Thread::isAlive).count() == 0;
            engine.flush(); // flush and commit
        } while (doneIndexing == false);
        // now, verify all the commits have the correct docs according to the user commit data
        long prevLocalCheckpoint = SequenceNumbersService.NO_OPS_PERFORMED;
        long prevMaxSeqNo = SequenceNumbersService.NO_OPS_PERFORMED;
        for (IndexCommit commit : DirectoryReader.listCommits(store.directory())) {
            Map<String, String> userData = commit.getUserData();
            long localCheckpoint = userData.containsKey(SequenceNumbers.LOCAL_CHECKPOINT_KEY)
                    ? Long.parseLong(userData.get(SequenceNumbers.LOCAL_CHECKPOINT_KEY))
                    : SequenceNumbersService.NO_OPS_PERFORMED;
            long maxSeqNo = userData.containsKey(SequenceNumbers.MAX_SEQ_NO)
                    ? Long.parseLong(userData.get(SequenceNumbers.MAX_SEQ_NO))
                    : SequenceNumbersService.UNASSIGNED_SEQ_NO;
            // local checkpoint and max seq no shouldn't go backwards
            assertThat(localCheckpoint, greaterThanOrEqualTo(prevLocalCheckpoint));
            assertThat(maxSeqNo, greaterThanOrEqualTo(prevMaxSeqNo));
            try (IndexReader reader = DirectoryReader.open(commit)) {
                FieldStats stats = SeqNoFieldMapper.SeqNoDefaults.FIELD_TYPE.stats(reader);
                final long highestSeqNo;
                if (stats != null) {
                    highestSeqNo = (long) stats.getMaxValue();
                } else {
                    highestSeqNo = SequenceNumbersService.NO_OPS_PERFORMED;
                }
                // make sure localCheckpoint <= highest seq no found <= maxSeqNo
                assertThat(highestSeqNo, greaterThanOrEqualTo(localCheckpoint));
                assertThat(highestSeqNo, lessThanOrEqualTo(maxSeqNo));
                // make sure all sequence numbers up to and including the local checkpoint are in the index
                FixedBitSet seqNosBitSet = getSeqNosSet(reader, highestSeqNo);
                for (int i = 0; i <= localCheckpoint; i++) {
                    assertTrue("local checkpoint [" + localCheckpoint + "], _seq_no [" + i + "] should be indexed",
                            seqNosBitSet.get(i));
                }
            }
            prevLocalCheckpoint = localCheckpoint;
            prevMaxSeqNo = maxSeqNo;
        }
    }
}
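getSeqNosSet is a helper of the test class that this excerpt does not show. A plausible sketch of it, assuming the random-access NumericDocValues API (values.get(docID)) of the Lucene 6.x line this Elasticsearch code targets; the real helper may differ in details:

// hypothetical reconstruction: mark the _seq_no of every live document in a FixedBitSet
private static FixedBitSet getSeqNosSet(IndexReader reader, long highestSeqNo) throws IOException {
    // one bit per sequence number in [0, highestSeqNo]
    final FixedBitSet bitSet = new FixedBitSet((int) highestSeqNo + 1);
    for (LeafReaderContext ctx : reader.leaves()) {
        final LeafReader leaf = ctx.reader();
        final NumericDocValues seqNos = leaf.getNumericDocValues(SeqNoFieldMapper.NAME);
        if (seqNos == null) {
            continue; // segment has no _seq_no doc values
        }
        final Bits liveDocs = leaf.getLiveDocs();
        for (int docID = 0; docID < leaf.maxDoc(); docID++) {
            if (liveDocs == null || liveDocs.get(docID)) {
                bitSet.set((int) seqNos.get(docID));
            }
        }
    }
    return bitSet;
}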
Use of org.apache.lucene.util.FixedBitSet in project elasticsearch by elastic: class MultiValueModeTests, method randomRootDocs.
private static FixedBitSet randomRootDocs(int maxDoc) {
    FixedBitSet set = new FixedBitSet(maxDoc);
    for (int i = 0; i < maxDoc; ++i) {
        if (randomBoolean()) {
            set.set(i);
        }
    }
    // the last doc must be a root doc
    set.set(maxDoc - 1);
    return set;
}
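The final set.set(maxDoc - 1) encodes Lucene's block-join layout for nested documents: child documents precede their parent, so each block ends with its root and the last document in a segment is always a root. A toy illustration of that invariant (hypothetical class name, hand-picked blocks):

import org.apache.lucene.util.FixedBitSet;

public class RootDocsDemo {
    public static void main(String[] args) {
        // two blocks: children 0,1 with parent 2; children 3,4 with parent 5
        int maxDoc = 6;
        FixedBitSet rootDocs = new FixedBitSet(maxDoc);
        rootDocs.set(2);
        rootDocs.set(5);
        // children always precede their parent, so the last doc is a root,
        // matching the set.set(maxDoc - 1) line in randomRootDocs
        System.out.println(rootDocs.get(maxDoc - 1)); // true
    }
}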