Use of org.apache.lucene.codecs.TermStats in the Apache lucene-solr project.
Class MockRandomPostingsFormat, method fieldsConsumer.
/**
 * Creates a randomly chosen and randomly tuned terms-dictionary writer for the segment
 * described by {@code state}.
 *
 * <p>A seed is drawn from {@code seedRandom} and written to a per-segment seed file (extension
 * {@code SEED_EXT}); every later random decision in this method comes from a {@link Random}
 * built on that seed. (Presumably the read side reloads the seed to replay the same choices;
 * confirm against the matching producer.)
 *
 * <p>The terms dictionary is one of: {@code FSTTermsWriter}, {@code FSTOrdTermsWriter},
 * {@code BlockTreeTermsWriter}, {@code BlockTermsWriter} (paired with a fixed-gap or
 * variable-gap terms index), or {@code OrdsBlockTreeTermsWriter}. If constructing a writer
 * throws, the resources created so far (postings writer, and terms index writer where one
 * exists) are closed before the exception propagates.
 *
 * @param state write state of the segment being flushed or merged
 * @return the terms-dictionary writer wrapping a {@code Lucene50PostingsWriter}
 * @throws IOException if writing the seed file or creating any of the writers fails
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  // Test2BPostings can OOME unless huge segments use the larger minimum.
  final int minSkipInterval = state.segmentInfo.maxDoc() > 1000000 ? 3 : 2;

  // skipInterval is deliberately pulled from seedRandom *before* the seed: it is not consumed
  // at runtime (the skipInterval is written into the postings header).
  // NOTE: it is currently not handed to the postings writer at all; it used to be passed in
  // wrongly as acceptableOverhead!
  final int skipInterval = TestUtil.nextInt(seedRandom, minSkipInterval, 10);
  final long seed = seedRandom.nextLong();
  if (LuceneTestCase.VERBOSE) {
    System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
    System.out.println("MockRandomCodec: writing to seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
  }

  // Persist the seed next to the segment's other files.
  final String seedFile =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
  try (IndexOutput seedOut = state.directory.createOutput(seedFile, state.context)) {
    CodecUtil.writeIndexHeader(seedOut, "MockRandomSeed", 0, state.segmentInfo.getId(), state.segmentSuffix);
    seedOut.writeLong(seed);
    CodecUtil.writeFooter(seedOut);
  }

  final Random rnd = new Random(seed);
  // consume a random for buffersize
  rnd.nextInt();

  final PostingsWriterBase postings = new Lucene50PostingsWriter(state);
  final int termsDictKind = rnd.nextInt(5);

  final FieldsConsumer fields;
  switch (termsDictKind) {
    case 0: {
      boolean created = false;
      try {
        fields = new FSTTermsWriter(state, postings);
        created = true;
      } finally {
        if (created == false) {
          postings.close();
        }
      }
      break;
    }

    case 1: {
      boolean created = false;
      try {
        fields = new FSTOrdTermsWriter(state, postings);
        created = true;
      } finally {
        if (created == false) {
          postings.close();
        }
      }
      break;
    }

    case 2: {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing BlockTree terms dict");
      }
      // TODO: would be nice to allow 1 but this is very
      // slow to write
      final int minTermsInBlock = TestUtil.nextInt(rnd, 2, 100);
      final int extraTerms = rnd.nextInt(100);
      final int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2 + extraTerms);

      boolean created = false;
      try {
        fields = new BlockTreeTermsWriter(state, postings, minTermsInBlock, maxTermsInBlock);
        created = true;
      } finally {
        if (created == false) {
          postings.close();
        }
      }
      break;
    }

    case 3: {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing Block terms dict");
      }

      // Step 1: build the terms index writer; only the postings writer exists to clean up.
      final TermsIndexWriterBase termsIndex;
      boolean indexCreated = false;
      try {
        if (rnd.nextBoolean()) {
          final int termIndexInterval = TestUtil.nextInt(rnd, 1, 100);
          if (LuceneTestCase.VERBOSE) {
            System.out.println("MockRandomCodec: fixed-gap terms index (tii=" + termIndexInterval + ")");
          }
          termsIndex = new FixedGapTermsIndexWriter(state, termIndexInterval);
        } else {
          final VariableGapTermsIndexWriter.IndexTermSelector selector;
          switch (rnd.nextInt(3)) {
            case 0: {
              final int tii = TestUtil.nextInt(rnd, 1, 100);
              selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
              if (LuceneTestCase.VERBOSE) {
                System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
              }
              break;
            }
            case 1: {
              final int docFreqThresh = TestUtil.nextInt(rnd, 2, 100);
              final int tii = TestUtil.nextInt(rnd, 1, 100);
              selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
              break;
            }
            default: {
              // Selector with its own seeded Random: a term is an index term whenever the
              // draw from [0, gap) equals gap / 2.
              final long selectorSeed = rnd.nextLong();
              final int gap = TestUtil.nextInt(rnd, 2, 40);
              if (LuceneTestCase.VERBOSE) {
                System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
              }
              selector = new VariableGapTermsIndexWriter.IndexTermSelector() {
                final Random rand = new Random(selectorSeed);

                @Override
                public boolean isIndexTerm(BytesRef term, TermStats stats) {
                  return rand.nextInt(gap) == gap / 2;
                }

                @Override
                public void newField(FieldInfo fieldInfo) {
                }
              };
              break;
            }
          }
          termsIndex = new VariableGapTermsIndexWriter(state, selector);
        }
        indexCreated = true;
      } finally {
        if (indexCreated == false) {
          postings.close();
        }
      }

      // Step 2: build the terms dictionary; on failure both earlier writers must be closed.
      boolean dictCreated = false;
      try {
        fields = new BlockTermsWriter(termsIndex, state, postings);
        dictCreated = true;
      } finally {
        if (dictCreated == false) {
          try {
            postings.close();
          } finally {
            termsIndex.close();
          }
        }
      }
      break;
    }

    case 4: {
      // Use OrdsBlockTree terms dict
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing OrdsBlockTree");
      }
      // TODO: would be nice to allow 1 but this is very
      // slow to write
      final int minTermsInBlock = TestUtil.nextInt(rnd, 2, 100);
      final int extraTerms = rnd.nextInt(100);
      final int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2 + extraTerms);

      boolean created = false;
      try {
        fields = new OrdsBlockTreeTermsWriter(state, postings, minTermsInBlock, maxTermsInBlock);
        created = true;
      } finally {
        if (created == false) {
          postings.close();
        }
      }
      break;
    }

    default:
      // BUG!
      throw new AssertionError();
  }

  return fields;
}
Aggregations