Search in sources :

Example 1 with TermStats

Use of org.apache.lucene.codecs.TermStats in the lucene-solr project by Apache.

From the class MockRandomPostingsFormat, method fieldsConsumer.

/**
 * Builds a randomly selected terms-dictionary writer on top of a
 * {@link Lucene50PostingsWriter} for the segment described by {@code state}.
 *
 * <p>A seed is drawn from {@code seedRandom} and written to a per-segment seed file
 * (extension {@code SEED_EXT}); every later random decision in this method is derived
 * from a {@link Random} created from that seed. One of five writers is then chosen:
 * {@link FSTTermsWriter}, {@link FSTOrdTermsWriter}, {@link BlockTreeTermsWriter},
 * {@link BlockTermsWriter} (with a fixed-gap or variable-gap terms index) or
 * {@link OrdsBlockTreeTermsWriter}.
 *
 * <p>If constructing the chosen writer fails, the already-opened postings writer (and
 * terms index writer, if any) is closed before the failure propagates. Exceptions thrown
 * by those {@code close()} calls are attached to the original failure as suppressed
 * exceptions, so the root cause is never masked by a cleanup error.
 *
 * @param state write state of the segment being flushed or merged
 * @return the randomly chosen {@link FieldsConsumer}
 * @throws IOException if the seed file or any of the underlying writers cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    int minSkipInterval;
    if (state.segmentInfo.maxDoc() > 1000000) {
        // Test2BPostings can OOME otherwise:
        minSkipInterval = 3;
    } else {
        minSkipInterval = 2;
    }
    // we pull this before the seed intentionally: because it's not consumed at runtime
    // (the skipInterval is written into postings header).
    // NOTE: Currently not passed to postings writer.
    //       before, it was being passed in wrongly as acceptableOverhead!
    int skipInterval = TestUtil.nextInt(seedRandom, minSkipInterval, 10);
    if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
    }
    final long seed = seedRandom.nextLong();
    if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing to seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
    }
    // Persist the seed next to the segment so it is recorded in the index itself.
    final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
    try (IndexOutput out = state.directory.createOutput(seedFileName, state.context)) {
        CodecUtil.writeIndexHeader(out, "MockRandomSeed", 0, state.segmentInfo.getId(), state.segmentSuffix);
        out.writeLong(seed);
        CodecUtil.writeFooter(out);
    }
    final Random random = new Random(seed);
    // consume a random for buffersize
    random.nextInt();
    PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
    final FieldsConsumer fields;
    // Selects which of the five terms-dictionary implementations to use.
    final int t1 = random.nextInt(5);
    if (t1 == 0) {
        try {
            fields = new FSTTermsWriter(state, postingsWriter);
        } catch (Throwable t) {
            closeAfterFailure(t, postingsWriter);
            throw t;
        }
    } else if (t1 == 1) {
        try {
            fields = new FSTOrdTermsWriter(state, postingsWriter);
        } catch (Throwable t) {
            closeAfterFailure(t, postingsWriter);
            throw t;
        }
    } else if (t1 == 2) {
        if (LuceneTestCase.VERBOSE) {
            System.out.println("MockRandomCodec: writing BlockTree terms dict");
        }
        // TODO: would be nice to allow 1 but this is very
        // slow to write
        final int minTermsInBlock = TestUtil.nextInt(random, 2, 100);
        final int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2 + random.nextInt(100));
        try {
            fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
        } catch (Throwable t) {
            closeAfterFailure(t, postingsWriter);
            throw t;
        }
    } else if (t1 == 3) {
        if (LuceneTestCase.VERBOSE) {
            System.out.println("MockRandomCodec: writing Block terms dict");
        }
        final TermsIndexWriterBase indexWriter;
        // Step 1: create the terms index writer; only the postings writer is open so far.
        try {
            if (random.nextBoolean()) {
                int termIndexInterval = TestUtil.nextInt(random, 1, 100);
                if (LuceneTestCase.VERBOSE) {
                    System.out.println("MockRandomCodec: fixed-gap terms index (tii=" + termIndexInterval + ")");
                }
                indexWriter = new FixedGapTermsIndexWriter(state, termIndexInterval);
            } else {
                final VariableGapTermsIndexWriter.IndexTermSelector selector;
                final int n2 = random.nextInt(3);
                if (n2 == 0) {
                    final int tii = TestUtil.nextInt(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
                    if (LuceneTestCase.VERBOSE) {
                        System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
                    }
                } else if (n2 == 1) {
                    final int docFreqThresh = TestUtil.nextInt(random, 2, 100);
                    final int tii = TestUtil.nextInt(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
                } else {
                    final long seed2 = random.nextLong();
                    final int gap = TestUtil.nextInt(random, 2, 40);
                    if (LuceneTestCase.VERBOSE) {
                        System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
                    }
                    // Selector that picks index terms pseudo-randomly: each call draws a value
                    // in [0, gap) from its own seeded Random and accepts the term when the
                    // draw equals gap / 2.
                    selector = new VariableGapTermsIndexWriter.IndexTermSelector() {

                        final Random rand = new Random(seed2);

                        @Override
                        public boolean isIndexTerm(BytesRef term, TermStats stats) {
                            return rand.nextInt(gap) == gap / 2;
                        }

                        @Override
                        public void newField(FieldInfo fieldInfo) {
                        }
                    };
                }
                indexWriter = new VariableGapTermsIndexWriter(state, selector);
            }
        } catch (Throwable t) {
            closeAfterFailure(t, postingsWriter);
            throw t;
        }
        // Step 2: create the terms writer; on failure both open writers must be released.
        try {
            fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
        } catch (Throwable t) {
            closeAfterFailure(t, postingsWriter, indexWriter);
            throw t;
        }
    } else if (t1 == 4) {
        // Use OrdsBlockTree terms dict
        if (LuceneTestCase.VERBOSE) {
            System.out.println("MockRandomCodec: writing OrdsBlockTree");
        }
        // TODO: would be nice to allow 1 but this is very
        // slow to write
        final int minTermsInBlock = TestUtil.nextInt(random, 2, 100);
        final int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2 + random.nextInt(100));
        try {
            fields = new OrdsBlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
        } catch (Throwable t) {
            closeAfterFailure(t, postingsWriter);
            throw t;
        }
    } else {
        // BUG! random.nextInt(5) can only yield 0..4.
        throw new AssertionError();
    }
    return fields;
}

/**
 * Closes each resource in the given order while {@code failure} is propagating. Anything
 * thrown by a {@code close()} call is recorded on {@code failure} as a suppressed
 * exception instead of replacing it, and the remaining resources are still closed.
 *
 * @param failure the primary exception that triggered the cleanup
 * @param resources the resources to release, closed in argument order
 */
private static void closeAfterFailure(Throwable failure, java.io.Closeable... resources) {
    for (java.io.Closeable resource : resources) {
        try {
            resource.close();
        } catch (Throwable closeFailure) {
            // addSuppressed rejects self-suppression, so skip the (unlikely) same-instance case.
            if (closeFailure != failure) {
                failure.addSuppressed(closeFailure);
            }
        }
    }
}
Also used : FieldsConsumer(org.apache.lucene.codecs.FieldsConsumer) OrdsBlockTreeTermsWriter(org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsWriter) FSTTermsWriter(org.apache.lucene.codecs.memory.FSTTermsWriter) PostingsWriterBase(org.apache.lucene.codecs.PostingsWriterBase) Random(java.util.Random) BlockTermsWriter(org.apache.lucene.codecs.blockterms.BlockTermsWriter) BytesRef(org.apache.lucene.util.BytesRef) FSTOrdTermsWriter(org.apache.lucene.codecs.memory.FSTOrdTermsWriter) BlockTreeTermsWriter(org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter) OrdsBlockTreeTermsWriter(org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsWriter) IndexOutput(org.apache.lucene.store.IndexOutput) TermStats(org.apache.lucene.codecs.TermStats) VariableGapTermsIndexWriter(org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter) FixedGapTermsIndexWriter(org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter) TermsIndexWriterBase(org.apache.lucene.codecs.blockterms.TermsIndexWriterBase) Lucene50PostingsWriter(org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter) FieldInfo(org.apache.lucene.index.FieldInfo)

Aggregations

Random (java.util.Random)1 FieldsConsumer (org.apache.lucene.codecs.FieldsConsumer)1 PostingsWriterBase (org.apache.lucene.codecs.PostingsWriterBase)1 TermStats (org.apache.lucene.codecs.TermStats)1 BlockTermsWriter (org.apache.lucene.codecs.blockterms.BlockTermsWriter)1 FixedGapTermsIndexWriter (org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter)1 TermsIndexWriterBase (org.apache.lucene.codecs.blockterms.TermsIndexWriterBase)1 VariableGapTermsIndexWriter (org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter)1 BlockTreeTermsWriter (org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter)1 OrdsBlockTreeTermsWriter (org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsWriter)1 Lucene50PostingsWriter (org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter)1 FSTOrdTermsWriter (org.apache.lucene.codecs.memory.FSTOrdTermsWriter)1 FSTTermsWriter (org.apache.lucene.codecs.memory.FSTTermsWriter)1 FieldInfo (org.apache.lucene.index.FieldInfo)1 IndexOutput (org.apache.lucene.store.IndexOutput)1 BytesRef (org.apache.lucene.util.BytesRef)1