Examples with MockRandomPostingsFormat - org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat

Example 1 with MockRandomPostingsFormat

use of org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat in project lucene-solr by apache.

the class BaseIndexFileFormatTestCase method testRamBytesUsed.

/** Test the accuracy of the ramBytesUsed estimations. */
@Slow
public void testRamBytesUsed() throws IOException {
    if (Codec.getDefault() instanceof RandomCodec) {
        // this test relies on the fact that two segments will be written with
        // the same codec so we need to disable MockRandomPF
        final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs);
        avoidCodecs.add(new MockRandomPostingsFormat().getName());
        Codec.setDefault(new RandomCodec(random(), avoidCodecs));
    }
    Directory dir = applyCreatedVersionMajor(newDirectory());
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(dir, cfg);
    // we need to index enough documents so that constant overhead doesn't dominate
    final int numDocs = atLeast(10000);
    LeafReader reader1 = null;
    for (int i = 0; i < numDocs; ++i) {
        Document d = new Document();
        addRandomFields(d);
        w.addDocument(d);
        if (i == 100) {
            w.forceMerge(1);
            w.commit();
            reader1 = getOnlyLeafReader(DirectoryReader.open(dir));
        }
    }
    w.forceMerge(1);
    w.commit();
    w.close();
    LeafReader reader2 = getOnlyLeafReader(DirectoryReader.open(dir));
    for (LeafReader reader : Arrays.asList(reader1, reader2)) {
        new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(reader);
    }
    final long actualBytes = RamUsageTester.sizeOf(reader2, new Accumulator(reader2)) - RamUsageTester.sizeOf(reader1, new Accumulator(reader1));
    final long expectedBytes = ((SegmentReader) reader2).ramBytesUsed() - ((SegmentReader) reader1).ramBytesUsed();
    final long absoluteError = actualBytes - expectedBytes;
    final double relativeError = (double) absoluteError / actualBytes;
    final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100 * relativeError + "% error";
    assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);
    reader1.close();
    reader2.close();
    dir.close();
}

Also used : MockRandomPostingsFormat(org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) HashSet(java.util.HashSet) Directory(org.apache.lucene.store.Directory)

Example 2 with MockRandomPostingsFormat

use of org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat in project lucene-solr by apache.

the class TestRuleSetupAndRestoreClassEnv method before.

@Override
protected void before() throws Exception {
    // we do this in beforeClass, because some tests currently disable it
    if (System.getProperty("solr.directoryFactory") == null) {
        System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockDirectoryFactory");
    }
    // if verbose: print some debugging stuff about which codecs are loaded.
    if (VERBOSE) {
        System.out.println("Loaded codecs: " + Codec.availableCodecs());
        System.out.println("Loaded postingsFormats: " + PostingsFormat.availablePostingsFormats());
    }
    savedInfoStream = InfoStream.getDefault();
    final Random random = RandomizedContext.current().getRandom();
    final boolean v = random.nextBoolean();
    if (INFOSTREAM) {
        InfoStream.setDefault(new ThreadNameFixingPrintStreamInfoStream(System.out));
    } else if (v) {
        InfoStream.setDefault(new NullInfoStream());
    }
    Class<?> targetClass = RandomizedContext.current().getTargetClass();
    avoidCodecs = new HashSet<>();
    if (targetClass.isAnnotationPresent(SuppressCodecs.class)) {
        SuppressCodecs a = targetClass.getAnnotation(SuppressCodecs.class);
        avoidCodecs.addAll(Arrays.asList(a.value()));
    }
    savedCodec = Codec.getDefault();
    int randomVal = random.nextInt(11);
    if ("default".equals(TEST_CODEC)) {
        // just use the default, don't randomize
        codec = savedCodec;
    } else if (("random".equals(TEST_POSTINGSFORMAT) == false) || ("random".equals(TEST_DOCVALUESFORMAT) == false)) {
        // the user wired postings or DV: this is messy
        // refactor into RandomCodec....
        final PostingsFormat format;
        if ("random".equals(TEST_POSTINGSFORMAT)) {
            format = new AssertingPostingsFormat();
        } else if ("MockRandom".equals(TEST_POSTINGSFORMAT)) {
            format = new MockRandomPostingsFormat(new Random(random.nextLong()));
        } else {
            format = PostingsFormat.forName(TEST_POSTINGSFORMAT);
        }
        final DocValuesFormat dvFormat;
        if ("random".equals(TEST_DOCVALUESFORMAT)) {
            dvFormat = new AssertingDocValuesFormat();
        } else {
            dvFormat = DocValuesFormat.forName(TEST_DOCVALUESFORMAT);
        }
        codec = new AssertingCodec() {

            @Override
            public PostingsFormat getPostingsFormatForField(String field) {
                return format;
            }

            @Override
            public DocValuesFormat getDocValuesFormatForField(String field) {
                return dvFormat;
            }

            @Override
            public String toString() {
                return super.toString() + ": " + format.toString() + ", " + dvFormat.toString();
            }
        };
    } else if ("SimpleText".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 9 && LuceneTestCase.rarely(random) && !shouldAvoidCodec("SimpleText"))) {
        codec = new SimpleTextCodec();
    } else if ("CheapBastard".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 8 && !shouldAvoidCodec("CheapBastard") && !shouldAvoidCodec("Lucene41"))) {
        // we also avoid this codec if Lucene41 is avoided, since thats the postings format it uses.
        codec = new CheapBastardCodec();
    } else if ("Asserting".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 7 && !shouldAvoidCodec("Asserting"))) {
        codec = new AssertingCodec();
    } else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) {
        codec = CompressingCodec.randomInstance(random);
    } else if ("Lucene70".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene70"))) {
        codec = new Lucene70Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values()));
    } else if (!"random".equals(TEST_CODEC)) {
        codec = Codec.forName(TEST_CODEC);
    } else if ("random".equals(TEST_POSTINGSFORMAT)) {
        codec = new RandomCodec(random, avoidCodecs);
    } else {
        assert false;
    }
    Codec.setDefault(codec);
    // Initialize locale/ timezone.
    String testLocale = System.getProperty("tests.locale", "random");
    String testTimeZone = System.getProperty("tests.timezone", "random");
    // Always pick a random one for consistency (whether tests.locale was specified or not).
    savedLocale = Locale.getDefault();
    Locale randomLocale = randomLocale(random);
    locale = testLocale.equals("random") ? randomLocale : localeForLanguageTag(testLocale);
    Locale.setDefault(locale);
    savedTimeZone = TimeZone.getDefault();
    TimeZone randomTimeZone = randomTimeZone(random());
    timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
    TimeZone.setDefault(timeZone);
    similarity = new RandomSimilarity(random());
    // Check codec restrictions once at class level.
    try {
        checkCodecRestrictions(codec);
    } catch (AssumptionViolatedException e) {
        System.err.println("NOTE: " + e.getMessage() + " Suppressed codecs: " + Arrays.toString(avoidCodecs.toArray()));
        throw e;
    }
    // We have "stickiness" so that sometimes all we do is vary the RAM buffer size, other times just the doc count to flush by, else both.
    // This way the assertMemory in DocumentsWriterFlushControl sometimes runs (when we always flush by RAM).
    LiveIWCFlushMode flushMode;
    switch(random().nextInt(3)) {
        case 0:
            flushMode = LiveIWCFlushMode.BY_RAM;
            break;
        case 1:
            flushMode = LiveIWCFlushMode.BY_DOCS;
            break;
        case 2:
            flushMode = LiveIWCFlushMode.EITHER;
            break;
        default:
            throw new AssertionError();
    }
    LuceneTestCase.setLiveIWCFlushMode(flushMode);
    initialized = true;
}

Also used : Locale(java.util.Locale) LuceneTestCase.randomLocale(org.apache.lucene.util.LuceneTestCase.randomLocale) AssumptionViolatedException(org.junit.internal.AssumptionViolatedException) CheapBastardCodec(org.apache.lucene.codecs.cheapbastard.CheapBastardCodec) DocValuesFormat(org.apache.lucene.codecs.DocValuesFormat) AssertingDocValuesFormat(org.apache.lucene.codecs.asserting.AssertingDocValuesFormat) AssertingPostingsFormat(org.apache.lucene.codecs.asserting.AssertingPostingsFormat) Random(java.util.Random) AssertingDocValuesFormat(org.apache.lucene.codecs.asserting.AssertingDocValuesFormat) RandomCodec(org.apache.lucene.index.RandomCodec) AssertingCodec(org.apache.lucene.codecs.asserting.AssertingCodec) LiveIWCFlushMode(org.apache.lucene.util.LuceneTestCase.LiveIWCFlushMode) RandomSimilarity(org.apache.lucene.search.similarities.RandomSimilarity) SimpleTextCodec(org.apache.lucene.codecs.simpletext.SimpleTextCodec) SuppressCodecs(org.apache.lucene.util.LuceneTestCase.SuppressCodecs) MockRandomPostingsFormat(org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat) LuceneTestCase.randomTimeZone(org.apache.lucene.util.LuceneTestCase.randomTimeZone) TimeZone(java.util.TimeZone) AssertingPostingsFormat(org.apache.lucene.codecs.asserting.AssertingPostingsFormat) MockRandomPostingsFormat(org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat) PostingsFormat(org.apache.lucene.codecs.PostingsFormat) Lucene70Codec(org.apache.lucene.codecs.lucene70.Lucene70Codec)

Aggregations

MockRandomPostingsFormat (org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat)2 HashSet (java.util.HashSet)1 Locale (java.util.Locale)1 Random (java.util.Random)1 TimeZone (java.util.TimeZone)1 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)1 DocValuesFormat (org.apache.lucene.codecs.DocValuesFormat)1 PostingsFormat (org.apache.lucene.codecs.PostingsFormat)1 AssertingCodec (org.apache.lucene.codecs.asserting.AssertingCodec)1 AssertingDocValuesFormat (org.apache.lucene.codecs.asserting.AssertingDocValuesFormat)1 AssertingPostingsFormat (org.apache.lucene.codecs.asserting.AssertingPostingsFormat)1 CheapBastardCodec (org.apache.lucene.codecs.cheapbastard.CheapBastardCodec)1 Lucene70Codec (org.apache.lucene.codecs.lucene70.Lucene70Codec)1 SimpleTextCodec (org.apache.lucene.codecs.simpletext.SimpleTextCodec)1 Document (org.apache.lucene.document.Document)1 RandomCodec (org.apache.lucene.index.RandomCodec)1 RandomSimilarity (org.apache.lucene.search.similarities.RandomSimilarity)1 Directory (org.apache.lucene.store.Directory)1 LiveIWCFlushMode (org.apache.lucene.util.LuceneTestCase.LiveIWCFlushMode)1 SuppressCodecs (org.apache.lucene.util.LuceneTestCase.SuppressCodecs)1