Examples with MockAnalyzer - org.apache.lucene.analysis.MockAnalyzer

Example 91 with MockAnalyzer

Use of org.apache.lucene.analysis.MockAnalyzer in the project lucene-solr by Apache.

In the class BaseDocValuesFormatTestCase, the method testNumericMergeAwayAllValuesLargeSegment.

/**
 * Same scenario as testNumericMergeAwayAllValues, but on more than 1024 docs so that
 * sparse encoding is on: the only document carrying a numeric value is deleted, the
 * index is force-merged to one segment, and the field's iterator must then be
 * exhausted on the first call to nextDoc().
 */
public void testNumericMergeAwayAllValuesLargeSegment() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    config.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

    // The single document that has a value for "field".
    Document valued = new Document();
    valued.add(new StringField("id", "1", Field.Store.NO));
    valued.add(new NumericDocValuesField("field", 42L));
    writer.addDocument(valued);

    // Pad the index with documents that have no fields at all.
    int remaining = atLeast(1024);
    while (remaining > 0) {
        writer.addDocument(new Document());
        remaining--;
    }
    writer.commit();

    // Delete the only valued document, then merge down to a single segment.
    writer.deleteDocuments(new Term("id", "1"));
    writer.forceMerge(1);

    DirectoryReader reader = writer.getReader();
    writer.close();

    NumericDocValues values = getOnlyLeafReader(reader).getNumericDocValues("field");
    assertEquals(NO_MORE_DOCS, values.nextDoc());

    reader.close();
    dir.close();
}

Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 92 with MockAnalyzer

Use of org.apache.lucene.analysis.MockAnalyzer in the project lucene-solr by Apache.

In the class BaseDocValuesFormatTestCase, the method testThreads.

/**
 * Tests dv against stored fields with threads (binary/numeric/sorted, no missing).
 *
 * <p>Every indexed document carries the same random bytes in a stored field, a binary
 * doc-values field and a sorted doc-values field, plus the same random long as a stored
 * string and a numeric doc-values field. After some random deletions the writer is closed
 * and between 2 and 7 threads concurrently walk every leaf of one shared reader, checking
 * that each doc-values iterator visits every doc id in order and agrees with the stored
 * value for that doc.
 *
 * @throws Exception if indexing, reading or joining the worker threads fails
 */
public void testThreads() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // A single Document and its Field instances are reused; only the values change per doc.
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedBinField = new StoredField("storedBin", new byte[0]);
    Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNumericField = new StoredField("storedNum", "");
    Field dvNumericField = new NumericDocValuesField("dvNum", 0);
    doc.add(idField);
    doc.add(storedBinField);
    doc.add(dvBinField);
    doc.add(dvSortedField);
    doc.add(storedNumericField);
    doc.add(dvNumericField);
    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
        idField.setStringValue(Integer.toString(i));
        int length = TestUtil.nextInt(random(), 0, 8);
        byte[] buffer = new byte[length];
        random().nextBytes(buffer);
        storedBinField.setBytesValue(buffer);
        dvBinField.setBytesValue(buffer);
        dvSortedField.setBytesValue(buffer);
        long numericValue = random().nextLong();
        storedNumericField.setStringValue(Long.toString(numericValue));
        dvNumericField.setLongValue(numericValue);
        writer.addDocument(doc);
        // occasionally commit in the middle of indexing
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }
    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    writer.close();
    // compare
    final DirectoryReader ir = DirectoryReader.open(dir);
    int numThreads = TestUtil.nextInt(random(), 2, 7);
    Thread[] threads = new Thread[numThreads];
    // All workers block on this latch so that they begin reading at the same time.
    final CountDownLatch startingGun = new CountDownLatch(1);
    for (int i = 0; i < threads.length; i++) {
        threads[i] = new Thread() {

            @Override
            public void run() {
                try {
                    startingGun.await();
                    for (LeafReaderContext context : ir.leaves()) {
                        LeafReader r = context.reader();
                        // Each thread pulls its own iterators from the shared leaf reader.
                        BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
                        SortedDocValues sorted = r.getSortedDocValues("dvSorted");
                        NumericDocValues numerics = r.getNumericDocValues("dvNum");
                        for (int j = 0; j < r.maxDoc(); j++) {
                            // Load the stored document once and read both stored fields from it,
                            // instead of fetching the same document twice per doc id.
                            Document storedDoc = r.document(j);
                            BytesRef binaryValue = storedDoc.getBinaryValue("storedBin");
                            assertEquals(j, binaries.nextDoc());
                            BytesRef scratch = binaries.binaryValue();
                            assertEquals(binaryValue, scratch);
                            assertEquals(j, sorted.nextDoc());
                            scratch = sorted.binaryValue();
                            assertEquals(binaryValue, scratch);
                            String expected = storedDoc.get("storedNum");
                            assertEquals(j, numerics.nextDoc());
                            assertEquals(Long.parseLong(expected), numerics.longValue());
                        }
                    }
                    TestUtil.checkReader(ir);
                } catch (Exception e) {
                    // run() cannot throw checked exceptions; rethrow with the cause preserved.
                    throw new RuntimeException(e);
                }
            }
        };
        threads[i].start();
    }
    startingGun.countDown();
    for (Thread t : threads) {
        t.join();
    }
    ir.close();
    dir.close();
}

Also used : Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) CountDownLatch(java.util.concurrent.CountDownLatch) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) IOException(java.io.IOException) StringField(org.apache.lucene.document.StringField)

Example 93 with MockAnalyzer

Use of org.apache.lucene.analysis.MockAnalyzer in the project lucene-solr by Apache.

In the class BaseDocValuesFormatTestCase, the method testBinaryMergeAwayAllValuesLargeSegment.

/**
 * Same scenario as testBinaryMergeAwayAllValues, but on more than 1024 docs so that
 * sparse encoding is on. The one document with a binary value is deleted before a
 * force-merge to a single segment; the field's iterator must then report
 * NO_MORE_DOCS straight away.
 */
public void testBinaryMergeAwayAllValuesLargeSegment() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);

    // Only this document has a value for "field".
    Document withValue = new Document();
    withValue.add(new StringField("id", "1", Field.Store.NO));
    withValue.add(new BinaryDocValuesField("field", new BytesRef("hello")));
    w.addDocument(withValue);

    // Follow it with a run of completely empty documents.
    for (int left = atLeast(1024); left > 0; left--) {
        w.addDocument(new Document());
    }
    w.commit();

    // Remove the valued document and collapse everything into one segment.
    w.deleteDocuments(new Term("id", "1"));
    w.forceMerge(1);

    DirectoryReader reader = w.getReader();
    w.close();

    BinaryDocValues values = getOnlyLeafReader(reader).getBinaryDocValues("field");
    assertEquals(NO_MORE_DOCS, values.nextDoc());

    reader.close();
    dir.close();
}

Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 94 with MockAnalyzer

Use of org.apache.lucene.analysis.MockAnalyzer in the project lucene-solr by Apache.

In the class BaseDocValuesFormatTestCase, the method doTestSortedVsStoredFields.

/**
 * Indexes random byte values both as a stored field and as a sorted doc-values field,
 * deletes some documents, and checks that the doc values agree with the stored values,
 * first on the index as written and again after a force-merge to one segment.
 *
 * @param numDocs number of documents to add
 * @param density a document gets the fields when {@code random().nextDouble() <= density};
 *     otherwise an empty document is added in its place
 * @param bytes supplies the value written to both the stored and the doc-values field
 * @throws Exception if indexing or reading fails
 */
protected void doTestSortedVsStoredFields(int numDocs, double density, Supplier<byte[]> bytes) throws Exception {
    Directory dir = newFSDirectory(createTempDir("dvduel"));
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // A single Document and its Field instances are reused; only the values change per doc.
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedField = new StoredField("stored", new byte[0]);
    Field dvField = new SortedDocValuesField("dv", new BytesRef());
    doc.add(idField);
    doc.add(storedField);
    doc.add(dvField);
    // index some docs
    for (int i = 0; i < numDocs; i++) {
        if (random().nextDouble() > density) {
            writer.addDocument(new Document());
            continue;
        }
        idField.setStringValue(Integer.toString(i));
        byte[] buffer = bytes.get();
        storedField.setBytesValue(buffer);
        dvField.setBytesValue(buffer);
        writer.addDocument(doc);
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }
    // delete some docs
    // Random.nextInt(bound) rejects a non-positive bound, so with fewer than 10 docs
    // (numDocs / 10 == 0) nothing is deleted instead of throwing IllegalArgumentException.
    int deletionBound = numDocs / 10;
    int numDeletions = deletionBound > 0 ? random().nextInt(deletionBound) : 0;
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    // compare
    DirectoryReader ir = writer.getReader();
    verifySortedDvAgainstStored(ir);
    ir.close();
    writer.forceMerge(1);
    // compare again
    ir = writer.getReader();
    verifySortedDvAgainstStored(ir);
    ir.close();
    writer.close();
    dir.close();
}

/** Runs checkReader on {@code ir}, then checks in every leaf that the "dv" doc values match the "stored" field. */
private void verifySortedDvAgainstStored(DirectoryReader ir) throws IOException {
    TestUtil.checkReader(ir);
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        BinaryDocValues docValues = DocValues.getBinary(r, "dv");
        // Position the iterator on the first doc that has a value (or NO_MORE_DOCS).
        docValues.nextDoc();
        for (int i = 0; i < r.maxDoc(); i++) {
            BytesRef binaryValue = r.document(i).getBinaryValue("stored");
            if (binaryValue == null) {
                // No stored value for this doc, so the iterator must already be past it.
                assertTrue(docValues.docID() > i);
            } else {
                assertEquals(i, docValues.docID());
                assertEquals(binaryValue, docValues.binaryValue());
                docValues.nextDoc();
            }
        }
        // Every doc with a value has been consumed by now.
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
    }
}

Also used : Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 95 with MockAnalyzer

Use of org.apache.lucene.analysis.MockAnalyzer in the project lucene-solr by Apache.

In the class BaseDocValuesFormatTestCase, the method testSortedSetTwoDocumentsLastMissingMerge.

/**
 * Indexes two documents, committing after the first: the first has one sorted-set
 * value, the second has no fields. After a force-merge to a single segment, the field
 * must expose one unique value ("hello") attached to doc 0 only.
 */
public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    config.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

    // First document: one sorted-set value, committed on its own.
    Document first = new Document();
    first.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
    writer.addDocument(first);
    writer.commit();

    // Second document: no value for the field.
    writer.addDocument(new Document());
    writer.forceMerge(1);

    DirectoryReader reader = writer.getReader();
    writer.close();

    SortedSetDocValues values = getOnlyLeafReader(reader).getSortedSetDocValues("field");
    assertEquals(1, values.getValueCount());
    // Doc 0 is the first doc with a value and carries exactly one ordinal.
    assertEquals(0, values.nextDoc());
    assertEquals(0, values.nextOrd());
    assertEquals(NO_MORE_ORDS, values.nextOrd());
    BytesRef term = values.lookupOrd(0);
    assertEquals(new BytesRef("hello"), term);

    reader.close();
    dir.close();
}

Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 1164; Directory (org.apache.lucene.store.Directory): 785; Document (org.apache.lucene.document.Document): 775; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 265; Analyzer (org.apache.lucene.analysis.Analyzer): 259; BytesRef (org.apache.lucene.util.BytesRef): 252; StringField (org.apache.lucene.document.StringField): 183; Term (org.apache.lucene.index.Term): 183; RAMDirectory (org.apache.lucene.store.RAMDirectory): 168; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 165; Field (org.apache.lucene.document.Field): 164; TextField (org.apache.lucene.document.TextField): 159; Test (org.junit.Test): 142; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 136; IndexReader (org.apache.lucene.index.IndexReader): 134; IndexWriter (org.apache.lucene.index.IndexWriter): 133; TermQuery (org.apache.lucene.search.TermQuery): 121; FieldType (org.apache.lucene.document.FieldType): 119; SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 116; IndexSearcher (org.apache.lucene.search.IndexSearcher): 111